# Numpy Introduction
### Numpy is not so popular but very important because it is used for pandas and matplotlib

In [2]:
# import library
import sys
import numpy as np


In [7]:
# basic calc with python
n = 3
bit = 2 ** n

In [8]:
print(bit)

8


In [124]:
# size of an int in python
sys.getsizeof(1)

14

In [127]:
# size of an int with numpy
np.dtype(int).itemsize

4


Numpy is library that has a very advance numeric processing.

Single calculation in python can take up to 20 byte memory because of its high level programming languange

In [10]:
np.int8 # create int with only 8 bit/1 byte memory (it can be precised by user)

numpy.int8

# Basic Arrays in Numpy
Numpy is all about arrays

In [3]:
# basic Numpy Arrays
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [4]:
a = np.array([1,2,3,4]) # create an array with numpy(np)

In [20]:
b = np.array([0, .5, 1, 1.5, 2.2])

In [15]:
a[0], a[1], b[3], a[-1], a[[0, 2, -1]]

(1, 2, 1.5, 4, array([1, 3, 4]))

# Types of Array

In [16]:
a

array([1, 2, 3, 4])

In [17]:
a.dtype 

dtype('int32')

In [26]:
np.array([1, 2, 3], dtype = np.float) # using dtype to assign new data type
# np.array([1, 2, 3], dtype = np.float).dtype

array([1., 2., 3.])

In [27]:
print(b)
b.dtype

[0.  0.5 1.  1.5 2.2]


dtype('float64')

In [28]:
c = np.array(['a', 'b', 'c']) # numpy can assign string to arrayb

In [29]:
c.dtype # but usually numpy is only for numeric processing

dtype('<U1')

# Dimensions and shapes


In [36]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
    ]) # two dimensional array with numpy

In [33]:
A.shape # shape of two dimensional array

(3, 3)

In [34]:
A.ndim # dimensions

2

In [35]:
A.size # size of an array

9

In [43]:
# create a three dimensional array with numpy
B = np.array([
    [
        [1, 2],
        [3, 4]
    ],
    [
        [5, 6],
        [7, 8]
    ]
], dtype = np.int8)

In [44]:
B.shape

(2, 2, 2)

In [45]:
B.ndim

3

In [46]:
B.size

8

In [47]:
B.dtype

dtype('int8')

# Matrix Indexing and Slicing


In [48]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [49]:
A[1] # this index at 1 indicates at row[1]

array([4, 5, 6])

In [56]:
# value at specific row and column
A[1][1], A[0][0], A[2][0]

(5, 1, 7)

In [57]:
# better way with using multidimensional selection in numpy
# A[d1, d2] .... A[d1, d2, ...., dn]
A[1, 1], A[0, 0], A[2, 0]

(5, 1, 7)

In [58]:
# value modification
A[0, 0] = 100
A[2] = 99
A[1] = np.array([1, 1, 1]) 

In [59]:
A

array([[100,   2,   3],
       [  1,   1,   1],
       [ 99,  99,  99]])

# Basic Statistic using numpy

In [60]:
# new array
numbers = np.array([23, 213, 4235, 123, 656, 34, 1, 2, 3, 4, 5, 64, 7, 9, 0, -1, -23, 54])

In [62]:
numbers.sum()

5409

In [63]:
numbers.mean()

300.5

In [64]:
numbers.std() # standard deviation

966.381179107568

In [65]:
numbers.var() # variance

933892.5833333334

In [66]:
# also work with 2 dim matrices
A

array([[100,   2,   3],
       [  1,   1,   1],
       [ 99,  99,  99]])

In [67]:
A.sum()

405

In [68]:
A.mean()

45.0

In [69]:
A.std()

48.52719741432518

In [70]:
A.var()

2354.8888888888887

In [71]:
# also work with axis
A.sum(axis=0) # sum at each column (x-axis)

array([200, 102, 103])

In [73]:
A.sum(axis=1) # sum at each row (y-axis)/(vertical axis = 1)

array([105,   3, 297])

In [74]:
A.std(axis=1)

array([45.96375384,  0.        ,  0.        ])

# Broadcasting and Vectorized operations

In [88]:
# create a simple array
simple = np.arange(5)
simple

array([0, 1, 2, 3, 4])

In [89]:
simple + 10 # add 10 to each value but not assigning (called vectorizing)

array([10, 11, 12, 13, 14])

In [90]:
simple # back to original

array([0, 1, 2, 3, 4])

In [91]:
simple += 100 
simple # add 100 and assigning (called broadcasting)

array([100, 101, 102, 103, 104])

# Boolean arrays (also called masks)

In [94]:
bool_array = np.arange(4) # create an array
bool_array

array([0, 1, 2, 3])

In [96]:
bool_array[[True, False, True, False]] # select element at index 0 and 2 same with 'bool_array[[0,2]]' or 'bool_array[0], bool_array[2]'

array([0, 2])

In [97]:
bool_array == 3 # see if any element equal 3

array([False, False, False,  True])

In [99]:
bool_array[bool_array > bool_array.mean()] # return every element that greater than its mean

array([2, 3])

In [104]:
# create a random (1-100) 2 dimensional array (size (3,3))
random = np.random.randint(100, size=(3,3))
random

array([[23,  5, 38],
       [40, 54, 89],
       [53, 35, 72]])

In [105]:
random > random.mean()

array([[False, False, False],
       [False,  True,  True],
       [ True, False,  True]])

In [107]:
random[random >= random.mean()] # basic idea of filtering element

array([54, 89, 53, 72])

# Basic Linear Algebra

In [110]:
# create 2 arrays (2 dim)
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1]
])

In [111]:
A.dot(B) # dot product

array([[20, 14],
       [56, 41],
       [92, 68]])

In [112]:
A @ B # another command dot product

array([[20, 14],
       [56, 41],
       [92, 68]])

In [113]:
A.T # transpose

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [116]:
B.T @ A.T 

array([[20, 56, 92],
       [14, 41, 68]])

# Summary

## Performance test
### (sum of the square of first 1000000 int number with original python and numpy)

In [129]:
# python list
l = list(range(1000000))

In [130]:
# numpy list
nl = np.arange(1000000)

In [131]:
%time sum([x**2 for x in l]) # python time performance

Wall time: 459 ms


333332833333500000

In [132]:
%time np.sum(nl ** 2) # numpy time performance

Wall time: 2.01 ms


584144992