# Numpy Library

In [1]:
import numpy as np
import pandas as pd

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)
## Creating Numpy Array

### From Python list

In [8]:
# 1-D array built from a flat list -> shape (4,)
arr_1 = np.array([1, 2, 3, 4])
# 2-D array built from a list of equal-length rows -> shape (2, 3)
arr_2 = np.array([[1, 2, 3],
                  [4, 5, 6]])
# Same values, but the element type is forced to 32-bit floats
arr_3 = np.array([[1, 2, 3],
                  [4, 5, 6]], dtype=np.float32)

### ndarray attributes

In [12]:
# Inspect the array, one attribute per output line:
# Python type, shape, number of axes, element dtype, total element count
print(type(arr_3), arr_3.shape, arr_3.ndim, arr_3.dtype, arr_3.size, sep='\n')

<class 'numpy.ndarray'>
(2, 3)
2
float32
6


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)
## Creating Numpy Array from Scratch

- np.zeros()

In [17]:
# 2 rows x 4 columns, every element is the integer 0
np.zeros(shape=(2, 4), dtype=int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

- np.ones()

In [18]:
# 2 rows x 4 columns, every element is the integer 1
np.ones(shape=(2, 4), dtype=int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

- np.arange()

In [53]:
# np.arange(start, stop, step): start is included, stop is excluded
np.arange(start=0, stop=10, step=3)  # 4 values -> shape (4,)

array([0, 3, 6, 9])

- np.full()

In [54]:
# np.full(shape, value): every element of the new array equals `value`
np.full(shape=(2, 4), fill_value=5.5)  # shape (2, 4)

array([[5.5, 5.5, 5.5, 5.5],
       [5.5, 5.5, 5.5, 5.5]])

- np.linspace()

In [33]:
# np.linspace(a, b, N)
# create an array of N evenly spaced values from a to b (both endpoints included)
np.linspace(start=0, stop=1, num=10)

array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])

### random

- np.random.random(size)

In [58]:
# np.random.random(size): `size` is a single integer or a tuple giving the output shape
# here: a 2x4 array of random floats drawn from [0, 1)
np.random.random(size=(2, 4))

array([[0.1289263 , 0.31542835, 0.36371077, 0.57019677],
       [0.43860151, 0.98837384, 0.10204481, 0.20887676]])

- np.random.rand(d0, d1, ..., dn)

In [50]:
# np.random.rand(d0, d1, ..., dn): each dimension is passed as its own argument, not as a tuple
# return a 2x4 array with random floats in [0, 1)
np.random.rand(2,4)

array([[0.81216873, 0.47997717, 0.3927848 , 0.83607876],
       [0.33739616, 0.64817187, 0.36824154, 0.95715516]])

- np.random.seed()

In [38]:
# Seeding the global generator makes the "random" values repeat on every run
np.random.seed(seed=0)
np.random.random(size=(2, 4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ]])

- np.random.normal()

In [42]:
# np.random.normal(loc=0.0, scale=1.0, size=None)
# draw samples from a normal (Gaussian) distribution with mean `loc` and standard deviation `scale`
np.random.normal(loc=0, scale=1, size=(2, 4))

array([[-0.88778575, -1.98079647, -0.34791215,  0.15634897],
       [ 1.23029068,  1.20237985, -0.38732682, -0.30230275]])

- np.random.randint()

In [62]:
# np.random.randint(low, high=None, size=None, dtype=int)
# random integers with low <= value < high (low inclusive, high exclusive)
np.random.randint(low=0, high=10, size=(2, 4))

array([[3, 4, 4, 8],
       [4, 3, 7, 5]])

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)
## Array Indexing & Slicing

### 1-dimensional subarray

In [55]:
# Six random integers in [0, 20) to practise indexing on
x1 = np.random.randint(low=0, high=20, size=6)
x1

array([17, 15,  4,  9, 10,  1])

In [67]:
# indexing: positions start at 0, so index 4 selects the fifth element
x1[4]

10

In [69]:
# slicing x[start:stop:step]: start is included, stop is excluded
x1[0:4:1]

array([17, 15,  4,  9])

### multi-dimensional array

In [63]:
# A 2x4 array of random integers in [0, 10) to practise 2-D indexing on
x2 = np.random.randint(low=0, high=10, size=(2, 4))
x2

array([[5, 0, 1, 5],
       [9, 3, 0, 5]])

In [66]:
# Index with one (row, column) tuple; x2[1][1] would first build a temporary
# row array and then index into it, which is slower and breaks for assignment with slices
x2[1, 1]

3

In [71]:
# 2-D slicing [rows, columns]: the first two rows and the first three columns
x2[:2,:3]

array([[5, 0, 1],
       [9, 3, 0]])

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)
## Reshaping of Arrays & Transpose

### Reshape vs Resize

- reshape()

In [90]:
# reshape() returns an array with the requested shape, so the result has to be assigned
x1 = np.arange(1, 10).reshape((9, 1))  # (9,) -> (9, 1)
x1

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

- resize()

In [91]:
# resize() changes the shape of the array in place, so there is nothing to assign
x2 = np.arange(1, 10)  # shape (9,)
x2.resize((9, 1))      # x2 itself is now (9, 1)
x2

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

### Transpose (cols <-> rows)

In [18]:
# A small 2x3 matrix to transpose
x = np.array([[1, 2, 3],
              [4, 5, 6]])
x

array([[1, 2, 3],
       [4, 5, 6]])

In [19]:
# .T swaps rows and columns: the (2, 3) array becomes (3, 2)
x.T

array([[1, 4],
       [2, 5],
       [3, 6]])

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)
## Array Concatenation and Splitting

In [22]:
# Two arrays of identical shape (2, 3) to join together
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[8, 8, 8], [9, 9, 9]])

### np.concatenate, np.vstack and np.hstack

In [28]:
# axis=0 joins along rows: y is stacked below x, giving shape (4, 3)
# np.vstack((x, y)) produces the same result
np.concatenate([x, y], axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [8, 8, 8],
       [9, 9, 9]])

In [30]:
# axis=1 joins along columns: y is placed to the right of x, giving shape (2, 6)
# np.hstack((x, y)) produces the same result
np.concatenate([x, y], axis=1)

array([[1, 2, 3, 8, 8, 8],
       [4, 5, 6, 9, 9, 9]])

### np.split()

In [33]:
# Six values to cut into pieces
x = np.array([44, 55, 66, 77, 88, 99])

In [34]:
# The list gives the cut positions: pieces are x[:2], x[2:3] and x[3:]
# To keep the pieces, unpack them: x1, x2, x3 = np.split(x, [2, 3])
np.split(x, indices_or_sections=[2, 3])


[array([44, 55]), array([66]), array([77, 88, 99])]

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Broadcasting and Vectorized operations

Broadcasting is simply a set of rules for applying binary ufuncs (e.g., addition, subtraction, multiplication, etc.) on arrays of different sizes.

![image-broadcasting](https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png)

In [36]:
# The integers 0, 1, 2 as a 1-D array
x = np.arange(0, 3)
x

array([0, 1, 2])

In [38]:
x + 5  # broadcasting: the scalar 5 is added to every element of x

array([5, 6, 7])

In [39]:
# A (3, 1) column plus a (3,) row: both operands are broadcast to a common (3, 3) shape
np.array([[1], [2], [3]]) + np.array([1, 1, 1])

array([[2, 2, 2],
       [3, 3, 3],
       [4, 4, 4]])

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Manipulating and Comparing arrays

### Aggregation
Aggregation = combining many values into a single summary value by applying one operation to all of them (e.g. sum, mean, min, max)

In [40]:
# Three values to aggregate
x = np.array([1, 2, 3])

In [43]:
# Python's built-in sum() function
sum(x)

6

In [44]:
# NumPy's sum() function: same result as the built-in sum() for this 1-D array
np.sum(x)

6

In [46]:
# Compare execution time on a larger array of 10,000 random floats
massive_array = np.random.random(size=10000)
massive_array.shape

(10000,)

In [48]:
# Time Python's built-in sum() against np.sum() on the same array
%timeit sum(massive_array)
%timeit np.sum(massive_array)

1.72 ms ± 197 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
21.9 µs ± 4.31 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Statistics

In [54]:
# Sample data: five height measurements
dog_height = np.array([600, 470, 170, 430, 300])

In [59]:
# Variance: the mean of the squared deviations from the mean
# (np.var divides by N by default, i.e. the population variance)
np.var(dog_height)

21704.0

In [60]:
# Standard deviation: the square root of the variance
np.std(dog_height)

147.32277488562318

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

## Sorting array
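np.sort() uses the quicksort algorithm by default (a different one can be selected with the `kind` argument)<br>
np.sort() only sorts in ascending order; reverse the result (e.g. with `[::-1]`) to get descending order<br>
np.argsort() returns the indices that would sort the array, not the sorted values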

### Sort 1-dimensional array

In [89]:
# Ten random integers in [0, 100) to sort
x = np.random.randint(low=0, high=100, size=10)
x

array([67, 59, 11,  0, 41, 41, 75, 53, 70, 75])

In [90]:
# ascending: np.sort returns a sorted copy, x itself is left unchanged
np.sort(x)

array([ 0, 11, 41, 41, 53, 59, 67, 70, 75, 75])

In [94]:
# descending: sort ascending first, then reverse the result with a [::-1] slice
np.sort(x)[::-1]

array([75, 75, 70, 67, 59, 53, 41, 41, 11,  0])

In [96]:
# Positions of the elements in sorted order: x[np.argsort(x)] gives the same values as np.sort(x)
np.argsort(x)

array([3, 2, 4, 5, 7, 1, 0, 8, 6, 9], dtype=int64)

### multi-dimensional array: sort along rows or columns

In [99]:
# Fixed seed so the same 4x6 matrix of digits is drawn on every run
np.random.seed(seed=42)
MatA = np.random.randint(low=0, high=10, size=(4, 6))
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [101]:
# axis=0: sort down each column independently
np.sort(MatA, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [102]:
# axis=1: sort across each row independently
np.sort(MatA, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)
## Linear algebra

In [106]:
# Left operand: 3x3
MatA = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Right operand: 3x2 (its row count matches MatA's column count)
MatB = np.array([
    [1, 2],
    [1, 2],
    [1, 2],
])

In [107]:
# Matrix product of MatA (3x3) and MatB (3x2): the inner dimensions match, the result is 3x2
MatA.dot(MatB)

array([[ 6, 12],
       [15, 30],
       [24, 48]])

In [108]:
# The @ operator computes the same matrix product as MatA.dot(MatB)
MatA @ MatB

array([[ 6, 12],
       [15, 30],
       [24, 48]])

### Dot product example

In [110]:
# Number of Banh My sold: a 5x3 table of random counts in [0, 20),
# made reproducible by fixing the seed first
np.random.seed(seed=0)
sales_amount = np.random.randint(low=0, high=20, size=(5, 3))
sales_amount

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [111]:
# Create the weekly sales DataFrame: one row per weekday, one column per Banh My filling
# (pandas is imported as `pd` in the import cell at the top of the notebook)
weekly_sales = pd.DataFrame(
    sales_amount,
    index=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
    columns=['Thit nuong', 'Heo quay', 'Xuc xich'],
)
weekly_sales

Unnamed: 0,Thit nuong,Heo quay,Xuc xich
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thu,4,6,12
Fri,1,6,7


In [112]:
# Create price array: one unit price per column of the sales table

prices = np.array([10, 8, 12])

In [115]:
# Wrap the prices in a one-row DataFrame that uses the same column labels as the sales table
banhmy_prices = pd.DataFrame(
    prices[np.newaxis, :],  # (3,) -> (1, 3)
    index=['Price'],
    columns=['Thit nuong', 'Heo quay', 'Xuc xich'],
)
banhmy_prices

Unnamed: 0,Thit nuong,Heo quay,Xuc xich
Price,10,8,12


In [120]:
# Check the shapes before multiplying: (5, 3) @ (3, 1) is valid because the inner dimensions match
weekly_sales.shape, banhmy_prices.T.shape

((5, 3), (3, 1))

In [123]:
# Daily total = sum over the fillings of (units sold x unit price), computed as a matrix product
total_prices = weekly_sales.dot(banhmy_prices.T)
total_prices

Unnamed: 0,Price
Mon,240
Tue,138
Wed,458
Thu,232
Fri,142


In [124]:
# Attach the daily totals as a new column, aligned on the weekday index.
# NOTE: this modifies weekly_sales in place; re-running the matrix-product cell
# afterwards fails because the table then has four columns instead of three.
weekly_sales['Total price'] = total_prices['Price']
weekly_sales

Unnamed: 0,Thit nuong,Heo quay,Xuc xich,Total price
Mon,12,15,0,240
Tue,3,3,7,138
Wed,9,19,18,458
Thu,4,6,12,232
Fri,1,6,7,142
