<img src="https://user-images.githubusercontent.com/7065401/39118381-910eb0c2-46e9-11e8-81f1-a5b897401c23.jpeg"
    style="width:300px; float: right; margin: 0 40px 40px 40px;"></img>

# Numpy: Numeric computing library

NumPy (Numerical Python) is one of the core packages for numerical computing in Python. Pandas, Matplotlib, Statsmodels and many other scientific libraries rely on NumPy.

NumPy's major contributions are:

* Efficient numeric computation with C primitives
* Efficient collections with vectorized operations
* An integrated and natural Linear Algebra API
* A C API for connecting NumPy with libraries written in C, C++, or FORTRAN.

Let's elaborate on efficiency. In Python, **everything is an object**, which means that even simple ints are objects, with all the machinery required to make objects work. We call them "Boxed Ints". In contrast, NumPy uses primitive numeric types (floats, ints), which makes storage and computation efficient.

In [236]:
import numpy as np

In [237]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [238]:
np.array([1.1,2.2,3,4])


array([1.1, 2.2, 3. , 4. ])

NumPy sẽ tự động chuyển (upcast) tất cả phần tử sang kiểu dữ liệu tổng quát nhất có trong array (ví dụ: `int` → `float`, như `3` và `4` ở trên trở thành `3.` và `4.`).

In [239]:
np.array([1.1,2.2,3,4], dtype  = 'float32')

array([1.1, 2.2, 3. , 4. ], dtype=float32)

array([1.1, 2.2, 3. , 4. ], dtype=float32)

In [None]:
np.array([1.1,2.2,3,4], dtype  = 'int')

In [None]:
X = np.array([1.1,2.2,3,4])

In [242]:
type(X)

numpy.ndarray

In [243]:
X.shape

(4,)

In [244]:
X.ndim

1

In [245]:
X.dtype

dtype('float64')

# Creating Numpy Array from Scratch

In [246]:
np.zeros([2,4])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [247]:
np.ones([2,4])

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [248]:
# Create a range array from 0 to 20 with step = 2 (the stop value 21 is exclusive)
np.arange(0, 21,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [249]:
np.full([2,3],9)

array([[9, 9, 9],
       [9, 9, 9]])

In [250]:
# Create an array of evenly spaced values over an interval (linear space)
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

# Random

In [251]:
np.random.random((4,4))

array([[0.7670237 , 0.41182014, 0.67543908, 0.24979628],
       [0.31321833, 0.96541622, 0.58846509, 0.65966841],
       [0.53320625, 0.23053302, 0.39486929, 0.61880856],
       [0.47486752, 0.47013219, 0.71607453, 0.287991  ]])

In [254]:
# Seed for reproducibility:
# no matter how many times this cell is run, it yields the same values as the first run
np.random.seed(0)
np.random.random((4,4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [255]:
np.random.normal(0,1, (3,3))

array([[ 0.44386323,  0.33367433,  1.49407907],
       [-0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 ]])

In [256]:
# randint samples from the half-open interval [low, high), so low=3, high=4 can only yield 3
np.random.randint(3,4, (4,5))

array([[3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3],
       [3, 3, 3, 3, 3]])

In [257]:
np.random.rand(3,4,5)

array([[[0.45615033, 0.56843395, 0.0187898 , 0.6176355 , 0.61209572],
        [0.616934  , 0.94374808, 0.6818203 , 0.3595079 , 0.43703195],
        [0.6976312 , 0.06022547, 0.66676672, 0.67063787, 0.21038256],
        [0.1289263 , 0.31542835, 0.36371077, 0.57019677, 0.43860151]],

       [[0.98837384, 0.10204481, 0.20887676, 0.16130952, 0.65310833],
        [0.2532916 , 0.46631077, 0.24442559, 0.15896958, 0.11037514],
        [0.65632959, 0.13818295, 0.19658236, 0.36872517, 0.82099323],
        [0.09710128, 0.83794491, 0.09609841, 0.97645947, 0.4686512 ]],

       [[0.97676109, 0.60484552, 0.73926358, 0.03918779, 0.28280696],
        [0.12019656, 0.2961402 , 0.11872772, 0.31798318, 0.41426299],
        [0.0641475 , 0.69247212, 0.56660145, 0.26538949, 0.52324805],
        [0.09394051, 0.5759465 , 0.9292962 , 0.31856895, 0.66741038]]])

# Array index and Slicing

## One-dimensional subarray

In [258]:
X1 = np.random.randint(20,size = 6)

In [259]:
X1

array([19, 16,  0,  0,  6, 19])

In [260]:
X1[4]

In [261]:
X1[:-1]

array([19, 16,  0,  0,  6])

## Multi-dimension array

In [262]:
X2 = np.random.randint(10, size = (2,3))

In [263]:
X2

array([[3, 8, 8],
       [8, 2, 3]])

## Slicing

#### X[start:stop:step]

In [264]:
X2[0:1]

array([[3, 8, 8]])

In [265]:
X2[0:1, 0:2]

array([[3, 8]])

In [266]:
X2[:2, :2]

array([[3, 8],
       [8, 2]])

In [267]:
X1 = np.array([1,3,4,52,4])


In [268]:
# The stop index (6) exceeds the array length (5); slicing clips to the end instead of raising
X1[2 : 6]

array([ 4, 52,  4])

In [269]:
X1[::2]

array([1, 4, 4])

## Reshaping of Array and Transpose

In [270]:
grid = np.arange(1,10)

grid.shape

(9,)

In [271]:
grid.reshape((3,3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [272]:
grid.reshape((3,3)).T

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

## Array Concatenation and Splitting

### Concatenation

In [273]:
x = np.array([1,2,3])
y = np.array([3,2,1])

In [274]:
np.concatenate((x,y), axis = 0)

array([1, 2, 3, 3, 2, 1])

In [275]:
x = np.random.randint(5,size = (3,3))
y = np.random.randint(5,size = (3,3))

In [276]:
x

array([[2, 3, 0],
       [0, 0, 3],
       [0, 2, 2]])

In [277]:
y

array([[0, 4, 3],
       [4, 0, 4],
       [3, 3, 4]])

In [278]:
np.concatenate((x,y), axis = 0)

array([[2, 3, 0],
       [0, 0, 3],
       [0, 2, 2],
       [0, 4, 3],
       [4, 0, 4],
       [3, 3, 4]])

In [279]:
np.concatenate((x,y), axis = 0).shape

(6, 3)

In [280]:
np.concatenate((x,y), axis = 1)

array([[2, 3, 0, 0, 4, 3],
       [0, 0, 3, 4, 0, 4],
       [0, 2, 2, 3, 3, 4]])

In [281]:
np.concatenate((x,y), axis = 1).shape

(3, 6)

In [282]:
# vstack (vertical stack):
# joins arrays vertically, i.e. appends them as additional rows
x = np.array([1, 2, 3])
y = np.array([
    [0, 3, 2],
    [0, 0, 1],
    [1, 0, 3],
])

In [283]:
np.vstack((x,y))

array([[1, 2, 3],
       [0, 3, 2],
       [0, 0, 1],
       [1, 0, 3]])

In [284]:
# hstack (horizontal stack):
# joins arrays horizontally, i.e. side by side along the column direction
y = np.array([0, 3, 2])
np.hstack((x, y))

array([1, 2, 3, 0, 3, 2])

### Splitting array

In [285]:
x = np.array([1,2,3,4,21,42])

In [286]:
np.split(x, [3,5])

[array([1, 2, 3]), array([ 4, 21]), array([42])]

In [287]:
x1,x2,x3 = np.split(x, [3,5])

# Broadcasting and Vectorize operations

### Broadcasting is simply a set of rules for applying binary ufuncs (e.g., addition, subtraction, multiplication, etc.) on arrays of different sizes.

![image-broadcasting](https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png)

### Dễ hiểu hơn là numpy sẽ nhân bản các phần tử sao cho 2 ma trận sẽ có cùng chiều hoặc có số chiều phù hợp để thực hiện các phép tính

In [288]:
a = np.arange(3)

In [289]:
a

array([0, 1, 2])

In [290]:
a + 5

array([5, 6, 7])

In [291]:
b = np.ones((3,3))
b

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [292]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

# Manipulating and Comparing Arrays

## Aggregation 

#### Aggregation = performing the same operation on a number of things

In [293]:
list_num = [1,2,3]

In [295]:
ll = np.array (list_num)

In [296]:
ll


array([1, 2, 3])

In [297]:
sum(ll)

6

In [298]:
np.sum(ll)

6

In [299]:
massive_array = np.random.random(10000)

In [300]:
massive_array[:5]

array([0.7670237 , 0.41182014, 0.67543908, 0.24979628, 0.31321833])

In [301]:
massive_array.shape

(10000,)

In [303]:
%timeit sum(massive_array) # Python built-in function sum()

1.19 ms ± 97 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [304]:
%timeit np.sum(massive_array)

13.4 µs ± 96.8 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## Độ lệch chuẩn và phương sai trong numpy 

### Ví dụ tính độ lệch chuẩn và phương sai của chiều cao những con chó

In [305]:
# heights of the dogs
dog_height = [600, 470, 170, 430, 300]

In [306]:
dog_height = np.array(dog_height)

In [307]:
# standard deviation
np.std(dog_height)

147.32277488562318

In [309]:
# variance, computed by hand as the square of the standard deviation
np.std(dog_height) ** 2

21704.000000000004

In [311]:
# variance using the built-in function
np.var(dog_height)

21704.0

# Sorting Array

### `np.sort` mặc định dựa trên thuật toán quicksort (trung bình O(n log n))

In [312]:
x = np.array([2,1,4,3,5])

In [313]:
np.sort(x)

array([1, 2, 3, 4, 5])

In [314]:
#A related function is argsort, which instead returns the indices of the sorted elements:
np.argsort(x)

array([1, 0, 3, 2, 4], dtype=int64)

### Sorting along rows or columns 

In [317]:
# Fix the seed so the 4x6 demo matrix is identical on every run
np.random.seed(42)
matA = np.random.randint(10, size=(4, 6))

In [318]:
matA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [319]:
# sort along axis 0: each column is sorted independently (top to bottom)
np.sort(matA, axis = 0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [320]:
# sort along axis 1: each row is sorted independently (left to right)
np.sort(matA, axis = 1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

# Linear Algebra

In [324]:
# Left operand for the matrix-product demo: a fixed 3x3 integer matrix.
# The previous version called np.random.seed() with no argument, which reseeds
# from fresh OS entropy, so the matrix (and every product computed from it)
# changed on each run. Literal values keep the cell reproducible.
A = np.array([
    [0, 7, 7],
    [6, 6, 8],
    [4, 1, 0],
])

In [325]:
A

array([[0, 7, 7],
       [6, 6, 8],
       [4, 1, 0]])

In [329]:
# Right operand for the matrix-product demo: a fixed 3x2 integer matrix.
# The previous version called np.random.seed() with no argument, which reseeds
# from fresh OS entropy, so the matrix changed on each run. Literal values keep
# the cell reproducible.
B = np.array([
    [5, 8],
    [4, 6],
    [5, 9],
])

In [330]:
B

array([[5, 8],
       [4, 6],
       [5, 9]])

#### DOT product 

In [331]:
# option 1: the ndarray.dot method
A.dot(B)

array([[ 63, 105],
       [ 94, 156],
       [ 24,  38]])

In [332]:
# option 2: the @ matrix-multiplication operator
A @ B

array([[ 63, 105],
       [ 94, 156],
       [ 24,  38]])

In [333]:
# Number of jars sold: a 5x3 table of integer counts in [0, 20),
# generated with a fixed seed so the values are the same on every run
np.random.seed(0)
sales_amounts = np.random.randint(0, 20, size=(5, 3))

In [334]:
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [336]:
#Create weekly DataFrame
import pandas as pd

In [342]:
# Wrap the sales matrix in a DataFrame: one row per weekday, one column per product
weekly_sales = pd.DataFrame(
    sales_amounts,
    index=['Mon', 'Tue', 'Wed', 'Thurs', 'Fri'],
    columns=['Things 1', 'Things 2', 'Things 3'],
)

In [343]:
weekly_sales

Unnamed: 0,Things 1,Things 2,Things 3
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [346]:
# Create the price array (one price per product) and wrap it in a
# single-row DataFrame whose columns match the product names
prices = np.array([10, 20, 30])
things_prices = pd.DataFrame(
    prices.reshape(1, 3),
    index=['Price'],
    columns=['Things 1', 'Things 2', 'Things 3'],
)

In [347]:
things_prices

Unnamed: 0,Things 1,Things 2,Things 3
Price,10,20,30


### Tính tổng số tiền bán ra 5 thứ  

In [349]:
weekly_sales.shape, things_prices.shape

((5, 3), (1, 3))

In [462]:
# Revenue per day = units sold (5 x 3) dot prices per product (3 x 1).
# Only the product columns are selected, so this cell stays re-runnable after
# extra columns (such as 'Total Price') have been added to weekly_sales;
# DataFrame.dot raises "matrices are not aligned" if the columns of the left
# frame do not match the index of the right one.
total_price = weekly_sales[things_prices.columns].dot(things_prices.T)

In [463]:
total_price

Unnamed: 0,Price
Mon,420
Tue,300
Wed,1010
Thurs,520
Fri,340


In [464]:
total_price.sum()

Price    2590
dtype: int64

In [465]:
# Add the per-day revenue as a new 'Total Price' column (mutates weekly_sales in place)
weekly_sales['Total Price'] = total_price

In [466]:
weekly_sales

Unnamed: 0,Things 1,Things 2,Things 3,Total Price
Mon,12,15,0,420
Tue,3,3,7,300
Wed,9,19,18,1010
Thurs,4,6,12,520
Fri,1,6,7,340
