# Numpy: Numeric computing library

NumPy (Numerical Python) is one of the core packages for numerical computing in Python.

Pandas, Matplotlib, Statsmodels and many other scientific libraries rely on NumPy.

NumPy's major contributions are:
- Efficient numeric computation with C primitives
- Efficient collections with vectorized operations
- An integrated and natural Linear Algebra API
- A C API for connecting NumPy with libraries written in C, C++, or FORTRAN.

Let's elaborate on efficiency. In Python, **everything is an object**, which means that even simple ints are objects, with all the machinery required to make objects work. We call them "boxed ints". In contrast, NumPy uses primitive numeric types (floats, ints), which makes storage and computation efficient.

In [4]:
import numpy as np

# Creating Numpy Arrays from Python Lists

In [2]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [3]:
np.array([3.14, 4, 2, 3]) # auto convert to float

array([3.14, 4.  , 2.  , 3.  ])

In [7]:
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [9]:
a1 = np.array([1, 2, 3, 4])
type(a1)

numpy.ndarray

In [11]:
a2 = np.array([[1, 2, 3],
               [4, 5, 6]])
type(a2)

numpy.ndarray

In [12]:
a2.shape

(2, 3)

In [13]:
a2.ndim

2

In [14]:
a2.dtype

dtype('int64')

In [15]:
a2.size # total number of elements in the array

6

# Creating Numpy Arrays from Scratch

`zeros`, `ones`, `full`, `arange`, `linspace`


Ấn `shift` + `Tab` để xem docs

In [33]:
np.zeros(shape = [2, 4], dtype = int) # shape: 2 rows, 4 columns

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [34]:
np.ones(shape = [4, 4], dtype = float)

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [35]:
np.arange(start = 0, stop = 20, step = 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [37]:
np.full((3, 5), 6.9) # 3 rows x 5 columns, every element set to 6.9

array([[6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9]])

In [40]:
np.linspace(0, 1, 5) # 5 evenly spaced values from 0 to 1, both endpoints included

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

# Random

In [49]:
# Seed the global random generator so the values below are reproducible
np.random.seed(0)
np.random.random((4, 4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [51]:
np.random.random((4, 4))

array([[0.45615033, 0.56843395, 0.0187898 , 0.6176355 ],
       [0.61209572, 0.616934  , 0.94374808, 0.6818203 ],
       [0.3595079 , 0.43703195, 0.6976312 , 0.06022547],
       [0.66676672, 0.67063787, 0.21038256, 0.1289263 ]])

In [60]:
np.random.rand(4, 4) # like np.random.random, but the dimensions are passed as separate arguments instead of one tuple

array([[0.43328806, 0.75610669, 0.39609828, 0.89603839],
       [0.63892108, 0.89155444, 0.68005557, 0.44919774],
       [0.97857093, 0.11620191, 0.7670237 , 0.41182014],
       [0.67543908, 0.24979628, 0.31321833, 0.96541622]])

In [53]:
np.random.normal(0, 1, (3, 3)) # samples from a normal distribution (loc=0, scale=1), shape (3, 3)

array([[-1.42001794, -1.70627019,  1.9507754 ],
       [-0.50965218, -0.4380743 , -1.25279536],
       [ 0.77749036, -1.61389785, -0.21274028]])

In [58]:
np.random.randint(0, 10, (3, 4)) # the upper bound (10) is exclusive

array([[7, 2, 9, 2],
       [3, 3, 2, 3],
       [4, 1, 2, 9]])

# Array Indexing & Slicing

## One-dimensional subarray

In [61]:
# With a single bound, randint draws from [0, 20); size = 6 gives a 1-D array of 6 values
x1 = np.random.randint(20, size = 6)

In [62]:
x1

array([16, 19,  1,  8,  0,  4])

In [66]:
# Indexing is zero-based; a negative index counts from the end (-1 is the last element)
x1[4], x1[0], x1[-1]

(0, 16, 4)

## Multi-dimensional array

In [68]:
x2 = np.random.randint(10, size = (3,4))

In [69]:
x2

array([[9, 6, 5, 7],
       [8, 8, 9, 2],
       [8, 6, 6, 9]])

In [72]:
x2[1, 2]

9

In [73]:
# Modify the array in place: element at row 1, column 2
x2[1, 2] = 6

In [74]:
x2

array([[9, 6, 5, 7],
       [8, 8, 6, 2],
       [8, 6, 6, 9]])

## Slicing

In [75]:
x1

array([16, 19,  1,  8,  0,  4])

In [77]:
x1[0:3]

array([16, 19,  1])

In [78]:
x1[2:4]

array([1, 8])

In [79]:
# every other element, every 2 step
x1[::2]

array([16,  1,  0])

In [80]:
x2

array([[9, 6, 5, 7],
       [8, 8, 6, 2],
       [8, 6, 6, 9]])

In [82]:
x2[:2, :3] # first 2 rows and first 3 columns

array([[9, 6, 5],
       [8, 8, 6]])

In [86]:
x2[:, :2] # all rows, first 2 columns

array([[9, 6],
       [8, 8],
       [8, 6]])

# Reshaping of Arrays & Transpose

In [88]:
grid = np.arange(1, 10)
grid.shape

(9,)

In [89]:
grid.reshape((3, 3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [90]:
x = np.array([1, 2, 3])

In [91]:
x.shape

(3,)

In [94]:
x.reshape((1, 3)).shape

(1, 3)

In [96]:
x = np.array([[1., 2.], [3., 4.]])

In [99]:
x # 2 rows, 2 columns

array([[1., 2.],
       [3., 4.]])

In [101]:
x.T # transpose

array([[1., 3.],
       [2., 4.]])

# Array Concatenation and Splitting

In [103]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])

In [107]:
np.concatenate((x, y), axis = 0) # axis = 0 (rows), axis = 1 (cols)

array([1, 2, 3, 3, 2, 1])

In [108]:
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [110]:
np.concatenate((grid, grid), axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [111]:
# vstack and hstack: make it explicit whether arrays are joined vertically or horizontally
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7],
                 [6, 5, 2]])
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 2]])

In [113]:
y = np.array([[99], [99]])
np.hstack((y, grid))

array([[99,  9,  8,  7],
       [99,  6,  5,  2]])

## Splitting

In [114]:
x = np.array([1, 2, 3, 99,69,3,2,1])

In [121]:
x1, x2, x3 = np.split(x, [3, 5]) # split at indices 3 and 5 -> x[:3], x[3:5], x[5:]

In [122]:
x1

array([1, 2, 3])

In [123]:
x2

array([99, 69])

In [124]:
x3

array([3, 2, 1])

# Broadcasting and Vectorized operations

Broadcasting is simply a set of rules for applying binary functions (e.g., addition, subtraction, multiplication, etc.) on arrays of different shapes.

In [5]:
a = np.arange(3)

In [6]:
a

array([0, 1, 2])

In [7]:
a + 5 # broadcasting

array([5, 6, 7])

In [129]:
b = np.ones((3, 3))

In [131]:
a.shape, b.shape

((3,), (3, 3))

In [132]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [134]:
a * b

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [135]:
c = np.arange(3).reshape((3, 1))

In [136]:
c

array([[0],
       [1],
       [2]])

In [137]:
# Shapes (3,) and (3, 1) are broadcast against each other, giving a (3, 3) result
a + c

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

# Manipulating & Comparing Arrays

### Aggregation
Aggregation = performing the same operation on a number of things

In [139]:
list_number = [1, 2, 3]

In [140]:
ll = np.array(list_number)

In [141]:
ll

array([1, 2, 3])

In [143]:
sum(ll) # Python sum()

6

In [147]:
np.sum(ll) # NumPy sum() -- faster than the built-in

6

In [148]:
# Create a massive Numpy array

massive_array = np.random.random(10000)
massive_array.shape

(10000,)

In [149]:
%timeit sum(massive_array) # Python built-in function sum()
%timeit np.sum(massive_array) # Numpy sum()

391 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
3.25 µs ± 114 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


- Built-in `sum()` mất 391 micro second
- `np.sum()` mất 3.25 micro second

<mark> **Phải dùng numpy khi làm việc với mảng đa chiều** </mark>

In [150]:
np.mean(massive_array)

0.5005009622044586

In [151]:
np.max(massive_array)

0.9999709462497284

In [152]:
np.min(massive_array)

2.0027530115096503e-05

In [156]:
dog_height = [600, 470, 170, 430, 300]
dog_height = np.array(dog_height)

In [157]:
np.std(dog_height)

147.32277488562318

In [158]:
# Standard deviation is the square root of the variance, so this equals np.std(dog_height)
np.sqrt(np.var(dog_height))

147.32277488562318

# Sorting Arrays
`np.sort` uses a quicksort algorithm by default

In [159]:
x = np.array([2, 1, 4, 3, 5])
np.sort(x)

array([1, 2, 3, 4, 5])

In [163]:
# argsort returns the indices that would sort the array, not the sorted values
np.argsort(x)

array([1, 0, 3, 2, 4])

## Sorting along rows or columns
A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multidimensional array using the `axis` argument.

In [166]:
np.random.seed(42)

MatA = np.random.randint(0, 10, size=(4, 6))

In [167]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [169]:
np.sort(MatA, axis = 0) # sort along axis 0: each column is sorted independently

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [170]:
np.sort(MatA, axis = 1) # sort along axis 1: each row is sorted independently

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

# Linear Algebra

In [173]:
A = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

In [174]:
B = np.array([[6, 5],
              [4, 3],
              [2, 1]])

In [176]:
A.dot(B) # matrix multiplication

array([[20, 14],
       [56, 41],
       [92, 68]])

In [178]:
A @ B # matrix multiplication (alternative syntax using the @ operator)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [179]:
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

## Dot Product Example

In [180]:
# Number of jars sold
np.random.seed(0)

sales_amounts = np.random.randint(20, size=(5, 3))

In [181]:
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [184]:
# Create weekly_sales DataFrame: one row per weekday, one column per butter type
import pandas as pd
weekly_sales = pd.DataFrame(sales_amounts, index = ["Mon", "Tues", "Wed", "Thurs", "Fri"],
                           columns = ["Almond Butter", "Peanut Butter", "Cashew Butter"])

In [185]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [187]:
# Create a price array
prices = np.array([10, 8, 12]).reshape((1, 3))

In [188]:
butter_prices = pd.DataFrame(prices, index = ["Price"], columns = ["Almond Butter", "Peanut Butter", "Cashew Butter"])

In [189]:
butter_prices

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [190]:
weekly_sales.shape, butter_prices.shape

((5, 3), (1, 3))

In [191]:
# (5, 3) @ (3, 1) -> (5, 1): for each day, sum of (jars sold * price) over the three butters.
# The transpose is needed so the inner dimensions (3 and 3) line up.
total_price = weekly_sales @ butter_prices.T

In [192]:
total_price

Unnamed: 0,Price
Mon,240
Tues,138
Wed,458
Thurs,232
Fri,142


In [193]:
# Attach the per-day totals to the sales table as a new column
weekly_sales['Total Price'] = total_price
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Total Price
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142
