In [2]:
import numpy as np

# 1. Creating Numpy Arrays from Python Lists


In [2]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [4]:
l1 = [3.14, 4, 2, 3]

In [5]:
# A single float in the list makes NumPy upcast every element to float
np.array(l1)

array([3.14, 4.  , 2.  , 3.  ])

In [6]:
# specify the dtype
np.array([1, 2, 3, 4], dtype="float32")

array([1., 2., 3., 4.], dtype=float32)

In [9]:
# Both the 1-D and the 2-D array are instances of the same class: numpy.ndarray
a1 = np.array([1, 2, 3, 4])
a2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

for arr in (a1, a2):
    print(type(arr))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [10]:
# Shape of the array
a2.shape

(2, 4)


In [11]:
# Number of dimensions
a2.ndim

2


In [12]:
# type of the elements
a2.dtype

dtype('int32')

In [13]:
# number of elements
a2.size

8

# 2. Creating Numpy Arrays from Scratch


## `zeros, ones, full, arange, linspace`


In [19]:
# np.zeros(shape, dtype): array filled with zeros.
# The shape can be given as a tuple or as a list; without a dtype the zeros are floats.
print(np.zeros((2, 4)))
print(np.zeros([2, 4], dtype="int32"))

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[0 0 0 0]
 [0 0 0 0]]


In [17]:
# np.ones(shape, dtype): array filled with ones (floats unless a dtype is given)
print(np.ones([2, 4]))
print(np.ones((2, 4), dtype="int32"))

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[1 1 1 1]
 [1 1 1 1]]


In [22]:
# full(shape, fill_value, dtype) to create an array of fill_value
np.full((2, 4), 3.14)

array([[3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14]])

In [20]:
# Create an array from a range of numbers
# Starting at 0, stopping before 20 (the stop value itself is excluded), step 2
# arange(start, stop, step) ~ range()
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [23]:
# Create an array of five values evenly spaced between 0 and 1 (both endpoints included)
# linspace(start, stop, number_of_values)
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

## `random.random, random.randint, random.normal, random.uniform`


In [41]:
# Create a 2x4 array of random numbers from 0 to 1
print(np.random.random((2, 4)))

# np.random.rand(d0, d1, ...) -> also random numbers from 0 to 1, like np.random.random,
# but the dimensions are passed as separate arguments instead of a tuple or list
print(np.random.rand(2, 4))

[[0.40078491 0.40662835 0.44216716 0.26494677]
 [0.62320652 0.23854702 0.95480667 0.66576454]]
[[0.16167165 0.98055104 0.76434974 0.79346612]
 [0.86176148 0.78405107 0.98455937 0.90047793]]


In [28]:
# Seeding the generator makes the random numbers reproducible:
# re-seeding with the same value replays exactly the same sequence
np.random.seed(0)
print(np.random.random((2, 4)))

np.random.seed(0)
print(np.random.random((2, 4)))

# Without re-seeding, the generator just continues, so this draw is different
print("====")
print(np.random.random((2, 4)))

[[0.5488135  0.71518937 0.60276338 0.54488318]
 [0.4236548  0.64589411 0.43758721 0.891773  ]]
[[0.5488135  0.71518937 0.60276338 0.54488318]
 [0.4236548  0.64589411 0.43758721 0.891773  ]]
====
[[0.96366276 0.38344152 0.79172504 0.52889492]
 [0.56804456 0.92559664 0.07103606 0.0871293 ]]


In [39]:
# Create a 100x100 array of random floats drawn from a normal distribution
# np.random.normal(mean, standard_deviation, shape)
np1 = np.random.normal(0, 10, (100, 100))
# Check the sample statistics: they should be close to the requested mean 0 and std 10
print(np.mean(np1))
print(np.std(np1))
np1

-0.05766647625706347
10.087312703555924


array([[  8.41432776,   3.35616344,  28.49966296, ...,   2.96857643,
         -3.46800091,   3.61105259],
       [-13.00056624,   8.04334726,   9.75909905, ...,   2.88629647,
          3.12607227, -13.71684723],
       [-17.11026191,   1.30361004,  13.65598202, ..., -17.2972111 ,
          5.52278908,  -6.6910795 ],
       ...,
       [-14.47137889,  11.3553628 ,  -6.76498776, ...,  -4.25858754,
         14.82695989,   8.57883009],
       [ 18.45852433,   5.03463893,  -1.14819605, ...,  -0.13613592,
        -19.23330733,   4.85264024],
       [ -0.07907644,  -2.12527339,  29.00436151, ..., -17.12077756,
         13.91502502,  -0.99151154]])

In [38]:
# Create a 2x4 array of random integers
# np.random.randint(start, end, shape)
np.random.randint(0, 10, (2, 4))

array([[6, 6, 0, 7],
       [2, 1, 5, 7]])

In [42]:
# Create a 3x3 of random float in the uniform distribution
# np.random.uniform(low, high, shape)
np.random.uniform(0, 1, (3, 3))

array([[0.6997268 , 0.40810494, 0.43459857],
       [0.23143361, 0.07335677, 0.80867559],
       [0.17220216, 0.24156345, 0.16554276]])

# 3. Array Indexing & Slicing


### One-dimensional Arrays


In [44]:
x1 = np.random.randint(20, size=6)
x1

array([17, 10, 18,  1, 16,  5])

In [46]:
x1[4], x1[0], x1[-1]

(16, 17, 5)

### Multi-dimensional Arrays


In [47]:
x2 = np.random.randint(10, size=(3, 4))
x2

array([[6, 6, 5, 0],
       [8, 0, 0, 0],
       [0, 1, 7, 7]])

In [50]:
x2[0, 1]

6

In [51]:
x2[0, 1] = 999
x2

array([[  6, 999,   5,   0],
       [  8,   0,   0,   0],
       [  0,   1,   7,   7]])

### Slicing `x[start:stop:step]`


In [52]:
x1

array([17, 10, 18,  1, 16,  5])

In [53]:
# Get the first 3 elements of x1
x1[0:3]

array([17, 10, 18])

In [54]:
# Get elements with step 2
x1[::2]

array([17, 18, 16])

In [55]:
x2

array([[  6, 999,   5,   0],
       [  8,   0,   0,   0],
       [  0,   1,   7,   7]])

In [56]:
# two rows, three columns
x2[:2, :3]

array([[  6, 999,   5],
       [  8,   0,   0]])

# 4. Reshaping Arrays & Transposing


In [57]:
grid = np.arange(1, 10)
grid.shape

(9,)

In [58]:
grid.reshape((3, 3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [59]:
x = np.array([1, 2, 3])
x.shape

(3,)

In [64]:
x.reshape((1, 3))

array([[1, 2, 3]])

In [68]:
x = np.array([[1, 2], [3, 4]])

In [69]:
# Transpose
x.T

array([[1, 3],
       [2, 4]])

# 5. Array concatenation & splitting


### Concatenation: `np.concatenate, np.vstack, np.hstack`


In [70]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])

In [72]:
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [75]:
grid = np.array([[1, 2, 3], [4, 5, 6]])
print(grid.shape)
grid

(2, 3)


array([[1, 2, 3],
       [4, 5, 6]])

In [76]:
temp_grid = np.concatenate([grid, grid], axis=1)
print(temp_grid.shape)
temp_grid

(2, 6)


array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [79]:
x = np.array([1, 2, 3])
y = np.array([[99], [99]])

grid = np.array([[9, 8, 7], [6, 5, 4]])

In [78]:
# np.vstack() -> vertical stack
np.vstack([x, grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [80]:
# np.hstack() -> horizontal stack
np.hstack([y, grid])

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

### Splitting: `np.split, np.vsplit, np.hsplit`


In [81]:
x = np.array([1, 2, 3, 99, 99, 3, 2, 1])

In [86]:
# np.split(array, [index1, index2, ...]) -> split the array at the index
np.split(x, [1, 3, 5])

[array([1]), array([2, 3]), array([99, 99]), array([3, 2, 1])]

In [83]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [85]:
# np.vsplit(array, [index1, index2, ...]) -> vertical split (the rows are divided)
# Splitting at row index 2 puts rows 0-1 in `upper` and rows 2-3 in `lower`
upper, lower = np.vsplit(grid, [2])

print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [87]:
# np.hsplit(array, [index1, index2, ...]) -> horizontal split (the columns are divided)
# Splitting at column index 2 puts columns 0-1 in `left` and columns 2-3 in `right`
left, right = np.hsplit(grid, [2])

print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


# 6. Broadcasting & Vectorized Operations


The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic
operations. Subject to certain constraints, the smaller array is “broadcast” across the larger
array so that they have compatible shapes.
![image-2.png](attachment:image-2.png)


In [10]:
a = np.arange(3)
a

array([0, 1, 2])

In [91]:
a + 5  # broadcasting

array([5, 6, 7])

In [96]:
b = np.ones((3, 3))
b

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [93]:
a.shape, b.shape

((3,), (3, 3))

In [94]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [13]:
c = np.arange(3).reshape((3, 1))
c

array([[0],
       [1],
       [2]])

In [14]:
c + a

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

# 7. Manipulating & Comparing Arrays


### Aggregation: `sum, std, var, mean, median, min, max, argmin, argmax`

Aggregation = performing the same operation on a number of things


In [15]:
list_number = [1, 2, 3]

In [16]:
ll = np.array(list_number)
ll

array([1, 2, 3])

In [17]:
sum(ll)  # python sum

6

In [18]:
np.sum(ll)  # numpy sum -> faster

6

In [21]:
# Create a massive Numpy array
massive_array = np.random.random(10000)
print(massive_array.shape)
massive_array[:5]

(10000,)


array([0.19675179, 0.67408237, 0.99741319, 0.09905013, 0.45750601])

In [22]:
# Time Python's built-in sum against np.sum on the same 10,000-element array
%timeit sum(massive_array)
%timeit np.sum(massive_array)

582 µs ± 76.5 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
4.76 µs ± 317 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [23]:
np.mean(massive_array)

0.49808941631682485

In [24]:
np.max(massive_array)

0.9999453475042197

In [25]:
np.min(massive_array)

4.7270857717052195e-07

In [39]:
# Build the heights directly as an array instead of rebinding a list to the same name
dog_height = np.array([600, 470, 170, 430, 300])

# Spread of the heights: variance, and standard deviation (std = sqrt(var))
height_std = dog_height.std()
height_var = dog_height.var()

print("Standard Deviation: ", height_std)
print("Variance: ", height_var)
print(np.sqrt(height_var))

Standard Deviation:  147.32277488562318
Variance:  21704.0
147.32277488562318


In [40]:
# Average height of the dogs
print("Average(mean) height: ", dog_height.mean())
# Median height of the dogs
print("Median height: ", np.median(dog_height))

Average(mean) height:  394.0
Median height:  430.0


![image.png](attachment:image.png)


In [41]:
# Percentile height of the dogs -> returns the q-th percentile(s) of the array elements.
print(np.percentile(dog_height, 50))
print(np.percentile(dog_height, 25))
# P90 by linear interpolation: position = (5 - 1) * 90% = 3.6 in the sorted heights
# [170, 300, 430, 470, 600], i.e. 60% of the way from the 4th element (470) to the
# 5th element (600): 40% * 470 + 60% * 600 = 548
print(
    np.percentile(dog_height, 90)
)

430.0
300.0
548.0


In [42]:
# Min max
print("Min height: ", np.min(dog_height))
print("Max height: ", np.max(dog_height))

Min height:  170
Max height:  600


In [43]:
# Argmax and Argmin -> returns the index of the max and min value
print("Index of the max height: ", np.argmax(dog_height))
print("Index of the min height: ", np.argmin(dog_height))

Index of the max height:  0
Index of the min height:  2


# 8. Sorting Arrays


![image.png](attachment:image.png)

### By default, if you do not specify the sort kind, np.sort uses quicksort.


In [46]:
import numpy as np

arr = np.random.randint(0, 100, 10000)

# Use Quicksort (the default)
%timeit np.sort(arr, kind="quicksort")

# Use Heapsort
%timeit np.sort(arr, kind="heapsort")

# Use Mergesort
%timeit np.sort(arr, kind="mergesort")

# Use Timsort (stable)
# NOTE(review): the NumPy docs describe kind="mergesort" and kind="stable" as mapping to
# the same stable implementation, so these two timings may measure the same algorithm -
# confirm against the installed NumPy version
%timeit np.sort(arr, kind="stable")

299 µs ± 45.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
556 µs ± 39.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
414 µs ± 80.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
384 µs ± 39.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


![image.png](attachment:image.png)


In [50]:
# A related function is argsort, which instead returns the indices of the sorted elements:
x = np.array([2, 1, 4, 3, 5])
np.argsort(x)

array([1, 0, 3, 2, 4], dtype=int64)

### Sorting along rows or columns

A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multi-dimensional array, using the `axis` argument.


In [52]:
np.random.seed(42)

MatA = np.random.randint(0, 10, size=(4, 6))
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [53]:
np.sort(MatA, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [54]:
np.sort(MatA, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

# 9. Linear Algebra


In [61]:
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [60]:
B = np.array([[6, 5], [4, 3], [2, 1]])
B

array([[6, 5],
       [4, 3],
       [2, 1]])

In [59]:
# A (3x3) dot product B (3x2) -> (3x2)
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [62]:
# Another way to do dot product
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [66]:
# B (3x2) dot A (3x3) is not defined: the inner dimensions (2 and 3) do not match
# B.dot(A)  # raises an error

# Transpose B to 2x3, so that B.T (2x3) dot A (3x3) -> (2x3)
print(B.T)
print(B.T.dot(A))
print(B.T @ A)

[[6 4 2]
 [5 3 1]]
[[36 48 60]
 [24 33 42]]
[[36 48 60]
 [24 33 42]]


### Dot product example


In [68]:
# Number of jars sold
np.random.seed(0)

sales_amount = np.random.randint(20, size=(5, 3))
sales_amount

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [69]:
# Wrap the sales counts in a labelled table: one row per weekday, one column per product
import pandas as pd

weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri"]
butter_names = ["Almond Butter", "Peanut Butter", "Cashew Butter"]

weekly_sales = pd.DataFrame(sales_amount, index=weekdays, columns=butter_names)
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thu,4,6,12
Fri,1,6,7


In [70]:
# create prices array
prices = np.array([10, 8, 12])
prices

array([10,  8, 12])

In [72]:
# Build a one-row price table with one column per product
price_row = prices.reshape(1, -1)  # (3,) -> (1, 3): a single row of prices
butter_prices = pd.DataFrame(
    data=price_row,
    columns=["Almond Butter", "Peanut Butter", "Cashew Butter"],
    index=["Price"],
)
butter_prices

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [76]:
# Total revenue per day = jars sold (5x3) dot price per jar (3x1) -> (5x1).
# DataFrame.dot aligns the left frame's columns with the right frame's index, so select
# only the product columns first: this keeps the cell re-runnable even after an extra
# column (such as "Total ($)") has been added to weekly_sales.
product_columns = butter_prices.columns
total_prices = weekly_sales[product_columns].dot(butter_prices.T)
total_prices

Unnamed: 0,Price
Mon,240
Tue,138
Wed,458
Thu,232
Fri,142


In [78]:
# Attach the daily totals as a new column.
# NOTE: this mutates weekly_sales in place, so the frame now has four columns
weekly_sales["Total ($)"] = total_prices
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Total ($)
Mon,12,15,0,240
Tue,3,3,7,138
Wed,9,19,18,458
Thu,4,6,12,232
Fri,1,6,7,142
