# Lecture 4: Introduction to Numpy

In [3]:
import numpy as np

## Tổng quan
- Numpy (Numeric Python): là một thư viện toán học phổ biến và mạnh mẽ của Python. Cho phép làm việc hiệu quả với ma trận và mảng
- Các phẩn tử của array phải cùng kiểu dtype (ép kiểu sẽ xảy ra khi cần thiết)
- Numpy array là party mutable: có thể thay đổi các element đang có, nhưng không thể thay đổi kích thước của cả array (no apeend, remove, delete)

### Tạo Numpy array

In [2]:
# 1. From a list
l = [1, 2, 3, 4]
a = np.array(l)

print(a)
print(type(a))
print(a.dtype)

[1 2 3 4]
<class 'numpy.ndarray'>
int32


In [3]:
# 2. From a tuple
t = (1, 2, 3, 4)
a = np.array(t)

print(a)
print(type(a))
print(a.dtype)

[1 2 3 4]
<class 'numpy.ndarray'>
int32


In [4]:
t = (1, 2, 3, 'a')
a = np.array(t)

a

array(['1', '2', '3', 'a'], dtype='<U11')

In [6]:
# Using numpy.arange()
a = np.arange(9)

print(a)
print(type(a))
print(a.dtype)

[0 1 2 3 4 5 6 7 8]
<class 'numpy.ndarray'>
int32


In [4]:
# 4. Multi-dimensional array
a = np.array([[1,2],[3,4]])

print(a)
print(type(a))  # type của toàn bộ a
print(a.dtype)  # type của từng element trong a 

[[1 2]
 [3 4]]
<class 'numpy.ndarray'>
int32


In [10]:
# Try to modify an array (OK)
a = np.arange(9)
print(a)

a[0] = 99
print(a)

[0 1 2 3 4 5 6 7 8]
[99  1  2  3  4  5  6  7  8]


In [11]:
# Try to modify an array'size (Error)
a = np.arange(9)
print(a)

del a[0]

[0 1 2 3 4 5 6 7 8]


ValueError: cannot delete array elements

In [12]:
# Convert bool to int
a = np.array([True, False, 1, 2])
print(a)
print(a.dtype)

[1 0 1 2]
int32


In [13]:
# Convert in to string
a = np.array([1, 2, "a"])

print(a)
print(a.dtype)

['1' '2' 'a']
<U11


In [12]:
# # Other method to create arrays
a = np.zeros((2,2))   
print(a)              # mảng dữ liệu 0

b = np.ones((1,2))
print(b)              # mảng dữ liệu 1

c = np.full((2,2), 7)
print(c)

d = np.eye(2)
print(d)              # ma trận chéo

e = np.arange(0,4)
print(e)

[[0. 0.]
 [0. 0.]]
[[1. 1.]]
[[7 7]
 [7 7]]
[[1. 0.]
 [0. 1.]]
[0 1 2 3]


### Thao tác với Numpy array

#### Lấy thông tin cơ bản

In [18]:
a = np.array([1,2,3])
b = np.array([[1,2],[3,4]])

In [20]:
#Size
print(a.size)   
print(len(a))

print(b.size)   #size - 4 phần tử
print(len(b))   #len = 2 ele lists

3
3
4
2


In [21]:
# dtype
print(a.dtype)
print(b.dtype)

int32
int32


In [22]:
# shape
print(a.shape)
print(b.shape)

(3,)
(2, 2)


In [23]:
# dimension
print(a.ndim)
print(b.ndim)

1
2


#### Indexing & Slicing

##### 1-D array

In [26]:
a = np.arange(12)
print(a)

[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [30]:
# 1st element
print(a[0])

# last element
print(a[-1])

# 1st to 3rd
print(a[:3])

# 3rd to last
print(a[3:])

0
11
[0 1 2]
[ 3  4  5  6  7  8  9 10 11]


In [13]:
a = np.array([[1,2,3,4], [5,6,7,8,], [9,10,11,12]])
print(a)
print(a[:2, 1:3])

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[2 3]
 [6 7]]


##### n-D array

In [33]:
# Note: try with order = "F"
a = np.arange(9).reshape((3,3))
print(a)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [36]:
# Element at 0,0
print(a[0,0])

# First row
print(a[0]) #print(a[0,:])l

# last row 
print(a[-1])

0
[0 1 2]
[6 7 8]


In [37]:
# First column
print(a[:,0])

# last column
print(a[:,-1])

# First and Last column
print(a[:,[0,-1]])

#Everything
print(a[:,:])

[0 3 6]
[2 5 8]
[[0 2]
 [3 5]
 [6 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]]


### Array math

In [38]:
import numpy as np

x = np.array([[1,2],[3,4]], dtype = np.float64)
y = np.array([[5,6],[7,8]], dtype = np.float64)
print(x)
print(y)

[[1. 2.]
 [3. 4.]]
[[5. 6.]
 [7. 8.]]


In [39]:
# Elementwise sum ; both produce the array
print(x + y)
print(np.add(x,y))

[[ 6.  8.]
 [10. 12.]]
[[ 6.  8.]
 [10. 12.]]


In [40]:
# Elementwise difference ; both produce the array
print(x - y)
print(np.subtract(x,y))

[[-4. -4.]
 [-4. -4.]]
[[-4. -4.]
 [-4. -4.]]


In [41]:
# Elementwise product ; both produce the array
print(x * y)
print(np.multiply(x,y))

[[ 5. 12.]
 [21. 32.]]
[[ 5. 12.]
 [21. 32.]]


In [45]:
# Elementwise square root; product the array
print(np.sqrt(x))

[[1.         1.41421356]
 [1.73205081 2.        ]]


In [46]:
import numpy as np

x = np.array([[1,2],[3,4]])
y = np.array([[5,6],[7,8]])

v = np.array([9,10])
w = np.array([11,12])

In [48]:
# Inner product of vectors; both produce 219
print(v.dot(w))
print(np.dot(v,w))

# Vector multiplies Vector
#9*11 + 10*12

219
219


In [49]:
# Matrix / vector product; both produce the rank 1 array [29 67]
print(x.dot(v))
print(np.dot(x,v))

# Matrix multiplies Vector
#(1*9 + 2*10 , 3*9 + 4*10)

[29 67]
[29 67]


In [50]:
# Matrix / Matrix product; both produce the rank 2 array
print(x.dot(y))
print(np.dot(x,y))

#Matrix multiplies Matrix
# 1*5 + 2*7  ,  1*6 + 2*8
# 3*5 + 4*7  ,  3*6 + 4*8

[[19 22]
 [43 50]]
[[19 22]
 [43 50]]


In [14]:
x = np.array([[1,2],[3,4]])
print(x)

print(np.sum(x))   #compute all elements; output = 10
print(np.sum(x, axis = 0))   #compute sum of each column; output = [4,6]
print(np.sum(x, axis = 1))   #compute sum of each row, output = [3,7]

[[1 2]
 [3 4]]
10
[4 6]
[3 7]


In [53]:
x = np.array([[1,2],[3,4]])

print(x)

print(x.T) # Phương trình nghịch đảo #Đảo chiều #Transpose

[[1 2]
 [3 4]]
[[1 3]
 [2 4]]


In [55]:
# Note that taking the transpose of a rank 1 array does nothing
v = np.array([1,2,3])
print(v)
print(v.T)

'''
Lý do Vector trong python chỉ có 1 chiều ngang, không cả ngang và dọc nên không transpose được

[1 2 3]
[1 2 3]


### Vectorization (broadcasting)

In [56]:
import timeit

In [58]:
a = np.arange(10)
print(a)

[0 1 2 3 4 5 6 7 8 9]


In [62]:
# Add
print(a + 10)
print(timeit.timeit(setup = 'import numpy as np', stmt = 'np.arange(10) + 10'))



[10 11 12 13 14 15 16 17 18 19]
1.588975299964659


In [63]:
# Nếu dùng pure Python
print([x + 10 for x in a])
print(timeit.timeit(setup = 'import numpy as np', stmt = '[x + 10 for x in np.arange(10)]'))

#Nhược điểm: Dài hơn và Chậm hơn

[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
2.502280000015162


In [67]:
# Subtract
print(a - 8)

# Multiply
print(a * 10)

# Exponential
print(a ** 3)

[-8 -7 -6 -5 -4 -3 -2 -1  0  1]
[ 0 10 20 30 40 50 60 70 80 90]
[  0   1   8  27  64 125 216 343 512 729]


### [Important] Masking (lọc dữ liệu)

In [68]:
# Note: phần tử arrat có thể khác vì dùng random
a = np.random.randint(-10, 10, size = 10)
print(a)

[  4  -9   5   6   4  -1  -5 -10  -4   9]


In [69]:
# Masking a > 0
print(a > 0)

[ True False  True  True  True False False False False  True]


In [72]:
# Dùng masking để filter
mask = a > 0
print(a[mask])
print(sorted(a[mask]))

[4 5 6 4 9]
[4, 4, 5, 6, 9]


In [73]:
# Dùng masking để filter (2)
print(a[a > 0])

[4 5 6 4 9]


In [74]:
# Lấy các số chẵn
print(a[a % 2 ==0])

[  4   6   4 -10  -4]


In [76]:
mask = a > 5
print(mask)

[False False False  True False False False False False  True]


In [77]:
# Kiểm tra với mọi element (all)
print(np.all(a > 0))

False


In [79]:
# Kiểm tra tồn tại (any)
print(np.any(a > 0))

True


## Structured Array

Cần thì xem lại, vì có trong Pandas 

## Thống kê mô tả (Descriptive Statistics)

### 1-D array

In [87]:
a = np.arange(11)
print(a)

#Min
print(a.min())
print(np.min(a))

#Max
print(a.max())
print(np.max(a))

#Mean
print(a.mean())
print(np.mean(a))

#Median
print(np.median(a))

#Standard deviation
print(np.std(a))

[ 0  1  2  3  4  5  6  7  8  9 10]
0
0
10
10
5.0
5.0
5.0
3.1622776601683795


### 2-D array

In [96]:
a = np.arange(100).reshape(10,10)
print(a)

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]]


In [99]:
#Mean row-wise (across rows)
print(a.mean(axis = 0))

#column
print(a.mean(axis = 1))

[45. 46. 47. 48. 49. 50. 51. 52. 53. 54.]
[ 4.5 14.5 24.5 34.5 44.5 54.5 64.5 74.5 84.5 94.5]


## Exercise

In [103]:
'''
    Create a 1D array of numbers from 0 to 9
'''
import numpy as np
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [106]:
'''
    Extract all odd numbers from arr
    Replace all odd numbers in arr with -1
'''
a [a % 2 > 0] = -1
print(a)

[ 0 -1  2 -1  4 -1  6 -1  8 -1]


In [109]:
''' 
    Convert a 1D arrat to a 2D arrat with 2 rows
'''
a.reshape((2,5))
a

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [110]:
'''
    Get the common items between a and b using intersect1d
    a = np.array([1,2,3,2,3,4,3,4,5,6])
    b = np.array([7,2,10,2,7,4,9,4,9,8])
'''
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.intersect1d(a,b)

array([2, 4])

In [112]:
'''
    From array a, remove all items present in array b using setfidd1d
    a = np.array([1,2,3,4,5])
    b = np.array([5,6,7,8,9])
'''
a1 = np.array([1,2,3,4,5])
b1 = np.array([5,6,7,8,9])
np.setdiff1d(a1,b1)


array([1, 2, 3, 4])

In [122]:
'''
    Get all items between 5 and 10 a
    a = np.array([2,6,1,9,10,3,27])
'''
a = np.array([2,6,1,9,10,3,27])
x = a[a<10]
y = a[a>5]
np.intersect1d(x,y)

a[(a>5) & (a=<10)]


SyntaxError: invalid syntax (4116503558.py, line 10)

In [124]:
'''
    Swap columns 1 and 2 in the array ar
    ar = np.arange(9).reshape(3,3)
'''
ar = np.arange(9).reshape(3,3)
print(ar)

ar[:,[0,2]] = ar [:,[2,0]]
ar

[[0 1 2]
 [3 4 5]
 [6 7 8]]


array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

In [125]:
ar[:,[1,0,2]]

array([[1, 2, 0],
       [4, 5, 3],
       [7, 8, 6]])

In [126]:
'''
    Reserse the rows of a 2D array ar above
'''
ar[::-1]

array([[8, 7, 6],
       [5, 4, 3],
       [2, 1, 0]])

In [127]:
'''
    Find the mean, median, standard deviation of iris's sepallenth (1st)
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
    iris = np.genfromtxt(url, delimiter = ',', dtype = 'object')
    sepallength = np.genfromtxt(url, delimiter = ',' , dtype = 'float', usecols = [0])
'''
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter = ',', dtype = 'object')
sepallength = np.genfromtxt(url, delimiter = ',' , dtype = 'float', usecols = [0])

sepallength


array([5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
       4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
       5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
       5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,
       6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,
       6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,
       6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,
       6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,
       6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,
       7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,
       7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,
       6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9])

In [131]:
print(np.mean(sepallength, axis = 0))
print(np.mean(sepallength))
print(np.median(sepallength))
print(np.std(sepallength))

5.843333333333334
5.843333333333334
5.8
0.8253012917851409


In [133]:
'''
    Compute the maximun fro each row in the given array
    a = np.random.randint(1,10, [5,3])
'''
a = np.random.randint(1,10, [5,3])
print(a)
np.max(a,axis = 1)

[[8 6 4]
 [6 6 3]
 [3 9 7]
 [2 8 7]
 [9 5 2]]


array([8, 6, 9, 8, 9])