# Numpy

file made with Yeongho Lee, UNIST

- **Numpy** is the basic package for data analysis with Python, specialized for computation of arrays.   
This package is also the basis of other popular packages including **Pandas**.



### Import Numpy



In [1]:
import numpy as np

### Basics

- Vectors

In [2]:
a = [1,2,3] # plain Python list
b = np.array([1,2,3]) # NumPy ndarray built from the same values

print(a)
print(b)

[1, 2, 3]
[1 2 3]


In [3]:
np.array(a)

array([1, 2, 3])

In [4]:
b

array([1, 2, 3])

In [5]:
print([1,2,3]+[4,5,6]) # list: + concatenates the two lists
print(np.array([1,2,3])+np.array([4,5,6])) # ndarray: + adds element by element

[1, 2, 3, 4, 5, 6]
[5 7 9]


In [6]:
#indexing

print(b[0])
print(b[0:2])

1
[1 2]


In [7]:
b+b

array([2, 4, 6])

In [8]:
b*b # this is not matrix multiplication

array([1, 4, 9])

In [9]:
b/b

array([1., 1., 1.])

In [10]:
2 * b

array([2, 4, 6])

In [11]:
b/5

array([0.2, 0.4, 0.6])

In [12]:
print(type(b))
print(b.shape)

<class 'numpy.ndarray'>
(3,)


- Matrices

In [13]:
mat=np.array([[2,5,18,14,4], [12,15,1,2,8]])
mat

array([[ 2,  5, 18, 14,  4],
       [12, 15,  1,  2,  8]])

In [14]:
#indexing

print(mat[1,2]) # Element in 2nd row, 3rd column
print(mat[1][2])

1
1


In [15]:
print(mat[0,:]) 

[ 2  5 18 14  4]


In [16]:
print(mat[:,0]) 

[ 2 12]


In [17]:
print(mat[1,2:4])

[1 2]


In [18]:
print(mat[:,2:4])

[[18 14]
 [ 1  2]]


In [19]:
print(mat.shape) 

(2, 5)


※ Numpy arrays are not restricted to 2-dimensional arrays.
We can construct n-dimensional arrays for any n:




In [20]:
mat2=np.array([[[2,5,18,14,4], [12,15,1,2,8]],[[3,5,8,12,7], [11,1,0,20,8]]])

In [21]:
print(mat2[0,1,2])
print(mat2[0][1][2])
print(mat2.shape)

1
1
(2, 2, 5)


In [22]:
x = np.random.rand(5,5) 
print(x)
#each element is sampled from the uniform distribution [0,1)

[[0.60496716 0.44619995 0.66985679 0.50524558 0.80889479]
 [0.31046278 0.36515741 0.79323181 0.2703596  0.52142266]
 [0.08600509 0.04025616 0.66377514 0.85856036 0.37413589]
 [0.49233057 0.18351347 0.47944002 0.34160531 0.72064294]
 [0.33846223 0.86728571 0.9094446  0.6401719  0.57712836]]


In [23]:
x = np.random.randint(100,size=(2,3))
print(x)


[[89 99 35]
 [35 49 97]]


In [24]:
x = np.zeros((4,4))
print(x)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [25]:
x = np.ones((4,4))
print(x)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [26]:
x = np.eye(4)
print(x)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [27]:
x = np.identity(4)
print(x)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [28]:
# Be careful: slicing an ndarray gives a view, not an independent copy.
# Writing into B below therefore also changes Mat.

Mat=np.array([[1,2,3],[4,5,6]])
B=Mat[:2,:2]
print(B)
B[0,0]=0
print(B)
print(Mat)

[[1 2]
 [4 5]]
[[0 2]
 [4 5]]
[[0 2 3]
 [4 5 6]]


- To avoid the above problem, make an independent copy with the `numpy.copy()` function:

In [29]:
Mat=np.array([[1,2,3],[4,5,6]])
B=np.copy(Mat[:2,:2])
print(B)
B[0,0]=0
print(B)
print(Mat)

[[1 2]
 [4 5]]
[[0 2]
 [4 5]]
[[1 2 3]
 [4 5 6]]


In [30]:
x = np.random.rand(4,3)
print(x)

[[0.36521673 0.02395318 0.19710769]
 [0.17489704 0.52619128 0.71526798]
 [0.09479927 0.93559598 0.92462854]
 [0.55957248 0.49121886 0.5625831 ]]


In [31]:
x[1,2] = -5 
print(x)

[[ 0.36521673  0.02395318  0.19710769]
 [ 0.17489704  0.52619128 -5.        ]
 [ 0.09479927  0.93559598  0.92462854]
 [ 0.55957248  0.49121886  0.5625831 ]]


In [32]:
x[0:2,:] += 1 
print(x)

[[ 1.36521673  1.02395318  1.19710769]
 [ 1.17489704  1.52619128 -4.        ]
 [ 0.09479927  0.93559598  0.92462854]
 [ 0.55957248  0.49121886  0.5625831 ]]


In [33]:
x[2:4,1:3] = 0.5 
print(x)

[[ 1.36521673  1.02395318  1.19710769]
 [ 1.17489704  1.52619128 -4.        ]
 [ 0.09479927  0.5         0.5       ]
 [ 0.55957248  0.5         0.5       ]]


In [34]:
x[x>0.5] = 0
print(x)

[[ 0.          0.          0.        ]
 [ 0.          0.         -4.        ]
 [ 0.09479927  0.5         0.5       ]
 [ 0.          0.5         0.5       ]]


In [35]:
x

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -4.        ],
       [ 0.09479927,  0.5       ,  0.5       ],
       [ 0.        ,  0.5       ,  0.5       ]])

In [36]:
x>0.1

array([[False, False, False],
       [False, False, False],
       [False,  True,  True],
       [False,  True,  True]])

In [37]:
print(2*x+1)

[[ 1.          1.          1.        ]
 [ 1.          1.         -7.        ]
 [ 1.18959854  2.          2.        ]
 [ 1.          2.          2.        ]]


### Matrix multiplication

In [38]:
A=np.random.rand(4,3)

In [39]:
B=np.random.rand(3,3)

In [40]:
A.dot(B) 

array([[0.72065931, 0.9179208 , 0.65062198],
       [0.87203606, 1.1602384 , 1.05004485],
       [0.21480967, 0.20634608, 0.43661844],
       [0.36646117, 0.50133148, 0.18112942]])

In [41]:
np.dot(A,B)

array([[0.72065931, 0.9179208 , 0.65062198],
       [0.87203606, 1.1602384 , 1.05004485],
       [0.21480967, 0.20634608, 0.43661844],
       [0.36646117, 0.50133148, 0.18112942]])

In [42]:
B.dot(A)

ValueError: shapes (3,3) and (4,3) not aligned: 3 (dim 1) != 4 (dim 0)

Note !!

In [43]:
A

array([[0.99545884, 0.62196653, 0.34127708],
       [0.91411571, 0.7401018 , 0.95939433],
       [0.09005369, 0.47504404, 0.23535741],
       [0.63747538, 0.15606845, 0.06833003]])

In [44]:
A.shape

(4, 3)

In [45]:
y = np.array([1,0,0]) # shape (3,); used as a 3 x 1 column in A.dot(y)
print(A.dot(y))

[0.99545884 0.91411571 0.09005369 0.63747538]


In [46]:
y = np.array([1,0,1,0]) # shape (4,); used as a 1 x 4 row in y.dot(A)
print(y.dot(A))

[1.08551253 1.09701057 0.5766345 ]


In [47]:
y.shape

(4,)

https://stats.stackexchange.com/questions/284995/are-1-dimensional-numpy-arrays-equivalent-to-vectors

$\rightarrow$ NumPy treats a one-dimensional array with shape (n,) as a row vector or a column vector, whichever makes the matrix multiplication valid, so no explicit transpose is needed.

$\rightarrow$ This applies only to arrays with shape (n,); two-dimensional arrays with shape (n,1) or (1,n) must have matching inner dimensions.

In [48]:
y=np.array([[1],[0],[1],[0]])

In [49]:
y

array([[1],
       [0],
       [1],
       [0]])

In [50]:
y.shape

(4, 1)

In [51]:
#you can do also 
y = np.array([1,0,1,0]).reshape((4,1))

In [52]:
y

array([[1],
       [0],
       [1],
       [0]])

In [53]:
print(y.dot(A)) # (4, 1) dot (4, 3): inner dimensions 1 and 4 differ, so this raises ValueError

ValueError: shapes (4,1) and (4,3) not aligned: 1 (dim 1) != 4 (dim 0)

### Transpose

In [55]:
print(y.T.dot(A))

[[1.08551253 1.09701057 0.5766345 ]]


$$
A_{ij}^T = A_{ji}
$$

In [56]:
A.T

array([[0.99545884, 0.91411571, 0.09005369, 0.63747538],
       [0.62196653, 0.7401018 , 0.47504404, 0.15606845],
       [0.34127708, 0.95939433, 0.23535741, 0.06833003]])

In [57]:
A

array([[0.99545884, 0.62196653, 0.34127708],
       [0.91411571, 0.7401018 , 0.95939433],
       [0.09005369, 0.47504404, 0.23535741],
       [0.63747538, 0.15606845, 0.06833003]])

### inner products and outer products of vectors

In [58]:
#inner products 

y = np.array([2,-1,3])
z = np.array([-1,2,2])
print(np.dot(y,z)) # y.dot(z)
print(np.dot(z,y))

2
2


In [59]:
#outer products

print(np.outer(y,z))

[[-2  4  4]
 [ 1 -2 -2]
 [-3  6  6]]


inner product : (1,3) * (3, 1) ====> (1,1)
\
outer product : (3,1) * (1,3)  ====> (3,3)

### Inverse

In [60]:
x=np.random.rand(4,4)
y=np.linalg.inv(x)
print(x)
print(y)
print(x.dot(y))

[[0.29357961 0.00562349 0.10440281 0.98849825]
 [0.45967604 0.59238833 0.10531662 0.09272088]
 [0.14479512 0.6208668  0.34070607 0.32883976]
 [0.03770905 0.01365625 0.80352545 0.02177459]]
[[ 0.55915642  2.53443324 -2.4370707   0.62851694]
 [-0.56031368 -0.14533849  1.76887534 -0.65817666]
 [-0.03983246 -0.09637013  0.06515281  1.23469648]
 [ 0.85296318 -0.74171027  0.70685495 -0.31332811]]
[[ 1.00000000e+00 -1.50463039e-16 -9.62719074e-17 -3.73911783e-17]
 [ 6.82986657e-17  1.00000000e+00 -1.44745656e-16 -5.17533217e-18]
 [-4.71465716e-17 -3.56913635e-17  1.00000000e+00  4.33779510e-17]
 [-1.61550027e-18 -1.11271479e-17 -1.12928388e-18  1.00000000e+00]]


In [61]:
np.round(x.dot(y),3)

array([[ 1., -0., -0., -0.],
       [ 0.,  1., -0., -0.],
       [-0., -0.,  1.,  0.],
       [-0., -0., -0.,  1.]])

In [62]:
np.round(3.5678,3)

3.568

In [63]:
print(x)

[[0.29357961 0.00562349 0.10440281 0.98849825]
 [0.45967604 0.59238833 0.10531662 0.09272088]
 [0.14479512 0.6208668  0.34070607 0.32883976]
 [0.03770905 0.01365625 0.80352545 0.02177459]]


In [64]:
# some functions such as np.round() are just applied elementwise

np.sqrt(np.abs(x))

array([[0.54182987, 0.07498993, 0.32311424, 0.99423249],
       [0.67799413, 0.76966768, 0.32452522, 0.30450104],
       [0.38051953, 0.78795101, 0.58370033, 0.57344551],
       [0.19418818, 0.11685996, 0.89639581, 0.14756216]])

## Mean, Var, Std, Median, Sum, Cov

In [65]:
x = np.random.rand(5,3)
x

array([[0.90166499, 0.17819402, 0.45183003],
       [0.19176103, 0.53778219, 0.77353736],
       [0.45218809, 0.95938318, 0.2254742 ],
       [0.70811951, 0.68988866, 0.65523565],
       [0.91100667, 0.98448048, 0.98697204]])

### np.mean()


In [67]:
#print("Two Dimension array :\n", x)
print("Mean with no axis :", np.mean(x))
print("Mean with axis along column :", np.mean(x, axis=0)) #np.mean(x, 0)
print("Mean with axis along row :", np.mean(x, axis=1)) # np.mean(x,1)

Mean with no axis : 0.6405012063939247
Mean with axis along column : [0.63294806 0.66994571 0.61860985]
Mean with axis along row : [0.51056301 0.50102686 0.54568183 0.68441461 0.96081973]


### np.var()


- Sample Variance
$$
S^2 = {1\over N-1} \sum_{i=1}^{N}{(X_i - \bar X)}^2
$$
- `numpy.var()`, however, uses $N$ instead of $N-1$ in the denominator by default, so it computes the population variance rather than the sample variance. Pass `ddof=1` to get the sample variance.

In [68]:
print("Two Dimension array\n :", x)
print("Variance with no axis :", np.var(x))
print("Variance with axis along column :", np.var(x, axis=0)) #np.var(x,0)
print("Variance with axis along row :", np.var(x, axis=1)) #np.var(x,1)

Two Dimension array
 : [[0.90166499 0.17819402 0.45183003]
 [0.19176103 0.53778219 0.77353736]
 [0.45218809 0.95938318 0.2254742 ]
 [0.70811951 0.68988866 0.65523565]
 [0.91100667 0.98448048 0.98697204]]
Variance with no axis : 0.07835394890118282
Variance with axis along column : [0.07649926 0.08847816 0.06868117]
Variance with axis along row : [0.08895982 0.05708609 0.09414094 0.0004811  0.00124171]


### np.std()

 

In [69]:
print("Two Dimension array\n :", x)
print("Standard Deviation with no axis :", np.std(x))
print("Standard Deviation with axis along column :", np.std(x, axis=0)) #np.std(x,0)
print("Standard Deviation with axis along row :", np.std(x, axis=1)) #np.std(x,1)

Two Dimension array
 : [[0.90166499 0.17819402 0.45183003]
 [0.19176103 0.53778219 0.77353736]
 [0.45218809 0.95938318 0.2254742 ]
 [0.70811951 0.68988866 0.65523565]
 [0.91100667 0.98448048 0.98697204]]
Standard Deviation with no axis : 0.2799177538156214
Standard Deviation with axis along column : [0.27658499 0.29745279 0.26207092]
Standard Deviation with axis along row : [0.29826133 0.23892696 0.30682395 0.02193399 0.03523784]


### np.median()


- When $n$ is odd.
\
\
    np.median = 
$$
Median(X) = {X_{\left[{{{n+1} \over 2}}\right]}}
$$

- When $n$ is even
\
\
    np.median = 
$$
Median(X) = {1\over 2} \{{X_{\lfloor{{n+1\over 2}}\rfloor}}+X_{\lceil{{n+1\over 2}}\rceil}\}
$$

In [70]:
print("Two Dimension array\n :", x)
print("Median with no axis :", np.median(x))
print("Median with axis along column :", np.median(x, axis=0)) #np.median(x, 0)
print("Median with axis along row :", np.median(x, axis=1)) #np.median(x, 1)

Two Dimension array
 : [[0.90166499 0.17819402 0.45183003]
 [0.19176103 0.53778219 0.77353736]
 [0.45218809 0.95938318 0.2254742 ]
 [0.70811951 0.68988866 0.65523565]
 [0.91100667 0.98448048 0.98697204]]
Median with no axis : 0.6898886611505486
Median with axis along column : [0.70811951 0.68988866 0.65523565]
Median with axis along row : [0.45183003 0.53778219 0.45218809 0.68988866 0.98448048]


### np.sum()


In [71]:
print("Two Dimension array\n :", x)
print("Sum with no axis :", np.sum(x))
print("Sum with axis along column :", np.sum(x, axis=0)) #np.sum(x, 0)
print("Sum with axis along row :", np.sum(x, axis=1)) #np.sum(x, 1)

Two Dimension array
 : [[0.90166499 0.17819402 0.45183003]
 [0.19176103 0.53778219 0.77353736]
 [0.45218809 0.95938318 0.2254742 ]
 [0.70811951 0.68988866 0.65523565]
 [0.91100667 0.98448048 0.98697204]]
Sum with no axis : 9.60751809590887
Sum with axis along column : [3.16474029 3.34972853 3.09304927]
Sum with axis along row : [1.53168903 1.50308057 1.63704548 2.05324382 2.88245919]


### np.cov()


- Sample Covariance
$$
Cov(x,y) = S_{xy} = {1\over N-1} \sum_{i=1}^{N}{(X_i - \bar X)}{(Y_i - \bar Y)}
$$

$$
\begin{pmatrix} Cov(x,x) & Cov(x,y) \\ Cov(x,y) & Cov(y,y) \end{pmatrix}
$$

In [72]:
x = np.array([-2.1, -1,  4.3]) # N =3
y = np.array([3,  1.1,  0.12])

In [73]:
#print("Two 1- Dimension array\n :", x,y)
print("Covariance Matrix of 1-Dimension array\n :", np.cov(x,y))
print("Covariance of x and y is:",np.cov(x,y)[0][1])

Covariance Matrix of 1-Dimension array
 : [[11.71       -4.286     ]
 [-4.286       2.14413333]]
Covariance of x and y is: -4.2860000000000005


In [74]:
np.var(x) # divides by N

7.806666666666666

In [75]:
np.cov(x,y)[0,0] #divides by N-1

11.709999999999999

In [76]:
np.var(x)*3/2

11.709999999999999

### Compute the mean, variance, and standard deviation of ( $\mathbf X$ : $N * 1$) as matrix operation

- Mean

$$\bar X = {1\over N} \sum_{i=1}^{N}X_i $$

$$
= {1\over N} \mathbf 1 \cdot X = {1\over N} \mathbf 1^T  X
$$

$$
{1\over N} \begin{bmatrix} 1 &  \cdots &  1 \end{bmatrix}_{1 \times N} \begin{bmatrix} X_1  \\ \vdots \\ X_N \end{bmatrix}_{N \times 1} = \bar X_{1 \times 1}
$$

In [77]:
temp1 = np.random.rand(4,1) # X
temp1

array([[0.40547964],
       [0.80990644],
       [0.95422128],
       [0.70262176]])

In [80]:
def MEAN_MADE(vector):
    """Return the mean of ``vector`` computed as a matrix product.

    Implements (1/N) * X^T 1, where N is the length of the first axis of
    ``vector`` and 1 is an (N, 1) column of ones.  For an (N, 1) input the
    result is a (1, 1) array; call ``.item()`` on it to get a plain scalar.
    """
    count = vector.shape[0]
    ones_column = np.ones((count, 1))
    return np.dot(vector.T, ones_column) / count

In [81]:
print("Mean function in numpy:", np.mean(temp1))
print("Mean calculated by made function:", MEAN_MADE(temp1))

Mean function in numpy: 0.7180572805940588
Mean calculated by made function: [[0.71805728]]


In [82]:
MEAN_MADE(temp1).item()

0.7180572805940588

- Variance
    - VAR_MADE
$$
S^2 = {1\over N-1}{(X- \mathbf 1 \bar X)^T}{(X- \mathbf 1 \bar X)}
$$

In [83]:
temp1 # X

array([[0.40547964],
       [0.80990644],
       [0.95422128],
       [0.70262176]])

In [84]:
def VAR_MADE(vector):
    """Return the sample variance of ``vector`` (denominator N - 1).

    Implements (X - 1*mean)^T (X - 1*mean) / (N - 1), where the mean comes
    from ``MEAN_MADE`` and N is the length of the first axis of ``vector``.
    For an (N, 1) input the result is a (1, 1) array.
    """
    n_rows = vector.shape[0]
    mean_value = MEAN_MADE(vector).item()

    # Subtract the mean from every entry using an explicit column of ones.
    centered = vector - mean_value * np.ones((n_rows, 1))
    return np.dot(centered.T, centered) / (n_rows - 1)

- VAR_MADE2

$$
= {1\over N-1}{(\mathbf I X- {1\over N} \mathbf 1 \mathbf 1^T X)^T}{(\mathbf I X- {1\over N} \mathbf 1 \mathbf 1^T X)}
$$

$$
= {1\over N-1}{X^T(\mathbf I - {1\over N} \mathbf 1 \mathbf 1^T )^T}{(\mathbf I - {1\over N} \mathbf 1 \mathbf 1^T )X}
$$


In [85]:
def VAR_MADE2(vector):
    """Return the sample variance of ``vector`` via the centering matrix.

    Implements X^T C^T C X / (N - 1) with C = I - (1/N) 1 1^T, where N is
    the length of the first axis of ``vector``.  For an (N, 1) input the
    result is a (1, 1) array.
    """
    n_rows = vector.shape[0]
    ones_column = np.ones((n_rows, 1))

    # Centering matrix: multiplying by it subtracts the mean from a column.
    centering = np.eye(n_rows) - np.dot(ones_column, ones_column.T) / n_rows

    # Multiply left to right: ((X^T C^T) C) X.
    product = np.dot(vector.T, centering.T)
    product = np.dot(product, centering)
    product = np.dot(product, vector)
    return product / (n_rows - 1)

- VAR_MADE3
$$
= {1\over N-1}{X^T(\mathbf I - {1\over N} \mathbf 1 \mathbf 1^T )}X
$$

In [86]:
def VAR_MADE3(vector):
    """Return the sample variance of ``vector`` using one centering matrix.

    Implements X^T C X / (N - 1) with C = I - (1/N) 1 1^T, where N is the
    length of the first axis of ``vector``.  For an (N, 1) input the result
    is a (1, 1) array.
    """
    n_rows = vector.shape[0]
    ones_column = np.ones((n_rows, 1))

    # Centering matrix: multiplying by it subtracts the mean from a column.
    centering = np.eye(n_rows) - np.dot(ones_column, ones_column.T) / n_rows

    # Multiply left to right: (X^T C) X.
    product = np.dot(np.dot(vector.T, centering), vector)
    return product / (n_rows - 1)

In [87]:
print("Variance function in numpy:", np.var(temp1)*len(temp1)/(len(temp1)-1))
print("Variance calculated by made function1:", VAR_MADE(temp1))
print("Variance calculated by made function2:", VAR_MADE2(temp1))
print("Variance calculated by made function3:", VAR_MADE3(temp1))

Variance function in numpy: 0.054050914803421064
Variance calculated by made function1: [[0.05405091]]
Variance calculated by made function2: [[0.05405091]]
Variance calculated by made function3: [[0.05405091]]


In [88]:
VAR_MADE(temp1).item()

0.054050914803421064

In [89]:
VAR_MADE2(temp1).item()

0.05405091480342106

In [90]:
VAR_MADE3(temp1).item()

0.054050914803421064

- Standard Deviation

$$Std(X) = \sqrt{Var(X)} = \sqrt{S^2} = S$$

In [95]:
def STD_MADE(vector):
    """Return the sample standard deviation of ``vector``.

    Takes the element-wise square root of the array returned by
    ``VAR_MADE``, so the result has the same shape as that array.
    """
    sample_variance = VAR_MADE(vector)
    return np.sqrt(sample_variance)

In [96]:
print("Using function in numpy:", np.std(temp1)*np.sqrt(len(temp1)/(len(temp1)-1)))
print("Using STD_MADE:", STD_MADE(temp1))

Using function in numpy: 0.23248852617585467
Using STD_MADE: [[0.23248853]]


In [98]:
STD_MADE(temp1).item()

0.23248852617585467

- Covariance
    
$$
S_{xy} = {1\over N-1}\sum_{i=1}^{N}{(X_i - \bar X)}{(Y_i - \bar Y)}
$$
- COVAR_MADE1
$$
= {1\over N-1}{(X- \mathbf 1 \bar X)^T}{(Y- \mathbf 1 \bar Y)}
$$
    
$$
= {1\over N-1}{(\mathbf I X- {1\over N} \mathbf 1 \mathbf 1^T X)^T}{(\mathbf I Y- {1\over N} \mathbf 1 \mathbf 1^T Y)}
$$
- COVAR_MADE2
$$
= {1\over N-1}{X^T(\mathbf I - {1\over N} \mathbf 1 \mathbf 1^T )^T}{(\mathbf I - {1\over N} \mathbf 1 \mathbf 1^T )Y}
$$
- COVAR_MADE3
$$
= {1\over N-1}{X^T(\mathbf I - {1\over N} \mathbf 1 \mathbf 1^T )}Y
$$

In [99]:
def COVAR_MADE(vector1, vector2):
    """Return the sample covariance of two vectors (denominator N - 1).

    Implements (X - 1*mean_x)^T (Y - 1*mean_y) / (N - 1), where the means
    come from ``MEAN_MADE`` and N is ``len(vector1)``.  For (N, 1) inputs
    the result is a (1, 1) array.
    """
    n_rows = len(vector1)
    ones_column = np.ones((n_rows, 1))

    # MEAN_MADE returns a 2-D array; take its single entry as the mean.
    mean_first = MEAN_MADE(vector1)[0, 0]
    mean_second = MEAN_MADE(vector2)[0, 0]

    centered_first = vector1 - mean_first * ones_column
    centered_second = vector2 - mean_second * ones_column

    return np.dot(centered_first.T, centered_second) / (n_rows - 1)

In [100]:
def COVAR_MADE2(vector1, vector2):
    """Return the sample covariance of two vectors via the centering matrix.

    Implements X^T C^T C Y / (N - 1) with C = I - (1/N) 1 1^T, where N is
    ``len(vector1)``.  For (N, 1) inputs the result is a (1, 1) array.
    """
    n_rows = len(vector1)
    ones_column = np.ones((n_rows, 1))

    # Centering matrix: multiplying by it subtracts the mean from a column.
    centering = np.eye(n_rows) - np.dot(ones_column, ones_column.T) / n_rows

    # Multiply left to right: ((X^T C^T) C) Y.
    product = np.dot(vector1.T, centering.T)
    product = np.dot(product, centering)
    product = np.dot(product, vector2)
    return product / (n_rows - 1)

In [101]:
def COVAR_MADE3(vector1, vector2):
    """Return the sample covariance of two vectors using one centering matrix.

    Implements X^T C Y / (N - 1) with C = I - (1/N) 1 1^T, where N is
    ``len(vector1)``.  For (N, 1) inputs the result is a (1, 1) array.
    """
    n_rows = len(vector1)
    ones_column = np.ones((n_rows, 1))

    # Centering matrix: multiplying by it subtracts the mean from a column.
    centering = np.eye(n_rows) - np.dot(ones_column, ones_column.T) / n_rows

    # Multiply left to right: (X^T C) Y.
    product = np.dot(np.dot(vector1.T, centering), vector2)
    return product / (n_rows - 1)

In [102]:
x = np.array([-2.1, -1,  4.3])
y = np.array([3,  1.1,  0.12])

In [103]:
print(np.cov(x,y))
print("Covariance of x and y calculated by function in numpy:", np.cov(x,y)[0][1])

x=x.reshape((3,1))
y=y.reshape((3,1))

print("Covariance calculated by made function1:", COVAR_MADE(x,y))
print("Covariance calculated by made function2:", COVAR_MADE2(x,y))
print("Covariance calculated by made function3:", COVAR_MADE3(x,y))

[[11.71       -4.286     ]
 [-4.286       2.14413333]]
Covariance of x and y calculated by function in numpy: -4.2860000000000005
Covariance calculated by made function1: [[-4.286]]
Covariance calculated by made function2: [[-4.286]]
Covariance calculated by made function3: [[-4.286]]


In [104]:
COVAR_MADE(x,y).item()

-4.2860000000000005

In [105]:
COVAR_MADE2(x,y).item()

-4.286

In [106]:
COVAR_MADE3(x,y).item()

-4.286