# Numpy

file made with Yeongho Lee, UNIST

- **Numpy** is the basic package for data analysis with Python, specialized for computation of arrays.   
This package is also the basis of other popular packages including **Pandas**.



### Import Numpy



In [1]:
import numpy as np

### Basics

- Vectors

In [2]:
a = [1,2,3] # plain Python list
b = np.array([1,2,3]) # NumPy ndarray built from a list

print(a)
print(b)

[1, 2, 3]
[1 2 3]


In [3]:
print(a+a) # list: + concatenates the two lists
print(b+b) # ndarray: + adds element by element

[1, 2, 3, 1, 2, 3]
[2 4 6]


In [6]:
print(np.array(a))

[1 2 3]


In [10]:
#indexing

print(b[0])
print(b[0:2])

1
[1 2]


In [11]:
# elementwise operation

b+b

array([2, 4, 6])

In [12]:
b*b

array([1, 4, 9])

In [13]:
b/b

array([1., 1., 1.])

In [14]:
2 * b

array([2, 4, 6])

In [15]:
b/5

array([0.2, 0.4, 0.6])

- Matrices

In [16]:
mat=np.array([[2,5,18,14,4], [12,15,1,2,8]])
print(mat.shape)
mat

(2, 5)


array([[ 2,  5, 18, 14,  4],
       [12, 15,  1,  2,  8]])

In [17]:
#indexing

print(mat[1,2])
print(mat[1][2])

1
1


In [18]:
print(mat[0,:])

[ 2  5 18 14  4]


In [19]:
print(mat[:,0])

[ 2 12]


In [21]:
print(mat[1,2:4])

[1 2]


In [22]:
print(mat[:,2:4])

[[18 14]
 [ 1  2]]


※ Numpy arrays are not restricted to 2-dimensional arrays.
We can construct n-dimensional arrays for any n:




In [23]:
mat2=np.array([[[2,5,18,14,4], [12,15,1,2,8]],[[3,5,8,12,7], [11,1,0,20,8]]])

In [24]:
print(mat2.shape)
print(mat2[0,1,2])
print(mat2[0][1][2])

(2, 2, 5)
1
1


In [25]:
x = np.random.rand(5,5)
print(x)
#each element is sampled from the uniform distribution [0,1)

[[0.98379025 0.30415399 0.54591397 0.42440842 0.84349714]
 [0.31419387 0.01389741 0.5089074  0.11465909 0.5562589 ]
 [0.09649336 0.77079406 0.25321957 0.56536862 0.38795085]
 [0.16034187 0.83480911 0.01138835 0.13741849 0.86002783]
 [0.71951478 0.35798526 0.20004904 0.16419783 0.68227342]]


In [26]:
x = np.random.randint(100,size=(2,3))
print(x)


[[ 4 73 34]
 [75 31 20]]


In [27]:
x = np.zeros((4,4))
print(x)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [28]:
x = np.ones((4,4))
print(x)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [29]:
x = np.eye(4) #identity matrix
print(x)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [30]:
x = np.identity(4)
print(x)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [31]:
# Be careful: slicing an ndarray gives a view, not a copy,
# so assigning into B below also changes Mat.

Mat=np.array([[1,2,3],[4,5,6]])
B=Mat[:2,:2]
print(B)
B[0,0]=0
print(B)
print(Mat)

[[1 2]
 [4 5]]
[[0 2]
 [4 5]]
[[0 2 3]
 [4 5 6]]


- To avoid the above problem, use `numpy.copy()` function:

In [32]:
Mat=np.array([[1,2,3],[4,5,6]])
B=np.copy(Mat[:2,:2])
print(B)
B[0,0]=0
print(B)
print(Mat)

[[1 2]
 [4 5]]
[[0 2]
 [4 5]]
[[1 2 3]
 [4 5 6]]


In [39]:
x = np.random.rand(4,3)
print(x)

[[0.42109222 0.30238383 0.41650693]
 [0.40127891 0.69404208 0.10925494]
 [0.77489226 0.77350623 0.09217272]
 [0.32758971 0.45678771 0.94515075]]


In [40]:
x[1,2] = -5
print(x)

[[ 0.42109222  0.30238383  0.41650693]
 [ 0.40127891  0.69404208 -5.        ]
 [ 0.77489226  0.77350623  0.09217272]
 [ 0.32758971  0.45678771  0.94515075]]


In [41]:
x[0:2,:] += 1
print(x)

[[ 1.42109222  1.30238383  1.41650693]
 [ 1.40127891  1.69404208 -4.        ]
 [ 0.77489226  0.77350623  0.09217272]
 [ 0.32758971  0.45678771  0.94515075]]


In [42]:
x[2:4,1:3] = 0.5
print(x)

[[ 1.42109222  1.30238383  1.41650693]
 [ 1.40127891  1.69404208 -4.        ]
 [ 0.77489226  0.5         0.5       ]
 [ 0.32758971  0.5         0.5       ]]


In [43]:
x[x>0.5] = 0
print(x)

[[ 0.          0.          0.        ]
 [ 0.          0.         -4.        ]
 [ 0.          0.5         0.5       ]
 [ 0.32758971  0.5         0.5       ]]


In [44]:
x

array([[ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        , -4.        ],
       [ 0.        ,  0.5       ,  0.5       ],
       [ 0.32758971,  0.5       ,  0.5       ]])

In [45]:
x>0.1

array([[False, False, False],
       [False, False, False],
       [False,  True,  True],
       [ True,  True,  True]])

### Matrix multiplication

In [46]:
A=np.random.rand(4,3)

In [47]:
B=np.random.rand(3,3)

In [48]:
A.dot(B)

array([[0.8065716 , 1.10879063, 0.60721623],
       [1.06910569, 1.47231554, 0.70058694],
       [0.98490403, 1.10627967, 0.66155823],
       [1.55702219, 1.54561443, 0.60363363]])

In [49]:
np.dot(A,B)

array([[0.8065716 , 1.10879063, 0.60721623],
       [1.06910569, 1.47231554, 0.70058694],
       [0.98490403, 1.10627967, 0.66155823],
       [1.55702219, 1.54561443, 0.60363363]])

In [50]:
B.dot(A)

ValueError: ignored

Note !!

In [52]:
A

array([[0.40916945, 0.41923743, 0.4344068 ],
       [0.51951449, 0.31825296, 0.83641447],
       [0.21195169, 0.90888749, 0.09522719],
       [0.00620711, 0.92702272, 0.71050881]])

In [53]:
A.shape

(4, 3)

In [58]:
y = np.array([1,0,0])
print('y',y)
print('y shape' ,y.shape)
print(A.dot(y))

y [1 0 0]
y shape (3,)
[0.40916945 0.51951449 0.21195169 0.00620711]


https://stats.stackexchange.com/questions/284995/are-1-dimensional-numpy-arrays-equivalent-to-vectors

$\rightarrow$ NumPy automatically treats a one-dimensional array with shape (n,) as a row or a column vector, whichever makes the matrix multiplication valid.

$\rightarrow$ Only for vectors with shape (n,), not vectors with shape (n,1) or (1,n).

In [63]:
y=np.array([[1,0,1]])

In [64]:
y

array([[1, 0, 1]])

In [65]:
y.shape

(1, 3)

In [67]:
#you can do also
y = np.array([1,0,1]).reshape((1,3))

In [68]:
y

array([[1, 0, 1]])

In [70]:
print(A.dot(y)) #  4 * 3 dot 1*3

ValueError: ignored

### Transpose

In [80]:
y.T

array([[1],
       [0],
       [1]])

In [82]:
print(A.dot(y.T))

[[0.84357625]
 [1.35592896]
 [0.30717888]
 [0.71671592]]


### inner products and outer products of vectors

In [83]:
#inner products

y = np.array([2,-1,3])
z = np.array([-1,2,2])
print(np.dot(y,z)) # y.dot(z)
print(np.dot(z,y))

2
2


In [84]:
#outer products yz^T

print(np.outer(y,z))

[[-2  4  4]
 [ 1 -2 -2]
 [-3  6  6]]


inner product : (1,3) * (3, 1) ====> (1,1)
\
outer product : (3,1) * (1,3)  ====> (3,3)

### Inverse

In [85]:
A=np.random.rand(4,4)
B=np.linalg.inv(A)
print(A)
print(B)
# A.dot(B) is only approximately the identity: floating-point round-off
# leaves tiny non-zero off-diagonal entries (around 1e-16 in the output below).
print(A.dot(B))

[[0.36755292 0.5603907  0.02921297 0.03149512]
 [0.58972898 0.38897041 0.8895268  0.2795681 ]
 [0.32295055 0.0386788  0.65097642 0.67797246]
 [0.6842651  0.05879946 0.22520237 0.3583805 ]]
[[-0.36924095  0.35952247 -1.12545219  1.88108406]
 [ 2.02151673 -0.22418396  0.62676055 -1.18845643]
 [-0.94267739  1.48043203 -0.21304507 -0.66899047]
 [ 0.96569923 -1.57995084  2.17989737 -0.18589552]]
[[ 1.00000000e+00  2.86275763e-17 -1.15928692e-16  8.64047922e-17]
 [ 1.43374800e-16  1.00000000e+00  3.19720563e-17  3.45644480e-17]
 [ 2.17448782e-18 -4.02427788e-17  1.00000000e+00  4.36208537e-17]
 [ 3.87169872e-17  1.75520091e-17  2.77643327e-17  1.00000000e+00]]


In [86]:
#applied elementwise

np.round(A.dot(B),3)

array([[ 1.,  0., -0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0., -0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

In [None]:
np.round(3.5678,3)

3.568

In [92]:
A=[[1,4,9],[16,-4,25]]

In [93]:
np.sqrt(np.abs(A))

array([[1., 2., 3.],
       [4., 2., 5.]])

## Mean, Var, Std, Median, Sum, Cov

In [95]:
A = np.random.rand(5,3)
A

array([[0.03624033, 0.07173769, 0.23786729],
       [0.69574236, 0.27228309, 0.25734243],
       [0.26624089, 0.70098099, 0.356262  ],
       [0.49582468, 0.50823033, 0.86889093],
       [0.21941998, 0.02973815, 0.8218506 ]])

### np.mean()


In [99]:
print("Mean:", np.mean(A))
print("Mean along rows :", np.mean(A, axis=0)) #np.mean(A, 0)
print("Mean along columns :", np.mean(A, axis=1)) # np.mean(A,1)

Mean: 0.38924344886250706
Mean along rows : [0.34269365 0.31659405 0.50844265]
Mean along columns : [0.11528177 0.40845596 0.44116129 0.62431531 0.35700291]


### np.var()


- Sample Variance
$$
S^2 = {1\over N-1} \sum^N_{i=1}{(X_i - \bar X)}^2
$$
- `numpy.var()` however uses $N$ instead of $N-1$ in the denominator (It calculates the population variance instead of sample variance.)

In [100]:
print("Variance:", np.var(A))
print("Variance along rows :", np.var(A, axis=0)) #np.var(A,0)
print("Variance along columns :", np.var(A, axis=1)) #np.var(A,1)

Variance: 0.07250432686789875
Variance along rows : [0.05260951 0.06573644 0.07751376]
Variance along columns : [0.00772362 0.04130394 0.03510377 0.02993427 0.11403822]


### np.std()



In [101]:
print("Standard Deviation :", np.std(A))
print("Standard Deviation along rows  :", np.std(A, axis=0)) #np.std(A,0)
print("Standard Deviation along columns :", np.std(A, axis=1)) #np.std(A,1)

Standard Deviation : 0.26926627502882483
Standard Deviation along rows  : [0.22936764 0.25639118 0.27841293]
Standard Deviation along columns : [0.0878841  0.20323371 0.18736001 0.17301522 0.33769545]


### np.median()


- When $n$ is odd.
\
\
    np.median =
$$
Median(X) = {X_{\left[{{{n+1} \over 2}}\right]}}
$$

- When $n$ is even
\
\
    np.median =
$$
Median(X) = {1\over 2} \{{X_{\lfloor{{n+1\over 2}}\rfloor}}+X_{\lceil{{n+1\over 2}}\rceil}\}
$$

In [102]:
print("Median:", np.median(A))
print("Median along rows :", np.median(A, axis=0)) #np.median(A, 0)
print("Median along columns :", np.median(A, axis=1)) #np.median(A, 1)

Median: 0.2722830890554905
Median along rows : [0.26624089 0.27228309 0.356262  ]
Median along columns : [0.07173769 0.27228309 0.356262   0.50823033 0.21941998]


### np.sum()


In [103]:
print("Sum:", np.sum(A))
print("Sum along rows:", np.sum(A, axis=0)) #np.sum(A, 0)
print("Sum along columns :", np.sum(A, axis=1)) #np.sum(A, 1)

Sum: 5.838651732937606
Sum along rows: [1.71346823 1.58297025 2.54221325]
Sum along columns : [0.34584531 1.22536788 1.32348388 1.87294594 1.07100873]


### np.cov()


- Sample Covariance
$$
Cov(x,y) = S_{xy} = {1\over N-1} \sum^N_{i=1}{(X_i - \bar X)}{(Y_i - \bar Y)}
$$

$$
\begin{pmatrix} Cov(x,x) & Cov(x,y) \\ Cov(x,y) & Cov(y,y) \end{pmatrix}
$$

In [104]:
x = np.array([-2.1, -1,  4.3]) # N =3
y = np.array([3,  1.1,  0.12])

In [105]:
print("Covariance Matrix \n :", np.cov(x,y))
print("cov(x,y):",np.cov(x,y)[0][1])

Covariance Matrix 
 : [[11.71       -4.286     ]
 [-4.286       2.14413333]]
cov(x,y): -4.2860000000000005


In [106]:
np.var(x) # divides by N

7.806666666666666

In [107]:
np.cov(x,y)[0,0] #=var(x), #divides by N-1

11.709999999999999

In [108]:
np.var(x)*3/2

11.709999999999999

### Compute the mean, variance, and standard deviation of ( $\mathbf X$ : $N * 1$) as matrix operation

- Mean

$$\bar X = {1\over N} \sum^N_{i=1}X_i $$

$$
= {1\over N} \mathbf 1 \cdot X = {1\over N} \mathbf 1^T  X
$$

$$
{1\over N} \begin{bmatrix} 1 &  \cdots &  1 \end{bmatrix}_{1 \times N} \begin{bmatrix} X_1  \\ \vdots \\ X_N \end{bmatrix}_{N \times 1} = \bar X_{1 \times 1}
$$

In [130]:
x = np.random.rand(4,1)

In [127]:
# reference solution for the exercise
def MEAN_MADE(vector):
    """Return the mean of a column vector, computed as a matrix product.

    Implements (1/N) * 1^T X, where 1 is the all-ones vector of length N.

    Parameters
    ----------
    vector : array of shape (N, 1)
        Column vector whose entries are averaged.

    Returns
    -------
    numpy.ndarray of shape (1, 1)
        The mean, kept as a 1x1 matrix; use ``.item()`` or ``[0, 0]`` to
        get the scalar.
    """
    N = len(vector)  # length of the vector (number of rows)
    # A (1, N) row of ones times the (N, 1) column sums the entries.
    return np.ones((1, N)).dot(vector) / N

In [138]:
print("Mean calculated from numpy:", np.mean(x))
print("Mean calculated from MEAN_MADE:", MEAN_MADE(x).item())

Mean calculated from numpy: 0.8071299779251303
Mean calculated from MEAN_MADE: 0.8071299779251303


- Variance
    - VAR_MADE
$$
S^2 = {1\over N-1}{(X- \mathbf 1 \bar X)^T}{(X- \mathbf 1 \bar X)}
$$

In [117]:
x

array([[0.47197774],
       [0.04686109],
       [0.85618126],
       [0.03432622]])

In [118]:
# reference solution for the exercise
def VAR_MADE(vector):
    """Return the sample variance of a column vector as a matrix product.

    Implements S^2 = (X - 1*mean)^T (X - 1*mean) / (N - 1), i.e. the
    variance with N - 1 in the denominator.

    Parameters
    ----------
    vector : array of shape (N, 1)
        Column vector of observations.

    Returns
    -------
    numpy.ndarray of shape (1, 1)
        The sample variance, kept as a 1x1 matrix; use ``.item()`` to get
        the scalar.
    """
    mu = MEAN_MADE(vector).item()
    N = len(vector)  # length of the vector (number of rows)

    # Subtract the mean from every entry: X - 1 * mean.
    centered = vector - mu*np.ones((N,1))

    # (1, N) times (N, 1) gives the sum of squared deviations.
    return centered.T.dot(centered)/(N-1)

In [119]:
print("Variance function in numpy:", np.var(x)*len(x)/(len(x)-1))
print("Variance from VAR_MADE:", VAR_MADE(x).item())

Variance function in numpy: 0.15420644313976736
Variance from VAR_MADE: 0.15420644313976736


- Standard Deviation

$$Std(X) = \sqrt{Var(X)} = \sqrt{S^2} = S$$

In [123]:
def STD_MADE(vector):
    """Return the sample standard deviation: the square root of VAR_MADE."""
    sample_variance = VAR_MADE(vector)
    return np.sqrt(sample_variance)

In [122]:
print("STD function in numpy:", np.std(x)*np.sqrt(len(x)/(len(x)-1)))
print("STD from STD_MADE:", STD_MADE(x).item())

STD function in numpy: 0.39269128223041483
STD from STD_MADE: 0.3926912822304149


- Covariance
    
$$
S_{xy} = {1\over N-1}\sum^N_{i=1}{(X_i - \bar X)}{(Y_i - \bar Y)}
$$
- COVAR_MADE
$$
= {1\over N-1}{(X- \mathbf 1 \bar X)^T}{(Y- \mathbf 1 \bar Y)}
$$

In [124]:
def COVAR_MADE(vector1, vector2):
    """Return the sample covariance of two column vectors as a matrix product.

    Computes (X - 1*mean_x)^T (Y - 1*mean_y) / (N - 1), where N is the
    length of ``vector1``. The result is what ``temp1.T.dot(temp2)`` yields,
    divided by N - 1; the notebook reads the scalar with ``.item()``.
    """
    N = len(vector1)
    ones_col = np.ones((N,1))

    # Centre each vector by subtracting its own mean from every entry.
    centered1 = vector1 - MEAN_MADE(vector1)[0,0]*ones_col
    centered2 = vector2 - MEAN_MADE(vector2)[0,0]*ones_col

    cross_sum = np.dot(centered1.T, centered2)
    return cross_sum/(N-1)

In [125]:
x = np.array([-2.1, -1,  4.3])
y = np.array([3,  1.1,  0.12])

In [126]:
print(np.cov(x,y))
print("Covariance of x and y from function in numpy:", np.cov(x,y)[0][1])

x=x.reshape((3,1))
y=y.reshape((3,1))

print("Covariance from COVAR_MADE:", COVAR_MADE(x,y).item())


[[11.71       -4.286     ]
 [-4.286       2.14413333]]
Covariance of x and y from function in numpy: -4.2860000000000005
Covariance from COVAR_MADE: -4.2860000000000005
