# NumPy maths and stats

https://www.youtube.com/watch?v=RwFiNlL4Q8g

In [265]:
import numpy as np

In [266]:
# Seed NumPy's legacy global generator so the draw below is reproducible.
np.random.seed(seed=0)
A = np.random.randint(low=0, high=10, size=(2, 3))  # integers in [0, 10), 2 rows x 3 columns
A

array([[5, 0, 3],
       [3, 7, 9]])

## ndarray methods

https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html

In [267]:
A.sum(axis=0)

array([ 8,  7, 12])

In [268]:
A.min(axis=1)

array([0, 3])

In [269]:
A.argmin(axis=1)

array([1, 0])

In [270]:
print(A.argsort())
A

[[1 2 0]
 [0 1 2]]


array([[5, 0, 3],
       [3, 7, 9]])

In [271]:
A.sort()
A

array([[0, 3, 5],
       [3, 7, 9]])

## Maths functions

In [272]:
X = np.random.randn(2, 3)
X

array([[-1.42232584,  1.52006949, -0.29139398],
       [-0.13309028, -0.1730696 , -1.76165167]])

In [273]:
np.exp(X)

array([[0.24115248, 4.57254293, 0.74722123],
       [0.87538605, 0.84107908, 0.17176094]])

In [274]:
np.sin(X)

array([[-0.98899849,  0.99871367, -0.28728772],
       [-0.13269773, -0.17220689, -0.98184234]])

## Statistical functions

In [275]:
# Two small random integer matrices with transposed shapes (2x3 and 3x2).
A = np.random.randint(low=0, high=10, size=(2, 3))
B = np.random.randint(low=0, high=10, size=(3, 2))
print(A, B, sep="\n")

[[7 7 8]
 [1 5 9]]
[[8 9]
 [4 3]
 [0 3]]


In [276]:
A.mean()

np.float64(6.166666666666667)

In [277]:
A.std()

np.float64(2.608745973749755)

In [278]:
# Correlation matrix between the rows of A: np.corrcoef treats each row as a
# variable by default, so a 2-row array yields a 2x2 matrix.
np.corrcoef(A)

array([[1.       , 0.8660254],
       [0.8660254, 1.       ]])

In [279]:
# Count how often each distinct value appears in a random 5x5 integer matrix.
A = np.random.randint(low=0, high=10, size=(5, 5))
values, counts = np.unique(A, return_counts=True)
print(values, counts, sep="\n")

[0 1 2 3 4 5 7 8 9]
[5 2 3 5 2 2 3 1 2]


In [280]:
counts.argsort()

array([7, 1, 4, 5, 8, 2, 6, 3, 0])

In [281]:
values[counts.argsort()]

array([8, 1, 4, 5, 9, 2, 7, 3, 0])

In [282]:
# Compute the ascending-frequency permutation once and reuse it for both
# arrays (instead of calling counts.argsort() twice), so values and counts
# are indexed by exactly the same ordering.
order = counts.argsort()
for v, c in zip(values[order], counts[order]):
    print(f"value {v} occurs {c} times")

value 8 occurs 1 times
value 1 occurs 2 times
value 4 occurs 2 times
value 5 occurs 2 times
value 9 occurs 2 times
value 2 occurs 3 times
value 7 occurs 3 times
value 3 occurs 5 times
value 0 occurs 5 times


## Handling NaN values

In [283]:
# Start from a 5x5 standard-normal sample, then blank out two entries to
# simulate missing data.
A = np.random.randn(5, 5)
missing_rows, missing_cols = [0, 4], [2, 3]
A[missing_rows, missing_cols] = np.nan  # sets A[0, 2] and A[4, 3]
A

array([[-0.4157447 , -0.52451219,         nan, -0.22925063,  2.16171737],
       [-0.95693143,  0.06731083,  0.20649884, -0.45688133, -1.05997576],
       [ 0.61495732,  1.42966077, -0.21195226, -0.08033726,  0.40539778],
       [ 0.11860659,  1.25441407,  1.41910204, -0.74385608, -2.5174371 ],
       [-1.50709602,  1.14907613, -1.19357825,         nan,  1.50944508]])

In [284]:
A.mean()

np.float64(nan)

In [285]:
np.nanmean(A)

np.float64(0.019071034514388442)

In [286]:
np.nanstd(A)

np.float64(1.1010909310579768)

In [287]:
np.isnan(A)

array([[False, False,  True, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False,  True, False]])

In [288]:
np.isnan(A).sum()

np.int64(2)

In [289]:
np.isnan(A).sum()/A.size

np.float64(0.08)

In [290]:
# Overwrite every NaN entry with 0, modifying A in place.
nan_mask = np.isnan(A)
A[nan_mask] = 0
A

array([[-0.4157447 , -0.52451219,  0.        , -0.22925063,  2.16171737],
       [-0.95693143,  0.06731083,  0.20649884, -0.45688133, -1.05997576],
       [ 0.61495732,  1.42966077, -0.21195226, -0.08033726,  0.40539778],
       [ 0.11860659,  1.25441407,  1.41910204, -0.74385608, -2.5174371 ],
       [-1.50709602,  1.14907613, -1.19357825,  0.        ,  1.50944508]])

## Linear algebra

https://numpy.org/doc/stable/reference/routines.linalg.html

In [291]:
A = np.ones((3, 2))
B = np.ones((2, 3))
A

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

In [292]:
B

array([[1., 1., 1.],
       [1., 1., 1.]])

In [293]:
A.T

array([[1., 1., 1.],
       [1., 1., 1.]])

In [294]:
A.dot(B)

array([[2., 2., 2.],
       [2., 2., 2.],
       [2., 2., 2.]])

In [295]:
B.dot(A)

array([[3., 3.],
       [3., 3.]])

In [296]:
A = np.random.randint(0, 10, (5, 5))
A

array([[0, 3, 2, 0, 7],
       [5, 9, 0, 2, 7],
       [2, 9, 2, 3, 3],
       [2, 3, 4, 1, 2],
       [9, 1, 4, 6, 8]])

In [297]:
np.linalg.det(A)

np.float64(5754.000000000004)

In [298]:
np.linalg.inv(A)

array([[-0.16996872,  0.19951338, -0.20403198,  0.18178658,  0.00521376],
       [-0.03232534,  0.05839416,  0.0552659 ,  0.04275287, -0.05422315],
       [ 0.02294056, -0.07907543, -0.01233924,  0.25460549, -0.00990615],
       [ 0.04483837, -0.25304136,  0.331943  , -0.32811957,  0.13972888],
       [ 0.15015641, -0.00243309, -0.02015989, -0.09106708,  0.02606882]])

In [299]:
np.linalg.pinv(A)

array([[-0.16996872,  0.19951338, -0.20403198,  0.18178658,  0.00521376],
       [-0.03232534,  0.05839416,  0.0552659 ,  0.04275287, -0.05422315],
       [ 0.02294056, -0.07907543, -0.01233924,  0.25460549, -0.00990615],
       [ 0.04483837, -0.25304136,  0.331943  , -0.32811957,  0.13972888],
       [ 0.15015641, -0.00243309, -0.02015989, -0.09106708,  0.02606882]])

In [300]:
np.linalg.eig(A)

EigResult(eigenvalues=array([20.14469141+0.j        ,  3.22584634+4.41845793j,
        3.22584634-4.41845793j, -4.45331931+0.j        ,
       -2.14306478+0.j        ]), eigenvectors=array([[-3.22851050e-01+0.j        , -1.80929134e-01+0.07228395j,
        -1.80929134e-01-0.07228395j,  8.22725040e-01+0.j        ,
         6.15295398e-01+0.j        ],
       [-5.49267575e-01+0.j        ,  1.32923284e-01+0.45179019j,
         1.32923284e-01-0.45179019j, -4.62266602e-04+0.j        ,
         2.55880273e-02+0.j        ],
       [-4.46640640e-01+0.j        ,  6.10019388e-01+0.j        ,
         6.10019388e-01-0.j        ,  6.93033753e-02+0.j        ,
         3.31928063e-01+0.j        ],
       [-2.72255802e-01+0.j        ,  3.31376032e-01-0.2305942j ,
         3.31376032e-01+0.2305942j , -1.53164460e-01+0.j        ,
        -6.51183964e-01+0.j        ],
       [-5.66092967e-01+0.j        , -3.60263117e-01-0.27451735j,
        -3.60263117e-01+0.27451735j, -5.43011037e-01+0.j        ,
     

## Exercise

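Standardize each column of matrix A:
$$
  \frac{A - \bar{A}}{\sigma(A)}
$$
where $\bar{A}$ and $\sigma(A)$ are the mean and the standard deviation of each column.

Expected result: each column has a mean of 0 and a standard deviation of 1.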

In [309]:
# Re-seed so the exercise always starts from the same 10x5 integer matrix.
np.random.seed(seed=0)
A = np.random.randint(low=0, high=100, size=(10, 5))
A

array([[44, 47, 64, 67, 67],
       [ 9, 83, 21, 36, 87],
       [70, 88, 88, 12, 58],
       [65, 39, 87, 46, 88],
       [81, 37, 25, 77, 72],
       [ 9, 20, 80, 69, 79],
       [47, 64, 82, 99, 88],
       [49, 29, 19, 19, 14],
       [39, 32, 65,  9, 57],
       [32, 31, 74, 23, 35]])

In [310]:
def std_cols(A):
    """Standardize every column of a 2-D array to zero mean and unit variance.

    Parameters
    ----------
    A : array_like, shape (n_rows, n_cols)
        Input data. It is never modified; a new float array is returned.

    Returns
    -------
    numpy.ndarray of float, same shape as ``A``
        ``(A - column_mean) / column_std`` using the population standard
        deviation (``ddof=0``). Columns whose standard deviation is exactly 0
        (constant columns) come back as all zeros rather than NaN.

    Raises
    ------
    ValueError
        If ``A`` is not 2-dimensional.
    """
    data = np.asarray(A, dtype=float)
    if data.ndim != 2:
        raise ValueError(f"std_cols expects a 2-D array, got {data.ndim} dimension(s)")

    col_mean = data.mean(axis=0)
    col_std = data.std(axis=0)
    # A constant column has std == 0 and would produce 0/0 = NaN; dividing by 1
    # instead leaves its (already zero) centered values untouched.
    safe_std = np.where(col_std == 0, 1.0, col_std)
    # Broadcasting applies the per-column statistics to every row at once, and
    # the arithmetic allocates a new array, so the caller's data is untouched.
    return (data - col_mean) / safe_std

In [311]:
B = std_cols(A)
B

array([[-0.02206157,  0.        ,  0.13173823,  0.72539252,  0.10755798],
       [-1.56637126,  1.61579632, -1.48676006, -0.33034307,  0.96802178],
       [ 1.12513992,  1.84021247,  1.03508612, -1.14768676, -0.27965074],
       [ 0.90452425, -0.35906585,  0.99744662,  0.0102168 ,  1.01104497],
       [ 1.6104944 , -0.44883231, -1.33620208,  1.0659524 ,  0.32267393],
       [-1.56637126, -1.21184724,  0.73397016,  0.7935045 ,  0.62383626],
       [ 0.11030784,  0.76301493,  0.80924915,  1.81518411,  1.01104497],
       [ 0.1985541 , -0.80789816, -1.56203905, -0.90929485, -2.17267111],
       [-0.24267724, -0.67324847,  0.16937773, -1.24985473, -0.32267393],
       [-0.55153918, -0.7181317 ,  0.50813319, -0.77307091, -1.26918412]])

In [312]:
# Sanity check: report the mean and standard deviation of every standardized column.
for col_number, column in enumerate(B.T, start=1):
    print(f"mean  column {col_number} = {column.mean():.2f}")
    print(f"stdev column {col_number} = {column.std():.2f}")

mean  column 1 = 0.00
stdev column 1 = 1.00
mean  column 2 = 0.00
stdev column 2 = 1.00
mean  column 3 = 0.00
stdev column 3 = 1.00
mean  column 4 = -0.00
stdev column 4 = 1.00
mean  column 5 = 0.00
stdev column 5 = 1.00
