# Python for Data Science

## Numpy

NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays (Wikipedia).

### NumPy array creation

In [1]:
# Import NumPy under its conventional alias `np`
import numpy as np

# 1D array built from a Python list
arr = np.array([1,2,3])
# A bare expression on the last line makes the notebook display the value
arr

array([1, 2, 3])

In [2]:
# linspace(start, stop, num): 51 evenly spaced floats from 0 to 50, both endpoints included
np.linspace(0, 50, 51)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
       26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
       39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.])

In [3]:
# arange(start, stop): consecutive integers from 0 up to, but not including, 51
np.arange(0,51)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])

In [4]:
# 2D array (3 rows x 3 columns) built from a list of rows
arr_bis = np.array([[1,2,3],
                    [4,5,6],
                    [7,8,9]])
arr_bis

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [5]:
# Null (all-zero) matrix with 3 rows and 5 columns; the shape is passed as a tuple
O = np.zeros((3, 5))
O

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [6]:
# Identity matrix of size 5x5: ones on the main diagonal, zeros elsewhere
I = np.identity(5)
I

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [7]:
# Matrix filled with ones, 3 rows by 5 columns; the shape is passed as a tuple
One = np.ones((3, 5))
One

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
# Add two matrices of the same shape, element by element
O + One

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [9]:
# Subtract two matrices of the same shape, element by element
One-O

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [10]:
# Scalar multiplication: every element of the matrix is multiplied by 10
One * 10

array([[10., 10., 10., 10., 10.],
       [10., 10., 10., 10., 10.],
       [10., 10., 10., 10., 10.]])

### Random

In [11]:
# Random 1D array of 6 values drawn uniformly from [0, 1)
# (handy, for example, to simulate the random weights of a portfolio)
ran = np.random.rand(6)
ran

array([0.61620436, 0.07933041, 0.87352904, 0.18087285, 0.81802255,
       0.80004765])

In [12]:
# BONUS: how to find the shape of an array and transform a 1D array into a 2D array
print(f"Shape is {np.shape(ran)}")

# Reshape into a single column: -1 lets NumPy infer the number of rows.
# NOTE: `ran` is rebound to the 2D result, so re-running this cell alone
# prints the 2D shape twice.
ran = ran.reshape(-1,1)

# Shape after the reshape
print(f"Shape is {np.shape(ran)}")

Shape is (6,)
Shape is (6, 1)


In [13]:
# 5x5 array of random integers in [0, 100) (the upper bound is exclusive)
arr_int = np.random.randint(100, size=(5,5))
arr_int

array([[37, 48, 73, 57,  3],
       [33, 31, 82, 58, 11],
       [86, 77, 49, 95, 93],
       [36, 27, 87, 95, 84],
       [74, 85, 21, 44,  5]])

In [14]:
# Normal distribution, 1D: 6 samples with mean 0 (loc) and standard deviation 0.1 (scale)
np.random.normal(loc=0, scale=0.1, size=(6,))

array([ 0.05100083,  0.13540906,  0.15666101,  0.10734351, -0.01752821,
        0.11367179])

In [15]:
# Normal distribution, 2D: a 6x6 matrix of samples with mean 0 (loc) and standard deviation 0.1 (scale)
np.random.normal(loc=0, scale=0.1, size=(6,6))

array([[ 0.10419204, -0.30770845, -0.07798614, -0.04037322,  0.00486764,
         0.16631122],
       [-0.01906316,  0.12872758, -0.05080886, -0.0607633 , -0.0698635 ,
        -0.08973872],
       [ 0.08961122, -0.11681776, -0.05573409, -0.02610501,  0.06697496,
        -0.15358976],
       [-0.06868737,  0.11412479, -0.16283287, -0.2331318 , -0.0555314 ,
        -0.11661412],
       [ 0.04285877,  0.04794433,  0.10398765,  0.05105134, -0.09226026,
        -0.14227867],
       [-0.01922161,  0.10388461,  0.1449929 ,  0.00695149, -0.11267013,
        -0.00115003]])

In [16]:
# Set the seed
# No seed has been set yet: these two draws differ from each other
print(np.random.rand(3))
print(np.random.rand(3))

# Re-seeding the global generator with the same value before each draw
# makes every draw below return the same three numbers
np.random.seed(seed=56)
print(np.random.rand(3))

np.random.seed(seed=56)
print(np.random.rand(3))

np.random.seed(seed=56)
print(np.random.rand(3))

[0.75719926 0.51670196 0.04477556]
[0.51084823 0.29356571 0.57075375]
[0.98419185 0.33341227 0.67370162]
[0.98419185 0.33341227 0.67370162]
[0.98419185 0.33341227 0.67370162]


### Indexing, slicing and transformations

In [17]:
# Choose one value in a matrix
# NOTE: `arr_bis` is redefined here with new values; the cells below use this version
arr_bis = np.array([[1,2,3],
                    [7,1,6],
                    [9,6,3]])
# Chained indexing: select row 0 first, then element 0 of that row
arr_bis[0][0]

np.int64(1)

In [18]:
# Same element, selected with a single [row, column] index
arr_bis[0,0]

np.int64(1)

In [19]:
# Choose a sub-matrix: rows 0 and 1, column 0 (the end of a slice is exclusive)
arr_bis[0:2,0:1]

array([[1],
       [7]])

In [20]:
# Select a single row, then a single column, printed one per line
print(
    arr_bis[1, :],  # row at index 1, every column
    arr_bis[:, 1],  # column at index 1, every row
    sep="\n",
)

[7 1 6]
[2 1 6]


In [21]:
# Max: over the whole matrix, then per row, then per column (one result per line)
print(
    arr_bis.max(),        # largest element of the matrix
    arr_bis.max(axis=1),  # one maximum per row
    arr_bis.max(axis=0),  # one maximum per column
    sep="\n",
)

9
[3 7 9]
[9 6 6]


In [22]:
# Min: over the whole matrix, then per row, then per column (one result per line)
print(
    arr_bis.min(),        # smallest element of the matrix
    arr_bis.min(axis=1),  # one minimum per row
    arr_bis.min(axis=0),  # one minimum per column
    sep="\n",
)

1
[1 1 3]
[1 1 3]


In [23]:
# Mean: over the whole matrix, then per row, then per column (one result per line)
print(
    arr_bis.mean(),        # average of all elements
    arr_bis.mean(axis=1),  # one average per row
    arr_bis.mean(axis=0),  # one average per column
    sep="\n",
)

4.222222222222222
[2.         4.66666667 6.        ]
[5.66666667 3.         4.        ]


In [24]:
# Standard deviation (population form, NumPy's default ddof=0):
# over the whole matrix, then per row, then per column (one result per line)
print(
    arr_bis.std(),        # spread of all elements
    arr_bis.std(axis=1),  # one value per row
    arr_bis.std(axis=0),  # one value per column
    sep="\n",
)

2.698879511442471
[0.81649658 2.62466929 2.44948974]
[3.39934634 2.1602469  1.41421356]


In [25]:
# Alternative: the function form np.std(array, axis=...) gives the same
# per-column result as the method call arr_bis.std(axis=0)
np.std(arr_bis, axis=0)

array([3.39934634, 2.1602469 , 1.41421356])

In [26]:
# Natural logarithm, applied to each element
np.log(arr_bis)

array([[0.        , 0.69314718, 1.09861229],
       [1.94591015, 0.        , 1.79175947],
       [2.19722458, 1.79175947, 1.09861229]])

In [27]:
# Exponential function (e**x), applied to each element
np.exp(arr_bis)

array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01],
       [1.09663316e+03, 2.71828183e+00, 4.03428793e+02],
       [8.10308393e+03, 4.03428793e+02, 2.00855369e+01]])

In [28]:
# Square root function, applied to each element
np.sqrt(arr_bis)

array([[1.        , 1.41421356, 1.73205081],
       [2.64575131, 1.        , 2.44948974],
       [3.        , 2.44948974, 1.73205081]])

In [29]:
# Concatenate
# Params: a tuple of arrays and the axis along which to join them
# Concatenating two 1D arrays
arr1 = arr_bis[:,1]  # column at index 1, as a 1D array
arr2 = arr_bis[:,2]  # column at index 2, as a 1D array

print(f"ARR1 {arr1}")
print(f"ARR2 {arr2}")

# Joining 1D arrays along axis 0 gives one longer 1D array
print(np.concatenate((arr1, arr2), axis=0))

ARR1 [2 1 6]
ARR2 [3 6 3]
[2 1 6 3 6 3]


In [30]:
# Concatenating 2D arrays along axis 0 (stacked vertically)
# Turn each 1D array into a single-column 2D array first.
# NOTE: arr1 and arr2 are rebound to their 2D versions here.
arr1 = arr1.reshape(-1,1)
arr2 = arr2.reshape(-1,1)

print(f"ARR1 {arr1}")
print(f"ARR2 {arr2}")

# axis=0 appends the rows of arr2 below the rows of arr1
print(np.concatenate((arr1, arr2), axis=0))

ARR1 [[2]
 [1]
 [6]]
ARR2 [[3]
 [6]
 [3]]
[[2]
 [1]
 [6]
 [3]
 [6]
 [3]]


In [31]:
# Concatenating 2D arrays along axis 1 (side by side): each input becomes one column of the result
print(np.concatenate((arr1, arr2), axis=1))

[[2 3]
 [1 6]
 [6 3]]
