<a href="https://colab.research.google.com/github/thihanaung-thnn/notes_Python/blob/main/notes_jose_portilla_lectures/machine_learning_with_python/01_numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Numpy**
- Introduction
- Indexing and selection
- Operations

In [1]:
import numpy as np

## Introduction

### Why use Numpy array instead of a list?
- Memory efficiency
- Easily expands to N-dimensional objects
- Faster calculations on NumPy arrays than on Python lists
- Broadcasting operations and a large set of built-in NumPy functions

In [2]:
# Hold the same three values in a built-in list and in a NumPy array,
# then print both types side by side.
my_list = [1, 2, 3]
my_array = np.array(my_list)
print(type(my_list), type(my_array))

<class 'list'> <class 'numpy.ndarray'>


In [3]:
# creating numpy arrays from objects
# from python list
np.array(my_list)

array([1, 2, 3])

In [4]:
my_matrix = [[1,2,3],[4,5,6],[7,8,9]]
my_matrix

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [5]:
np.array(my_matrix)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Methods to create arrays
- arange
- zeros and ones
- linspace
- eye
- rand
- randn
- randint
- seed


In [6]:
# arange - evenly spaced integers from start (inclusive) up to stop (exclusive)
np.arange(0,10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [7]:
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [8]:
# zeros and ones
np.zeros(3)

array([0., 0., 0.])

In [10]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [11]:
np.ones(3)

array([1., 1., 1.])

In [12]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [14]:
# linspace - a given number of evenly spaced points from start to stop, both endpoints included
np.linspace(0,10,3)

array([ 0.,  5., 10.])

In [15]:
np.linspace(0,5,10)

array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

In [16]:
# eye - create identity matrix 
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [17]:
# random 
# rand - random number from uniform distribution over [0, 1)
np.random.rand(3)

array([0.76776704, 0.52006772, 0.17615746])

In [19]:
np.random.rand(3,3)

array([[0.3394237 , 0.0458129 , 0.70258483],
       [0.87717438, 0.79052245, 0.06848634],
       [0.92815677, 0.30375937, 0.79430602]])

In [20]:
# randn - random number(s) drawn from the standard normal distribution (mean = 0, sd = 1)
np.random.randn(3)

array([-0.10135964, -1.24908645, -0.6170238 ])

In [21]:
np.random.randn(3,3)

array([[ 0.46415986, -0.89163834, -1.3589166 ],
       [ 0.47912743, -1.08203472, -0.63372049],
       [-0.22072599, -2.44839077,  1.84856461]])

In [22]:
# randint - random number(s) from low (inclusive) to high (exclusive)
np.random.randint(1,100) # random number from 1 to 99

32

In [23]:
np.random.randint(1,100,10) # 10 random numbers from 1 to 99

array([57, 85, 57, 42, 56,  5, 33, 65, 22, 66])

In [24]:
# seed - set the random state to make code reproducible 
np.random.seed(13)
np.random.rand(4)

array([0.77770241, 0.23754122, 0.82427853, 0.9657492 ])

In [25]:
np.random.seed(13) # re-seeding with the same value restarts the sequence, so rand(4) repeats the previous cell's result
np.random.rand(4)

array([0.77770241, 0.23754122, 0.82427853, 0.9657492 ])

### Array Attributes and Methods
- shape 
- reshape 
- min, max, argmax, argmin
- dtype

In [27]:
# arr: the integers 0..24
# randarr: 10 random integers drawn from [0, 50); the values depend on the current global RNG state
arr = np.arange(25)
randarr = np.random.randint(0,50,10)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [29]:
arr.shape

(25,)

In [30]:
# reshape - returns the same 25 values laid out in a new shape (the result is not assigned back to arr)
arr.reshape(5,5) # arrange the 25 elements as a 5x5 matrix

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [31]:
arr.reshape(25,1)

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23],
       [24]])

In [32]:
arr.reshape(1,25)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24]])

In [33]:
randarr

array([ 2, 32, 19, 21, 19, 11, 22, 11,  1, 18])

In [35]:
# methods - min, max, argmin, argmax
randarr.min() # return minimum value from array

1

In [36]:
randarr.max() # return maximum value

32

In [37]:
randarr.argmax() # return index of max value

1

In [42]:
arr.reshape(5,5).max() # with no axis given, max() reduces over every element of the matrix and returns a single value

24

In [43]:
# dtype 
arr.dtype

dtype('int64')

In [45]:
np.array([1.2,1.3]).dtype

dtype('float64')



---




## Indexing and Selection
- bracket indexing and selection
- broadcasting
- indexing 2d matrix
- conditional selections

In [48]:
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [50]:
# bracket indexing and selection
arr[8] # get value of index 8

8

In [52]:
arr[1:5] # index 1 to index 4

array([1, 2, 3, 4])

In [53]:
# broadcasting - the scalar 100 is assigned to every element of the slice,
# modifying arr in place
arr[0:5] = 100
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [54]:
# Start again from a fresh 0..10 array and take its first six elements.
# Basic slicing returns a view that shares memory with arr, not a copy.
arr = np.arange(11)
slice_arr = arr[:6]
slice_arr

array([0, 1, 2, 3, 4, 5])

In [55]:
slice_arr[:] = 99
slice_arr

array([99, 99, 99, 99, 99, 99])

In [57]:
arr # careful: slice_arr is a view of arr, so writing through the slice changed the original array too

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [58]:
# Rebuild the 0..10 array and take an independent copy of it,
# so later edits to arr_copy leave arr alone.
arr = np.arange(11)
arr_copy = arr.copy()
arr_copy

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [59]:
arr_copy[1:5] = 99
arr_copy

array([ 0, 99, 99, 99, 99,  5,  6,  7,  8,  9, 10])

In [61]:
arr # unchanged: arr_copy was made with .copy(), so edits to it do not touch the original array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [64]:
# Indexing a 2D matrix: the integers 1..15 arranged as 3 rows by 5 columns
arr_2d = np.arange(1, 16).reshape((3, 5))
arr_2d

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [65]:
arr_2d[1] # return second row

array([ 6,  7,  8,  9, 10])

In [68]:
arr_2d[:,1] # return second column

array([ 2,  7, 12])

In [69]:
arr_2d[1,1] # return value from second row and second column 

7

In [71]:
arr_2d[::2, ::2] # start:stop:step with step = 2 on both axes -> every other row and every other column

array([[ 1,  3,  5],
       [11, 13, 15]])

In [72]:
# conditional selection 
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [73]:
arr > 5

array([False, False, False, False, False, False,  True,  True,  True,
        True,  True])

In [74]:
arr[arr > 5]

array([ 6,  7,  8,  9, 10])

In [79]:
# conditional selection with two conditions combined by & (each condition needs its own parentheses);
# the boolean mask indexes the array directly, so wrapping it in np.where() is unnecessary
arr[(arr > 2) & (arr < 8)]

array([3, 4, 5, 6, 7])

In [82]:
# np.where with only a condition returns a tuple of index arrays (the positions where it is True);
# the indices look like the values here only because arr[i] == i
np.where((arr > 3) & (arr%2 == 0) )

(array([ 4,  6,  8, 10]),)

## NumPy Operations
- Arithmetic
- Universal Array Functions
- Summary Statistics 
- Axis


### Arithmetic

In [83]:
arr = np.arange(0,10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [84]:
arr + arr 

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [85]:
arr * arr

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

In [86]:
arr - arr

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [87]:
arr/arr # 0/0 does not raise: NumPy emits a RuntimeWarning and the result at that position is nan

  """Entry point for launching an IPython kernel.


array([nan,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [89]:
1/arr # 1/0 gives inf (with a RuntimeWarning) instead of raising ZeroDivisionError

  """Entry point for launching an IPython kernel.


array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111])

In [90]:
arr**3

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

### Universal Array Functions

In [91]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [92]:
np.exp(arr) # exponential function (e^)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [93]:
np.sin(arr) # sine function

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [94]:
np.log(arr) # natural log (base e); log(0) gives -inf with a RuntimeWarning

  """Entry point for launching an IPython kernel.


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458])

### Summary Statistics

In [95]:
arr.sum()

45

In [96]:
arr.mean()

4.5

In [97]:
arr.max()

9

**Others**
- arr.min()
- arr.var() # variance
- arr.std() # standard deviation

### Axis
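- working with 2D arrays, think of the two axes as rows and columns
- axis 0 - runs down the rows (its length is the number of rows)
- axis 1 - runs across the columns (its length is the number of columns)
- the axis number is the position in the tuple returned by `arr.shape`; an aggregation over an axis collapses that axis, so `sum(axis=0)` gives one value per column and `sum(axis=1)` gives one value per row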

In [98]:
arr_2d 

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [100]:
arr_2d.shape

(3, 5)

In [101]:
arr_2d.sum(axis=0) # column sums: axis 0 (the 3 rows) is collapsed, leaving one total per column -> 5 values

array([18, 21, 24, 27, 30])

In [102]:
arr_2d.sum(axis=1) # row sums: axis 1 (the 5 columns) is collapsed, leaving one total per row -> 3 values

array([15, 40, 65])