# Numpy Basic Tutorials
---
YouTube Channel: [codebasics](https://www.youtube.com/channel/UCh9nVJoWXmFb7sLApWGcLPQ)    
Reference: https://www.youtube.com/playlist?list=PLeo1K3hjS3uset9zIVzJWqplaWBiacTEU

### > Numpy Tutorial 1: Introduction
Reference: https://www.youtube.com/watch?v=rN0TREj8G7U

In [1]:
# First thing first - installing numpy
'''
$ pip install numpy
'''

# Second - Import it
import numpy as np # NOTE: np is just an alias, it can be any name you want

In [2]:
# Creating a one dimensional array
A = np.array([1,2,3])

In [3]:
# Jupyter special way of seeing the content of `A` array
A

array([1, 2, 3])

In [4]:
# NOTE: The output looks different from the one above. Jupyter lets you see the content of the
# array without printing it explicitly
print(A)

[1 2 3]


In [5]:
# This is very similar to a list, but there are huge differences compared to a Python list
A[0]

1

In [6]:
A[1]

2

### Let's see the differences between the Python list and numpy array
---
Why is numpy better than a Python list?
1. It uses less memory
2. It executes faster
3. It is convenient and easy to use

#### 1. Less Memory Usage

In [7]:
# Let's create a Python list to compare against.
# First, import sys so we can check object sizes.
import sys
SIZE = 1000000

# Creating a real list with SIZE (one million) elements in it.
# NOTE: range(SIZE) on its own is a lazy range object, not a list, so materialize it.
ls = list(range(SIZE))
integer = 1 # Can be any small number; it only serves as a sample int object to measure
# Rough estimate: size of one int object multiplied by the number of elements
# (the storage used by the list object itself is not counted in this expression)
print(f'The size used by the Python list: {sys.getsizeof(integer) * len(ls)} bytes')

# Creating a numpy array
arr = np.arange(SIZE)
print(f'The size used by the numpy array: {arr.size * arr.itemsize} bytes')
# NOTE:
# arr.size == SIZE == 1000000 (the number of elements)
# arr.itemsize == size in bytes of one element in the array; it depends on the dtype
# (4 bytes per element in the recorded output below)

The size used by the Python list: 28000000 bytes
The size used by the numpy array: 4000000 bytes


#### 2. Faster to be executed & convenient

In [8]:
# Let's see the execution time
import time

# Creating 2 real Python lists (range() alone is lazy and is not a list)
L1 = list(range(SIZE))
L2 = list(range(SIZE))

N1 = np.arange(SIZE)
N2 = np.arange(SIZE)

# Testing time by adding up the corresponding elements from two lists.
# time.perf_counter() is the clock intended for measuring short durations;
# time.time() is wall-clock time and can jump if the system clock is adjusted.
start = time.perf_counter()
result = [(x+y) for x, y in zip(L1, L2)]
print(f'Python took {(time.perf_counter() - start) * 1000}ms')

# Testing time by adding the corresponding elements from two numpy arrays
start = time.perf_counter()
result = N1 + N2
print(f'Numpy took {(time.perf_counter() - start) * 1000}ms')

Python took 161.56792640686035ms
Numpy took 14.333009719848633ms


In [9]:
# Basic operators, applied element by element to two arrays of the same shape
A = np.array([1,2,3])
B = np.array([4,5,6])

# Addition
print(A + B)

# Subtraction
print(A - B)

# Multiplication
print(A * B)

# Division (the result is printed as floats)
print(A / B)


[5 7 9]
[-3 -3 -3]
[ 4 10 18]
[0.25 0.4  0.5 ]


---

### > Numpy Tutorial 2: Basic Array Operations
Reference: https://www.youtube.com/watch?v=a8aDcLk4vRc

#### Numpy Dimension & Data Types & Data Size

In [10]:
# Single Dimensional array
sA = np.array([1,2,3])
print(sA.ndim)

1


In [11]:
# Multidimensional array
mA = np.array([ [1,2,3] , [3,4,5] , [5,6,7] ])
print(mA.ndim)

# Let's take a look at the item size
print(f'The item size for one {mA.dtype} element in the numpy array is {mA.itemsize} bytes')

2
The item size for one int32 element in the numpy array is 4 bytes


In [12]:
# Let's change the integer of the array into float
mA = np.array( [ [1,2,3], [3,4,5], [5,6,7] ], dtype=np.float64 ) # 64 bits = 8 bytes [1 byte = 8 bits]
print(f'The item size for one {mA.dtype} element in the numpy array is {mA.itemsize} bytes')

The item size for one float64 element in the numpy array is 8 bytes


In [13]:
# Checking the number of elements contained in the array
print(f'The number of elements in the numpy array is {mA.size}.')

# See the array
print(f'\n{mA}')

The number of elements in the numpy array is 9.

[[1. 2. 3.]
 [3. 4. 5.]
 [5. 6. 7.]]


In [14]:
# Jupyter way of checking the content of the array
mA

array([[1., 2., 3.],
       [3., 4., 5.],
       [5., 6., 7.]])

In [15]:
# Let's see how to change the array type into any other type, in this case `complex`
complexArr = mA.astype(complex)
print(complexArr)

[[1.+0.j 2.+0.j 3.+0.j]
 [3.+0.j 4.+0.j 5.+0.j]
 [5.+0.j 6.+0.j 7.+0.j]]


#### Numpy Shape: Finding the rows and columns of the array

In [16]:
# Getting the shape of the array || the (i, j) row and columns of the array
print(mA.shape)

# Let's see other example
a = np.array([[1,2], [3,4], [5,6]], dtype=np.float64)
print(f'\nNew array:\n{a}\nShape: {a.shape}')

(3, 3)

New array:
[[1. 2.]
 [3. 4.]
 [5. 6.]]
Shape: (3, 2)


#### Numpy Initialization

In [17]:
# Initializing your array with zeros
zeroArr = np.zeros( (3,6) ) # (3, 6) here is the shape of the array you want: 3 rows, 6 columns
print(zeroArr)

[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


In [18]:
# Initializing your array with ones
oneArr = np.ones( (2,5) )
print(oneArr)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


#### Numpy Arange

In [19]:
# Putting range of number into an array

# Python list way of setting range of elements into the list
ls = range(1, 5)
print(list(ls))

# Numpy array way of setting range of elements into the array
arr = np.arange(1, 5) # The last element is not included like usual
print(arr)

# Stepping forward
arr = np.arange(1, 10, 2)
print(arr)

[1, 2, 3, 4]
[1 2 3 4]
[1 3 5 7 9]


In [20]:
# Take a look at the documentation of np.arange; there are some other parameters that can be passed
?np.arange

#### Numpy Linspace: Create an evenly spaced sequence in a specified interval

In [21]:
# Another way of creating a numpy array
# a = np.linspace(start, stop, num) where num is the total number of samples, both endpoints included
linArr = np.linspace(0, 50, 5) # The stop is included as well
print(linArr)

# NOTE: The dtype of linspace is float by default, since evenly spaced values are generally not whole numbers
# If you want it to be int
linArr = np.linspace(0, 50, 5, dtype=np.int32) # or: linArr = linArr.astype(np.int32)
print(linArr)

[ 0.  12.5 25.  37.5 50. ]
[ 0 12 25 37 50]


In [22]:
?np.linspace

#### Numpy Reshape: Reshaping the array

In [23]:
# Let's set up an array for clearer view
arr = np.array( [[1,2], [3,4], [5,6]] )
print(f'The array:\n{arr}')
print(f'Shape of this array: {arr.shape}\n\n')

# Reshaping it to 2 rows and 3 columns
reshaped = arr.reshape(2,3)
print(f'The array:\n{reshaped}')
print(f'Shape of this array: {reshaped.shape}\n\n')

# Let's see the original arr
print(f'The original array:\n{arr}')
print(f'Shape of this array: {arr.shape}')
print('You can see that the original array is not affected.\n\n')

# As long as the size fit, you can reshape it into any shape
weirdArr = arr.reshape(6,1)
print(f'The array:\n{weirdArr}')
print(f'Shape of this array: {weirdArr.shape}')
print(f'The dimension of this array: {weirdArr.ndim}')
print(f'The size of this array: {weirdArr.size}')
print(f'The row of this array: {weirdArr.shape[0]}')
print(f'The column of this array: {weirdArr.shape[1]}')

The array:
[[1 2]
 [3 4]
 [5 6]]
Shape of this array: (3, 2)


The array:
[[1 2 3]
 [4 5 6]]
Shape of this array: (2, 3)


The original array:
[[1 2]
 [3 4]
 [5 6]]
Shape of this array: (3, 2)
You can see that the original array is not affected.


The array:
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
Shape of this array: (6, 1)
The dimension of this array: 2
The size of this array: 6
The row of this array: 6
The column of this array: 1


#### Numpy Ravel: Flattening your array

In [24]:
# Let's use weirdArr again to compare its shape, dimension and size
weirdArr.ravel() # This only previews the flattened result; weirdArr itself is not reshaped
flattenedArr = weirdArr.ravel() # This stores the flattened array in a variable
print(f'The array:\n{flattenedArr}')
print(f'Shape of this array: {flattenedArr.shape}')
print(f'The dimension of this array: {flattenedArr.ndim}')
print(f'The size of this array: {flattenedArr.size}')

# Take a look at the shape of the array, what's the difference?
# Reference: https://stackoverflow.com/questions/39627852/why-does-the-shape-of-a-1d-array-not-show-the-number-of-rows-as-1/39627884
print(f'The shape[0] of this array: {flattenedArr.shape[0]}') # NOTE: We don't call this "row" because a 1D array has no rows and columns
print(f'The shape[1] of this array: {flattenedArr.shape[1]}') # This line raises IndexError: the shape tuple of a 1D array has only one entry

The array:
[1 2 3 4 5 6]
Shape of this array: (6,)
The dimension of this array: 1
The size of this array: 6
The shape[0] of this array: 6


IndexError: tuple index out of range

#### Numpy Mathematical Function: Min, Max, Sum, Sqrt, Std

In [25]:
# Let's see an array
print(f'The array:\n{arr}\n')

# Getting the max
print(f'The max element in this array: {arr.max()}')

# Getting the min
print(f'The min element in this array: {arr.min()}')

# Getting the sum of the elements in the array
print(f'The sum elements in this array: {arr.sum()}')

# Let's add the elements according to the axis
# Axis = 0 :: Vertical elements
# Axis = 1 :: Horizontal elements
print(f'The sum vertical elements in this array:\n{arr.sum(axis=0)}\n')
print(f'The sum horizontal elements in this array:\n{arr.sum(axis=1)}\n') 
# NOTE: That the array shown is in 1D array format

# Getting the square root for each element in the array :: np.sqrt :: generic function; not a array function
print(f'The square root for each element in the array:\n{np.sqrt(arr)}\n')
print(f'Do note that the arr is not updated by the sqrt:\n{arr}\n')

# Getting the standard deviation using array function
print(f'The standard deviation of this array using array function: {arr.std()}')
print(f'The standard deviation of this array using generic function: {np.std(arr)}')

The array:
[[1 2]
 [3 4]
 [5 6]]

The max element in this array: 6
The min element in this array: 1
The sum elements in this array: 21
The sum vertical elements in this array:
[ 9 12]

The sum horizontal elements in this array:
[ 3  7 11]

The square root for each element in the array:
[[1.         1.41421356]
 [1.73205081 2.        ]
 [2.23606798 2.44948974]]

Do note that the arr is not updated by the sqrt:
[[1 2]
 [3 4]
 [5 6]]

The standard deviation of this array using array function: 1.707825127659933
The standard deviation of this array using generic function: 1.707825127659933


#### 2D Array Mathematic Operations: matrix dot multiplication, +, -, *, /

In [26]:
# Let's set up two 2D arrays
A1 = np.array([ [1,2] , [3,4] ])
A2 = np.array([ [5,6] , [7,8] ])

# Reference of creating ranges for 2D array
# https://stackoverflow.com/questions/56172814/how-to-create-a-2d-array-of-ranges-using-numpy

print(f'Array 1:\n{A1}\n')
print(f'Array 2:\n{A2}\n\n')

# Add (element-wise)
print(f'A1 + A2:\n{A1+A2}\n')

# Subtract (element-wise)
print(f'A2 - A1:\n{A2-A1}\n')

# Multiplication [NOTE: This is element-wise, not matrix dot multiplication]
print(f'A1 * A2:\n{A1*A2}\n')

# Division (element-wise)
print(f'A2 / A1:\n{A2/A1}\n')

# Matrix (dot) multiplication
# NOTE: Matrix multiplication is not commutative, so the label has to match the operand order:
# A1.dot(A2) computes A1 • A2, which is a different matrix from A2 • A1.
print(f'A1 • A2:\n{A1.dot(A2)}\n')

Array 1:
[[1 2]
 [3 4]]

Array 2:
[[5 6]
 [7 8]]


A1 + A2:
[[ 6  8]
 [10 12]]

A2 - A1:
[[4 4]
 [4 4]]

A1 * A2:
[[ 5 12]
 [21 32]]

A2 / A1:
[[5.         3.        ]
 [2.33333333 2.        ]]

A2 • A1:
[[19 22]
 [43 50]]



### > Numpy Tutorial 3: Slicing/Stacking Arrays, Indexing with Boolean Arrays
Reference: https://www.youtube.com/watch?v=_d_Ka-ks2a0

#### 1. Indexing & Slicing

In [2]:
# Let's create a numpy array again
import numpy as np
arr = np.array([1,2,3])

# Slicing the array
print(arr[0:2]) # NOTE: That the element on the index 2 is not included

# Indexing the array
print(arr[2])
print(arr[-1])

[1 2]
3
3


In [33]:
# Slicing and indexing on multidimensional array

# Creating a multidimensional array using arange
arr = np.arange(3) + np.array([0, 4, 21])[:, None]
# [:, None] adds a new axis, turning [0, 4, 21] into a column;
# adding it to the row np.arange(3) then broadcasts to the 2D array printed below

# Selecting the specified element in an array
print(f'The array:\n{arr}')
print(f'\narr[1,2] = {arr[1,2]}') # arr[x, y] where x is the row and y is the column

# Selecting a row of the array
print(f'\nThe last row = {arr[-1]}')

# Selecting a few elements in a specified row
print(f'\nThe last row\'s first and second element = {arr[-1, 0:2]}') # [row, column]

# Selecting only columns
print(f'\nThe last two columns elements = \n{arr[:, 1:3]}') # [row, column]

# Looping over the rows of the array
print('\nThe rows are...')
for row in arr:
    print(row)

The array:
[[ 0  1  2]
 [ 4  5  6]
 [21 22 23]]

arr[1,2] = 6

The last row = [21 22 23]

The last row's first and second element = [21 22]

The last two columns elements = 
[[ 1  2]
 [ 5  6]
 [22 23]]

The rows are...
[0 1 2]
[4 5 6]
[21 22 23]


#### Numpy Flat: Flattening the array

In [91]:
# Ravel vs Flat

# Ravel without list
print(f'arr.ravel() = {arr.ravel()}')

# Ravel with list
print(f'list(arr.ravel()) = {list(arr.ravel())}')

# Flat need list
print(f'list(arr.flat) = {list(arr.flat)}')

# Flatten without list
print(f'arr.flatten() = {arr.flatten()}')

arr.ravel() = [ 0  1  2  3  4  5  6  7  8  9 10 11]
list(arr.ravel()) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
list(arr.flat) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
arr.flatten() = [ 0  1  2  3  4  5  6  7  8  9 10 11]


#### What is the difference between arr.ravel() and arr.flatten()?
Reference: https://www.geeksforgeeks.org/differences-flatten-ravel-numpy/

a.ravel():
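(i) Returns a view (reference) of the original array whenever possible; a copy is made only when a view cannot be created
(ii) If you modify the returned view, you will notice that the value of the original array also changes.
(iii) ravel() is usually faster than flatten() because it avoids copying the data.
(iv) ravel is available both as a library-level function (np.ravel) and as an ndarray method (arr.ravel).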

a.flatten() :
(i) Returns a copy of the original array
(ii) If you modify any value of this copy, the value of the original array is not affected.
(iii) flatten() is comparatively slower than ravel() because it always allocates memory for the copy.
(iv) Flatten is a method of an ndarray object. 


In [38]:
# Creating a multidimensional array using arange [Simple version]
A = np.arange(6).reshape(3,2) # (row, col)
print(f'Array A:\n{A}\n')

# Arange with starting and stopping
B = np.arange(6, 12).reshape(3,2)
print(f'Array B:\n{B}\n')

Array A:
[[0 1]
 [2 3]
 [4 5]]

Array B:
[[ 6  7]
 [ 8  9]
 [10 11]]



#### Numpy VStack & HStack: Stacking two arrays together

In [47]:
# VStack = Vertical Stacking
print('Vertical Stacking')
C = np.vstack( (A, B) )
print(f'The stack array (A, B):\n{C}\n')

D = np.vstack( (B, A) )
print(f'The stack array (B, A):\n{D}\n')


Vertical Stacking
The stack array (A, B):
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]]

The stack array (B, A):
[[ 6  7]
 [ 8  9]
 [10 11]
 [ 0  1]
 [ 2  3]
 [ 4  5]]



In [49]:
# HStack = Horizontal Stacking
print('Horizontal Stacking')
C = np.hstack( (A, B) )
print(f'The stack array (A, B):\n{C}\n')

D = np.hstack( (B, A) )
print(f'The stack array (B, A):\n{D}\n')

Horizontal Stacking
The stack array (A, B):
[[ 0  1  6  7]
 [ 2  3  8  9]
 [ 4  5 10 11]]

The stack array (B, A):
[[ 6  7  0  1]
 [ 8  9  2  3]
 [10 11  4  5]]



#### Numpy VSplit & HSplit: Splitting a big array into sub-arrays

In [61]:
# Splitting the array into smaller arrays

# Creating an array with two rows
arr = np.arange(30).reshape(2, 15)
print(f'The array:\n{arr}\n')

print('Horizontal Splitting - np.hsplit(arr, x) where x is the number of split you want.')
print('======   -->   ===|===') # Make sure the array can be split equally

# Splitting the array into 3 equal parts along the columns
result = np.hsplit(arr, 3) # a, b, c = np.hsplit(arr, 3) also works
print(f'The first splitted array:\n{result[0]}\n')
print(f'The second splitted array:\n{result[1]}\n')
print(f'The third splitted array:\n{result[2]}\n')

# result = np.hsplit(arr, 2) <-- This produces an error because 15 columns cannot be divided equally into 2 parts

The array:
[[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
 [15 16 17 18 19 20 21 22 23 24 25 26 27 28 29]]

Horizontal Splitting - np.hsplit(arr, x) where x is the number of split you want.
The first splitted array:
[[ 0  1  2  3  4]
 [15 16 17 18 19]]

The second splitted array:
[[ 5  6  7  8  9]
 [20 21 22 23 24]]

The third splitted array:
[[10 11 12 13 14]
 [25 26 27 28 29]]



In [71]:
# Vertical Splitting
arr = np.arange(20).reshape(10, 2)
print(f'The array:\n{arr}\n')

print('Vertical Splitting - np.vsplit(arr, x) where x is the number of split you want.')
print(' ==\n ==\n ==\n ==   \n\n vv\n\n ==\n ==\n----\n ==\n ==\n') 
# Make sure the array can be split equally

# Let's try the vertical splitting
result = np.vsplit(arr, 2)
print(f'The first splitted array:\n{result[0]}\n')
print(f'The second splitted array:\n{result[1]}\n')


The array:
[[ 0  1]
 [ 2  3]
 [ 4  5]
 [ 6  7]
 [ 8  9]
 [10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]]

Vertical Splitting - np.vsplit(arr, x) where x is the number of split you want.
 ==
 ==
 ==
 ==   

 vv

 ==
 ==
----
 ==
 ==

The first splitted array:
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]

The second splitted array:
[[10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]]



#### Numpy Comparing & Replacing Elements

In [84]:
A = np.arange(15).reshape(5, 3)
print(A)

# Comparing the elements with a number
com = A > 5
print(f'\n{com}') # dtype = bool

# Comparing between two arrays
B = np.arange(14, -1, -1).reshape(5, 3)
print(f'\n{B}')

com = B > A
print(f'\n{com}')

# Getting the list of elements that are true in com array
print(f'\n{B[com]}')

# Replacing all the True element into other element
B[com] = -1
print(f'\n{B}')


[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]

[[False False False]
 [False False False]
 [ True  True  True]
 [ True  True  True]
 [ True  True  True]]

[[14 13 12]
 [11 10  9]
 [ 8  7  6]
 [ 5  4  3]
 [ 2  1  0]]

[[ True  True  True]
 [ True  True  True]
 [ True False False]
 [False False False]
 [False False False]]

[14 13 12 11 10  9  8]

[[-1 -1 -1]
 [-1 -1 -1]
 [-1  7  6]
 [ 5  4  3]
 [ 2  1  0]]


### > Numpy Tutorial 4: Nditer - Iterate numpy array
Reference: https://www.youtube.com/watch?v=XawR6CjAYV4  
Documentation: https://numpy.org/doc/stable/reference/generated/numpy.nditer.html

In [112]:
# Creating a numpy array
arr = np.arange(12).reshape(3, 4)
print(f'Array:\n{arr}\n')

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]



In [113]:
# Simple for loop for printing the list of elements
for row in arr:
    for cell in row:
        print(cell)

0
1
2
3
4
5
6
7
8
9
10
11


In [114]:
# Another flatten method, check out `Numpy Flat: Flattening the array` above
for cell in arr.flatten():
    print(cell)

0
1
2
3
4
5
6
7
8
9
10
11


#### Using np.nditer(arr, order='?')
Where ? can be
1. C :: Row-major order - counting from left to right along the first row, then moving on to the next row
2. F :: Column-major order - counting from top to bottom along the first column, then moving on to the next column to the right

In [115]:
# Let's try C order
print(f'Array:\n{arr}\n')
for cell in np.nditer(arr, order='C'):
    print(cell)

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

0
1
2
3
4
5
6
7
8
9
10
11


In [116]:
# Let's try F order
print(f'Array:\n{arr}\n')
for cell in np.nditer(arr, order='F'):
    print(cell)

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

0
4
8
1
5
9
2
6
10
3
7
11


In [117]:
# flags=['external_loop'] makes nditer yield 1D chunks instead of single elements.
# Combined with order='F', each chunk here is one column of arr,
# i.e. the same values as the rows of the transposed array printed at the end.
print(f'Array:\n{arr}\n')
for cell in np.nditer(arr, order='F', flags=['external_loop']):
    print(cell)

print(f'\nTranspose Array:\n{arr.T}\n')

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

[0 4 8]
[1 5 9]
[ 2  6 10]
[ 3  7 11]

Transpose Array:
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]



In [123]:
# op_flags = readwrite :: Updating the elements in place
arr = np.arange(12).reshape(3, 4)
print(f'Array:\n{arr}\n')

# Use nditer as a context manager when writing: leaving the `with` block
# guarantees that any pending changes are written back to `arr`.
with np.nditer(arr, op_flags=['readwrite']) as it:
    for x in it:
        x[...] = x**2 # x[...] writes into the element itself instead of rebinding the name x
        print(x)

print(f'\nUpdated array:\n{arr}\n')

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

0
1
4
9
16
25
36
49
64
81
100
121

Updated array:
[[  0   1   4   9]
 [ 16  25  36  49]
 [ 64  81 100 121]]



#### Iterating two arrays simultaneously

In [126]:
# Creating two new arrays
A = np.arange(12).reshape(3, 4)
print(f'Array:\n{A}\n')

B = np.arange(3, 15, 4).reshape(3, 1)
print(f'Array:\n{B}\n')

Array:
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

Array:
[[ 3]
 [ 7]
 [11]]



In [128]:
# Iterating two arrays simultaneously
for x, y in np.nditer([A, B]):
    print(x, y)

0 3
1 3
2 3
3 3
4 7
5 7
6 7
7 7
8 11
9 11
10 11
11 11


In [135]:
# Try iterating with another array which is not compatible
C = np.arange(3, 19, 5).reshape(4,1)
print(f'Array:\n{C}\n')

# ValueError: operands could not be broadcast together with shapes (3,4) (4,1) 
for x, y in np.nditer([A, C]):
    print(x, y)

Array:
[[ 3]
 [ 8]
 [13]
 [18]]



ValueError: operands could not be broadcast together with shapes (3,4) (4,1) 

In [136]:
# Now try iterating with an array whose shape (1, 4) can be broadcast against A's shape (3, 4)
C = np.arange(3, 19, 5).reshape(1,4)
print(f'Array:\n{C}\n')

# This works: the single row of C is paired with every row of A in turn
for x, y in np.nditer([A, C]):
    print(x, y)

Array:
[[ 3  8 13 18]]

0 3
1 8
2 13
3 18
4 3
5 8
6 13
7 18
8 3
9 8
10 13
11 18
