### import numpy

In [24]:
import numpy as np


### check python and numpy version

In [25]:
import platform
print('Python version: ' + platform.python_version())
print('Numpy version: ' + np.__version__)

Python version: 3.12.2
Numpy version: 2.1.2


# 0. Numpy Data Types

### A list of Numpy Data Types

In [26]:
import pandas as pd
dtypes = pd.DataFrame(
    {
        'Type': ['int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64', 'float16', 'float32', 'float64', 'float128', 'complex64', 'complex128', 'bool', 'object', 'string_', 'unicode_'],
        'Type Code': ['i1', 'u1', 'i2', 'u2', 'i4', 'u4', 'i8', 'u8', 'f2', 'f4 or f', 'f8 or d', 'f16 or g', 'c8', 'c16', '', 'O', 'S', 'U']
    }
)

dtypes

Unnamed: 0,Type,Type Code
0,int8,i1
1,uint8,u1
2,int16,i2
3,uint16,u2
4,int32,i4
5,uint32,u4
6,int64,i8
7,uint64,u8
8,float16,f2
9,float32,f4 or f


In [27]:
arr = np.array([1,2,3], dtype='f4')
print(arr)
print(arr.dtype)

arr = np.array([1+2j, 3-4j], dtype='c8')
print(arr)
print(arr.dtype)

arr = np.array([0, 1, 2], dtype="bool")
print(arr)
print(arr.dtype)

[1. 2. 3.]
float32
[1.+2.j 3.-4.j]
complex64
[False  True  True]
bool


In [28]:
arr = np.array([1.0,2,3])
print(arr)
print(arr.dtype)

[1. 2. 3.]
float64


### string data type

In [31]:
# set the max length of the string using S + some number, such as 'S3'
# any string longer than the max length will be truncated
s = np.array(['abc', 'defg'], dtype='S3')
print(s)
print(s.dtype)



[b'abc' b'def']
|S3


In [32]:
# numpy string and unicode data types are fixed-length
# string_ and unicode_ will treat the longest string in the array as the default length when creating an array
arr = np.array(['a', 'ab', 'abc'], dtype=np.string_)
print(arr.dtype)
print(arr)

arr = np.array(['a', 'ab', 'abc'], dtype=np.unicode_)
print(arr.dtype)

AttributeError: `np.string_` was removed in the NumPy 2.0 release. Use `np.bytes_` instead.

In [None]:
# what does "|" and "<" mean above?
# they are the byte order indicators, which is beyond the scope of this tutorial
# you can check it out here: https://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.byteorder.html
# and here: https://en.wikipedia.org/wiki/Endianness

# 1. Create Arrays

### create an array from a Python array

In [None]:
arr = np.array(range(10))
print(arr)

[0 1 2 3 4 5 6 7 8 9]


In [33]:
arr = np.array([1,2,3,4,5])
print(arr)

[1 2 3 4 5]


### create an array in a specified data type

In [34]:
arr = np.array([[1,2,3], [4,5,6]], dtype='i2')
print(arr)

[[1 2 3]
 [4 5 6]]


### create an aray of evenly spaced values within a specified interval

In [None]:
# np.arange(start, stop, step)
arr = np.arange(0, 20, 2)  
print(arr)

[ 0  2  4  6  8 10 12 14 16 18]


### create an array of evenly spaced numbers in a specified interval

In [None]:
# np.linspace(start, stop, num_of_elements, endpoint=True, retstep=False) 
arr = np.linspace(0, 3, 4,endpoint=True, retstep=False)
print(arr)

[0. 1. 2. 3.]


In [None]:
# exclude endpoint and return setp size
arr, step = np.linspace(0, 10, 21, endpoint=True, retstep=True)
print(arr)
print(step)

[ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5
  7.   7.5  8.   8.5  9.   9.5 10. ]
0.5


### create an array of random values in a given shape

In [None]:
rand_arr = np.random.rand(5, 3)
print(rand_arr)

[[0.87311909 0.59716337 0.70565054]
 [0.82166126 0.99208893 0.1030815 ]
 [0.60472375 0.76293439 0.99353216]
 [0.30376695 0.38838847 0.22185978]
 [0.54130104 0.68162281 0.51932774]]


### create an array of zeros in a given shape 

In [None]:
zeros = np.zeros((2,3), dtype='i4')
print(zeros)

[[0 0 0]
 [0 0 0]]


### create an array of zeros with the same shape and data type as a given array

In [None]:
zeros = np.zeros_like(arr)
print(zeros)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### create an array of ones in a given shape 

In [None]:
ones = np.ones((2,3))
print(ones)

[[1. 1. 1.]
 [1. 1. 1.]]


### create an array of ones with the same shape and data type as a given array

In [None]:
ones = np.ones_like(arr)
print(ones)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


### create an array of arbitrary values in a given shape 

In [None]:
empty = np.empty((2,3))
print(empty)

[[0. 0. 0.]
 [0. 0. 0.]]


### create an array of arbitrary values with the same shape and data type as a given array

In [35]:
empty = np.empty_like(arr)
print(empty)

[[    0 16256     0]
 [16384     0 16448]]


### create an array of constant values in a given shape  

In [None]:
p = np.full((2,3), 5)
print(p)

[[5 5 5]
 [5 5 5]]


### create an array of constant values with the same shape and data type as a given array

In [None]:
p = np.full_like(zeros_like, 7)
print(p)

[[7. 7. 7.]
 [7. 7. 7.]
 [7. 7. 7.]
 [7. 7. 7.]
 [7. 7. 7.]]


### create an array by repetition

In [None]:
# repeat each element of an array by a specified number of times
# np.repeat(iterable, reps, axis=None)
arr = [0, 1, 2]
print(np.repeat(arr, 5, axis=0))    # or np.repeat(range(3), 3)

[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2]


In [None]:
x = np.array([[1,2],[3,4]])
x

array([[1, 2],
       [3, 4]])

In [None]:
print(np.repeat(arr,3, axis=0))

[[1 2]
 [1 2]
 [1 2]
 [3 4]
 [3 4]
 [3 4]]


In [None]:
# repeat along a specified axis with specified number of repetitions
arr = [[1,2], [3,4]]
print(np.repeat(arr, [1,2], axis=0))

[[1 2]
 [3 4]
 [3 4]]


In [None]:
# repeat an array by a specified number of times
arr = [0, 1, 2]
print(np.tile(arr, 3))

[0 1 2 0 1 2 0 1 2]


In [None]:
# repeat along specified axes
print(np.tile(arr, (2,2)))

[[0 1 2 0 1 2]
 [0 1 2 0 1 2]]


### create an identity matrix with a given diagonal size

In [None]:
identity_matrix = np.eye(3)
print(identity_matrix)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [None]:
identity_matrix = np.identity(5)
print(identity_matrix)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


### create an identity matrix with a diagonal offset

In [None]:
identity_matrix = np.eye(5, k=1)    # positive number shifts the diagonal upward
print(identity_matrix)

[[0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]]


In [None]:
identity_matrix = np.eye(5, k=-2)   # negative number shifts the diagonal downward
print(identity_matrix)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]


### extract the diagonal array / create a diagonal array

In [None]:
arr = np.random.rand(5,5)
print(arr)

[[6.31340799e-01 1.27711358e-01 4.25996526e-01 5.19918294e-01
  9.76844893e-01]
 [6.21629521e-01 8.06723112e-01 4.82996997e-03 9.21844523e-01
  4.22283330e-01]
 [5.04273588e-01 4.52961683e-01 7.63734930e-01 8.20110531e-01
  1.14693380e-01]
 [4.12436860e-01 4.03015906e-01 4.24528026e-01 4.70910904e-01
  4.70190306e-02]
 [1.75246983e-01 5.30752549e-01 4.74938549e-01 8.25785437e-01
  3.73992546e-04]]


In [None]:
# extract the diagonal
print(np.diag(arr))

[6.31340799e-01 8.06723112e-01 7.63734930e-01 4.70910904e-01
 3.73992546e-04]


In [None]:
# create a matrix with a specified diagonal array
arr = np.diag([1,2,3,4,5])
print(arr)

[[1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 3 0 0]
 [0 0 0 4 0]
 [0 0 0 0 5]]


# 2. Inspect Arrays

In [None]:
arr = np.array([[1,2,3], [4,5,6]], dtype=np.int64)

### inspect general information of an array

In [None]:
print(np.info(arr))

class:  ndarray
shape:  (2, 3)
strides:  (24, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x2a4706176c0
byteorder:  little
byteswap:  False
type: int64
None


### inspect the data type of an array

In [None]:
print(arr.dtype)

int64


### inspect the dimension of an array

In [None]:
print(arr.shape)

(2, 3)


### inspect length of an array

In [None]:
print(len(arr))

2


### inspect the number of dimensions of an array

In [None]:
print(arr.ndim)

2


### inspect the number of elements in an array

In [None]:
print(arr.size)

6


### inspect the number of bytes of each element in an array

In [None]:
print(arr.itemsize)

8


### inspect the memory size of an array (in byte)

In [None]:
# arr.nbytes = arr.size * arr.itemsize
print(arr.nbytes)

48


# 3. Sampling Methods

### set seed

In [None]:
np.random.seed(123)

### set random state which is independent from the global seed

In [None]:
rs = np.random.RandomState(8)
rs.rand(10)

array([0.8734294 , 0.96854066, 0.86919454, 0.53085569, 0.23272833,
       0.0113988 , 0.43046882, 0.40235136, 0.52267467, 0.4783918 ])

### generate a random sample from interval [0, 1) in a given shape

In [None]:
# generate a random scalar
print(np.random.rand())      

0.9701769486766465


In [None]:
# generate a 1-D array
print(np.random.rand(3))           

[0.66032539 0.9092129  0.49738994]


In [None]:
# generate a 2-D array
print(np.random.rand(3,3))          

[[0.70460071 0.27583755 0.86101621]
 [0.12110836 0.70654979 0.0106169 ]
 [0.48955773 0.01581689 0.23099406]]


### generate a sample from the standard normal distribution (mean = 0, var = 1)

In [None]:
print(np.random.randn(3,3))

[[-0.14337247 -0.6191909  -0.76943347]
 [ 0.57674602  0.12652592 -1.30148897]
 [ 2.20742744  0.52274247  0.46564476]]


### generate an array of random integers in a given interval [low, high)

In [None]:
# np.ranodm.randint(low, high, size, dtype)
print(np.random.randint(1, 10, 3, 'i8'))

[1 1 5]


### generate an array of random floating-point numbers in the interval [0.0, 1.0)

In [None]:
# the following methods are the same as np.random.rand()
print(np.random.random_sample(10))
print(np.random.random(10))
print(np.random.ranf(10))
print(np.random.sample(10))

[0.63179202 0.44025718 0.08372648 0.71233018 0.42786349 0.2977805
 0.49208478 0.74029639 0.35772892 0.41720995]
[0.65472131 0.37380143 0.23451288 0.98799529 0.76599595 0.77700444
 0.02798196 0.17390652 0.15408224 0.07708648]
[0.8898657  0.7503787  0.69340324 0.51176338 0.46426806 0.56843069
 0.30254945 0.49730879 0.68326291 0.91669867]
[0.10892895 0.49549179 0.23283593 0.43686066 0.75154299 0.48089213
 0.79772841 0.28270293 0.43341824 0.00975735]


### generate a random sample from a given 1-D array

In [None]:
# np.random.choice(iterable_or_int, size, replace=True, p=weights)
print(np.random.choice(range(3), 10, replace=True, p=[0.1, 0.8, 0.1]))

[1 1 1 1 1 0 1 1 1 1]


In [None]:
print(np.random.choice(3, 10))

[2 1 1 0 1 0 0 2 2 2]


In [None]:
print(np.random.choice([1,2,3], 10))

[3 2 1 2 3 3 2 3 2 3]


### shuffle an array in place

In [None]:
arr = np.array(range(10))
print(arr)

[0 1 2 3 4 5 6 7 8 9]


In [None]:
np.random.shuffle(arr)
print(arr)

[8 4 6 3 9 5 1 7 2 0]


### generate a permutation of an array

In [None]:
# similar to np.random.shuffle(), but it returns a copy rather than making changes in place
arr = np.array(range(10))
print('The initial array: ', arr)
print('A permutation of the array: ', np.random.permutation(arr))

The initial array:  [0 1 2 3 4 5 6 7 8 9]
A permutation of the array:  [3 6 2 4 5 9 1 8 0 7]


# 4. Math Functions

In [None]:
arr = np.random.rand(5,5)

In [None]:
arr

array([[0.36700563, 0.49230332, 0.36054373, 0.06050417, 0.10752849],
       [0.3306844 , 0.19665339, 0.72721347, 0.01351523, 0.47836055],
       [0.8383947 , 0.72356806, 0.7417161 , 0.49723207, 0.02819886],
       [0.56758473, 0.90995592, 0.99614976, 0.44971738, 0.48079677],
       [0.86932533, 0.4389932 , 0.29913611, 0.25592063, 0.38810422]])

### element-wise addition, subtraction, multiplication and division

In [None]:
print(arr + 10)
print(arr - 10)
print(arr * 10)
print(arr / 10)

[[10.36700563 10.49230332 10.36054373 10.06050417 10.10752849]
 [10.3306844  10.19665339 10.72721347 10.01351523 10.47836055]
 [10.8383947  10.72356806 10.7417161  10.49723207 10.02819886]
 [10.56758473 10.90995592 10.99614976 10.44971738 10.48079677]
 [10.86932533 10.4389932  10.29913611 10.25592063 10.38810422]]
[[-9.63299437 -9.50769668 -9.63945627 -9.93949583 -9.89247151]
 [-9.6693156  -9.80334661 -9.27278653 -9.98648477 -9.52163945]
 [-9.1616053  -9.27643194 -9.2582839  -9.50276793 -9.97180114]
 [-9.43241527 -9.09004408 -9.00385024 -9.55028262 -9.51920323]
 [-9.13067467 -9.5610068  -9.70086389 -9.74407937 -9.61189578]]
[[3.67005632 4.92303322 3.60543734 0.60504172 1.07528487]
 [3.30684398 1.96653386 7.27213467 0.13515234 4.7836055 ]
 [8.38394703 7.23568056 7.41716102 4.97232065 0.28198863]
 [5.67584726 9.0995592  9.96149764 4.49717379 4.80796772]
 [8.69325331 4.38993204 2.9913611  2.55920626 3.88104221]]
[[0.03670056 0.04923033 0.03605437 0.00605042 0.01075285]
 [0.03306844 0.0196

In [None]:
arr1 = np.array([1,2,3])
# the above operations can be performed using numpy built-in functions
# which can save memory as the output can be stored in the original array rather than assigning new memoryarr = np.array([1,2,3])
np.add(arr1, np.arange(8,11), out=arr1)
print(arr1)

np.subtract(arr1, [8,9,10], out=arr1)
print(arr1)

np.multiply(arr1, [1,2,3], out=arr1)
print(arr1)

[ 9 11 13]
[1 2 3]
[1 4 9]


### element-wise exponentiation

In [None]:
print(np.exp(arr))

[[2.2459457  2.38350515 1.51890922 1.06070964 1.61358596]
 [1.68397897 1.7871676  1.36233345 2.50899588 1.92581163]
 [1.41799143 1.71788528 1.56682832 1.32620431 1.34430416]
 [1.75677718 2.04457773 1.67802696 1.421968   1.88283545]
 [2.3943673  1.18280466 2.6844566  1.41823754 2.28463238]]


In [None]:
np.exp([1,1])

array([2.71828183, 2.71828183])

In [None]:
np.log(np.exp(1))

1.0

### element-wise logorithm

In [None]:
# natural log
print(np.log(arr))      

[[-0.13524456 -3.52595367 -0.59412803 -1.42608562 -0.12238798]
 [-0.77601061 -1.64418294 -1.22522197 -0.20098157 -0.58073432]
 [-0.38871796 -0.21179979 -0.14090462 -0.87229189 -2.83126659]
 [-0.73718475 -0.65169928 -0.54363808 -1.1737702  -0.0835092 ]
 [-0.42258965 -1.05199195 -0.61416218 -0.80061348 -1.2647107 ]]


In [None]:
# base 2
print(np.log2(arr))     

[[-0.19511665 -5.08687587 -0.85714556 -2.05740665 -0.17656854]
 [-1.11954666 -2.37205457 -1.76762165 -0.28995511 -0.83782252]
 [-0.56080147 -0.3055625  -0.20328239 -1.25845119 -4.08465428]
 [-1.06353278 -0.94020332 -0.78430396 -1.69339245 -0.12047831]
 [-0.60966799 -1.51770357 -0.88604873 -1.15504109 -1.82459185]]


In [None]:
# base 10
print(np.log10(arr))    

[[-0.05873596 -1.53130222 -0.25802652 -0.61934112 -0.05315243]
 [-0.33701713 -0.71405958 -0.53210714 -0.08728519 -0.25220971]
 [-0.16881806 -0.09198348 -0.0611941  -0.37883156 -1.22960346]
 [-0.32015527 -0.2830294  -0.23609902 -0.50976192 -0.03626758]
 [-0.18352835 -0.4568743  -0.26672725 -0.34770202 -0.54925688]]


### element-wise square root

In [None]:
print(np.sqrt(arr))

[[0.89951469 0.93197219 0.64652336 0.24277182 0.69170731]
 [0.72191373 0.76199214 0.55605665 0.95910512 0.80953536]
 [0.59096648 0.73559095 0.67011446 0.53133884 0.54394533]
 [0.7506537  0.84568982 0.71945721 0.59333113 0.79547398]
 [0.93440838 0.40974192 0.99371944 0.5911133  0.90895827]]


### element-wise sine and cosine

In [None]:
np.sqrt(16)

4.0

In [None]:
arr1 = np.array([1,2,3])
np.sqrt(arr1)

array([1.        , 1.41421356, 1.73205081])

In [None]:
print(np.sin(arr))

[[0.7665826  0.02941949 0.52442846 0.23794303 0.77379149]
 [0.44416174 0.19197122 0.28948859 0.72973036 0.5307518 ]
 [0.62717856 0.72368474 0.76340745 0.40592657 0.05890404]
 [0.46041177 0.49788598 0.54855249 0.30429572 0.79553051]
 [0.60943479 0.34218509 0.51507406 0.43411297 0.27858547]]


In [None]:
print(np.cos(arr))

[[0.64214571 0.99956715 0.85145452 0.97127911 0.63344039]
 [0.89594662 0.98140056 0.95718147 0.68373503 0.8475273 ]
 [0.77887551 0.69013071 0.64591723 0.9139057  0.99826365]
 [0.88770547 0.8672425  0.83611612 0.95257762 0.60591354]
 [0.7928362  0.93963257 0.85714568 0.90085844 0.96041144]]


In [None]:
arr=np.array([[1, 2 , 3], [4, 5, 6], [7, 8, 9]])

In [None]:
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### sum along a specified axis

In [None]:
# sum along the row
print(np.sum(arr, axis=0))    

[12 15 18]


In [None]:
# sum along the column
print(np.sum(arr, axis=1))    

[ 6 15 24]


### compute the min and max along a specified axis

In [None]:
# calculate min along the row
print(np.min(arr, axis=0))

[1 2 3]


In [None]:
# calculate max along the column
print(np.max(arr, axis=1))    

[3 6 9]


In [None]:
# if axis not specified, calculate the max/min value of all elements
print(np.max(arr))
print(np.min(arr))

9
1


### compute the indices of the min and max along a specified axis

In [None]:
arr=np.array([[10, 2 , 3], [4, 8, 6], [2, 7, 13]])

In [None]:
print(arr)

[[10  2  3]
 [ 4  8  6]
 [ 2  7 13]]


In [None]:
# along the row
print(np.argmin(arr, axis=0))
print(np.argmax(arr, axis=0))

[2 0 0]
[0 1 2]


In [None]:
# along the column
print(np.argmin(arr, axis=1))
print(np.argmax(arr, axis=1))

[1 0 0]
[0 1 2]


In [None]:
# if axis not specified, return the index of the flattened array
print(np.argmin(arr))
print(np.argmax(arr))

1
8


### compute element-wise min and max of two arrays

In [None]:
arr1 = np.array([[1, 3, 5],[7, 8, 9]])
arr2 = np.array([[0, 4, 3],[2, 4, 5]])
print(np.maximum(arr1, arr2))
print(np.minimum(arr1, arr2))

[[1 4 5]
 [7 8 9]]
[[0 3 3]
 [2 4 5]]


### split fractional and integral parts of a floating-point array

In [None]:
arr1 = np.random.rand(10) * 10
re, intg = np.modf(arr1)
print(arr1)
print('fractional: ', re)
print('integral: ', intg)

[9.7576773  3.06688571 4.17572052 3.41890941 9.1766928  6.62805766
 8.79455008 4.39289228 1.00360639 7.16105726]
fractional:  [0.7576773  0.06688571 0.17572052 0.41890941 0.1766928  0.62805766
 0.79455008 0.39289228 0.00360639 0.16105726]
integral:  [9. 3. 4. 3. 9. 6. 8. 4. 1. 7.]


### compute the mean

In [None]:
# compute the overall mean
arr1 = np.array([1,2,3])
print(np.mean(arr1))

2.0


In [None]:
arr=np.array([[10, 2 , 3], [4, 8, 6], [2, 7, 9]])
print(arr)
# compute the mean along the row
print(np.mean(arr, axis=0))   

[[10  2  3]
 [ 4  8  6]
 [ 2  7  9]]
[5.33333333 5.66666667 6.        ]


In [None]:
# compute the mean along the column
print(np.mean(arr, axis=1)) 

[5. 6. 6.]


### compute the median

In [None]:
arr1 = np.array([5,2,6,4])# compute the overall median
print(np.median(arr1))

4.5


In [None]:
# compute the median along the row
print(np.median(arr, axis=0)) 

[4. 7. 6.]


In [None]:
# compute the median along the column
print(np.median(arr, axis=1))

[0.55204372 0.46023842 0.67792545 0.52115943 0.4490534 ]


### compute the percentile

In [None]:
arr1 = np.random.rand(100)
# compute 5, 65, and 95 percentiles of the array
print(np.percentile(arr1, [5, 65, 95]))

[0.02936265 0.7055695  0.97147254]


### compute the standard deviation & variance

In [None]:
arr1.sort()
print(arr1)

[0.00610258 0.0105478  0.02012975 0.02391173 0.02509183 0.02958743
 0.0335144  0.04718055 0.05177277 0.05492812 0.08235109 0.09547905
 0.10885369 0.11203909 0.14148241 0.14917541 0.15582293 0.16348092
 0.17214768 0.18617954 0.19497803 0.20101979 0.20283845 0.20331571
 0.20420727 0.23026398 0.26950515 0.28600489 0.28861347 0.29012531
 0.31063051 0.33230915 0.37653713 0.38397357 0.40156415 0.42506248
 0.42556158 0.43438687 0.43770834 0.46459611 0.49777987 0.51294801
 0.51334862 0.51512875 0.51652552 0.52127297 0.53017389 0.54289635
 0.544506   0.56661742 0.5734977  0.57747717 0.58383648 0.60736609
 0.61309323 0.62641702 0.64369296 0.64473185 0.64941367 0.65704652
 0.66550973 0.67388227 0.697827   0.69986071 0.70258361 0.71111473
 0.73391764 0.7606978  0.76772176 0.78217034 0.79679244 0.80903788
 0.81064032 0.83590835 0.84386194 0.84531822 0.85232981 0.85629492
 0.86489693 0.87060547 0.87496425 0.87884004 0.88817533 0.89344464
 0.91165    0.91410589 0.91579237 0.92349983 0.92499751 0.9293

In [None]:
(0.02509183+0.02958743+0.0335144)/3

0.029397886666666668

In [None]:
arr = np.array([1,2,6,40])# compute the overall median
print(np.mean(arr))
# compute the overall standard deviation
print(np.std(arr))
print(np.var(arr))
print(np.sqrt(np.var(arr)))


12.25
16.13032857693854
260.1875
16.13032857693854


In [None]:
# compute the standard deviation along the row
print(np.std(arr, axis=0))

[3.39934634 2.62466929 2.44948974]


In [None]:
# compute the standard deviation along the column
print(np.std(arr, axis=1))

[0.33997649 0.21759869 0.29739013 0.20049061 0.13310804]


In [None]:
# compute the overall variance
print(np.var(arr))

260.1875


In [None]:
# compute the variance along the row
print(np.var(arr, axis=0))

[0.02280245 0.07256599 0.03334464 0.0400337  0.11227883]


In [None]:
# compute the variance along the column
print(np.var(arr, axis=1))

[0.11558401 0.04734919 0.08844089 0.04019649 0.01771775]


### compute the covariance & correlation

In [None]:
arr = np.random.rand(5,8)

In [None]:
print(np.cov(arr))

[[ 0.08348251 -0.02432034 -0.01656777  0.01869514 -0.01991955]
 [-0.02432034  0.10598514  0.04369059  0.01519759  0.03112859]
 [-0.01656777  0.04369059  0.08873143  0.03554201  0.03197368]
 [ 0.01869514  0.01519759  0.03554201  0.03526523 -0.00211817]
 [-0.01991955  0.03112859  0.03197368 -0.00211817  0.05248675]]


In [None]:
print(np.corrcoef(arr[:,0], arr[:,1]))

[[ 1.         -0.54127501]
 [-0.54127501  1.        ]]


### compute cumulative sum & product

In [None]:
# calculate the cumulative sums along the row
print(np.cumsum(arr, axis=0))    

[[0.40939041 0.37251939 0.33997017 0.81257008 0.52820553 0.55382711
  0.11720684 0.78460482]
 [0.8439766  0.44974212 0.5217817  1.33358096 1.0075397  0.8673696
  0.21687605 1.38115748]
 [1.09214649 1.05408228 0.82689535 2.2308859  1.58955502 1.57986202
  0.90049152 2.32777838]
 [2.00537774 1.10533719 1.20721083 2.79474244 1.72212877 2.15499731
  1.22305927 3.31170594]
 [2.58104356 1.73506483 1.60822311 3.5393725  2.12331469 2.92047243
  2.20549051 3.47982271]]


In [None]:
arr=[3,5,6,8,11]
print(np.cumsum(arr))
print(np.cumprod(arr))

[ 3  8 14 22 33]
[   3   15   90  720 7920]


In [None]:
# calculate the cumulative sums along the column
print(np.cumsum(arr, axis=1))    

[[0.40939041 0.7819098  1.12187997 1.93445006 2.46265559 3.01648269
  3.13368953 3.91829435]
 [0.43458619 0.51180891 0.69362044 1.21463132 1.69396549 2.00750798
  2.10717719 2.70372985]
 [0.24816989 0.85251005 1.1576237  2.05492864 2.63694396 3.34943639
  4.03305187 4.97967276]
 [0.91323126 0.96448617 1.34480165 1.90865819 2.04123193 2.61636722
  2.93893496 3.92286253]
 [0.57566582 1.20539345 1.60640573 2.35103579 2.75222172 3.51769684
  4.50012808 4.66824484]]


In [None]:
# calculate the cumulative product along the row
arr = np.random.rand(5,8)
print(np.cumprod(arr, axis=0))   


[[1.98152630e-01 8.53767710e-01 5.98215774e-01 6.75171120e-01
  7.07481839e-01 6.85780691e-01 3.96823316e-01 5.21589096e-01]
 [1.36181471e-01 4.39386738e-01 2.56910068e-01 6.65273950e-01
  5.22263770e-01 8.35815406e-02 1.64530976e-01 2.30921747e-01]
 [8.13368116e-02 2.70754931e-01 1.82276337e-01 4.17639922e-02
  2.02433003e-01 3.14443142e-02 9.38001605e-03 1.78827489e-01]
 [1.39224965e-02 2.28579787e-01 4.92769938e-02 4.27224599e-04
  1.11600956e-01 1.03809829e-02 1.55598356e-04 1.10035854e-01]
 [6.06962204e-03 1.76034732e-01 5.54570307e-03 3.95354320e-04
  9.62119628e-02 7.66564202e-03 1.31465591e-04 7.23454424e-02]]


In [None]:
# calculate the cumulative product along the column
print(np.cumprod(arr, axis=1))  

[[4.09390413e-01 1.52505867e-01 5.18474458e-02 4.21296834e-02
  2.22531316e-02 1.23243875e-02 1.44450249e-03 1.13336361e-03]
 [4.34586185e-01 3.35599307e-02 6.10158235e-03 3.17899076e-03
  1.52379890e-03 4.77775703e-04 4.76195265e-05 2.84075554e-05]
 [2.48169887e-01 1.49979030e-01 4.57606489e-02 4.10612562e-02
  2.38982805e-02 1.70273439e-02 1.16401558e-02 1.10188147e-02]
 [9.13231256e-01 4.68075854e-02 1.78016494e-02 1.00375765e-02
  1.33071906e-03 7.65343488e-04 2.46875125e-04 2.42907240e-04]
 [5.75665818e-01 3.62512674e-01 1.45372033e-01 1.08248386e-01
  4.34277288e-02 3.32428461e-02 3.26588104e-02 5.49049362e-03]]


### element-wise comparison

In [None]:
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([5,4,3,2,1])

In [None]:
# return an array of bools
print(arr1 == arr2)    
print(arr1 <= 3)
print(np.sum(arr1 < 3))
arr3 = np.array([1,2,3,4,5,2,3,4])
np.unique(arr3,return_counts=True)

[False False  True False False]
[ True  True  True False False]
2


(array([1, 2, 3, 4, 5]), array([1, 2, 2, 2, 1]))

# 5. Slicing & Indexing

In [None]:
arr = np.array(range(100)).reshape((10,10))

### select an element by row and column indices

In [None]:
arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [None]:
arr[4,4]
arr[4][4]

44

In [None]:
arr.reshape(2,50)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
        48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
        66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
        82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
        98, 99]])

In [None]:
arr[4][4]

44

In [None]:
print(arr[5][5])
# or more concisely
print(arr[5,5])

55
55


### indexing with slicing

In [None]:
print(arr[1:3, 4:6])

arr[:,1]

[[14 15]
 [24 25]]


array([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [None]:
# ellipsis slicing: auto-complete the dimensions
arr = np.array(range(16)).reshape(2,2,2,2)
# equivalent to arr[0,:,:,:]
print(arr[0, ...])    

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


### assign a scalar to a slice by broadcasting

In [None]:
arr[1:3,:] = 100    # or simply arr[1:3]
arr[:,8:] = 100
print(arr)

[[  0   1   2   3   4   5   6   7 100 100]
 [100 100 100 100 100 100 100 100 100 100]
 [100 100 100 100 100 100 100 100 100 100]
 [ 30  31  32  33  34  35  36  37 100 100]
 [ 40  41  42  43  44  45  46  47 100 100]
 [ 50  51  52  53  54  55  56  57 100 100]
 [ 60  61  62  63  64  65  66  67 100 100]
 [ 70  71  72  73  74  75  76  77 100 100]
 [ 80  81  82  83  84  85  86  87 100 100]
 [ 90  91  92  93  94  95  96  97 100 100]]


In [None]:
arr

array([[  0,   1,   2,   3,   4,   5,   6,   7, 100, 100],
       [100, 100, 100, 100, 100, 100, 100, 100, 100, 100],
       [100, 100, 100, 100, 100, 100, 100, 100, 100, 100],
       [ 30,  31,  32,  33,  34,  35,  36,  37, 100, 100],
       [ 40,  41,  42,  43,  44,  45,  46,  47, 100, 100],
       [ 50,  51,  52,  53,  54,  55,  56,  57, 100, 100],
       [ 60,  61,  62,  63,  64,  65,  66,  67, 100, 100],
       [ 70,  71,  72,  73,  74,  75,  76,  77, 100, 100],
       [ 80,  81,  82,  83,  84,  85,  86,  87, 100, 100],
       [ 90,  91,  92,  93,  94,  95,  96,  97, 100, 100]])

### boolean indexing

In [None]:
arr1 = np.arange(25).reshape((5,5))
#print (arr1)
bools = np.array([True, True, False, True, False])
print(arr1[bools])


[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [15 16 17 18 19]]


In [None]:
a=True

In [None]:
bools = np.array([True, True, False, True, False])


In [None]:
~bools

array([False, False,  True, False,  True])

In [None]:
# negate the condition
print(arr1[~bools])    

[[10 11 12 13 14]
 [20 21 22 23 24]]


In [None]:
arr2 = np.array([1,2,3,4,5])

print((arr2<2) | (arr2>4))

# multiple conditions
print(arr2[(arr2<2) | (arr2>4)])    

[ True False False False  True]
[1 5]


In [None]:
arr2 = np.array([1,2,3,4,5])
(arr2<2) | (arr2>4)

array([ True, False, False, False,  True])

### fancy indexing

In [None]:
arr = np.random.rand(10,10)

In [None]:
arr

array([[0.5637054 , 0.3083644 , 0.15187102, 0.06771095, 0.97786093,
        0.95038905, 0.67685425, 0.0220101 , 0.95961258, 0.71380332],
       [0.90623775, 0.77766251, 0.01912229, 0.56738601, 0.02251918,
        0.24924472, 0.51357153, 0.47035461, 0.1114155 , 0.38803632],
       [0.22199568, 0.3281278 , 0.77353404, 0.72503331, 0.17702052,
        0.18514878, 0.66594899, 0.70723822, 0.46958011, 0.67155141],
       [0.4980589 , 0.74146201, 0.62258671, 0.68071902, 0.28168989,
        0.55785455, 0.32781594, 0.44562136, 0.66010232, 0.38910065],
       [0.97431845, 0.59887487, 0.79312456, 0.30063805, 0.92798178,
        0.10596439, 0.98949381, 0.61830996, 0.00998426, 0.67628518],
       [0.02495265, 0.80204422, 0.94706802, 0.6038373 , 0.76781628,
        0.31115582, 0.99982123, 0.18994421, 0.94752158, 0.13430099],
       [0.8089414 , 0.11235574, 0.81707074, 0.91803384, 0.66724337,
        0.2109814 , 0.52949602, 0.0860856 , 0.2464657 , 0.51168812],
       [0.44836684, 0.25109927, 0.7532032

In [None]:
# select arr[3,3], arr[1,2], arr[2,1]
print(arr[[3,1,2], [3,2,1]])       

[0.68071902 0.01912229 0.3281278 ]


In [None]:
# select rows 3,1,2 and columns 6,4,8 
print(arr[[3,1,2]][:, [6,4,8]])    

[[0.4358603  0.65206127 0.24468691]
 [0.29366174 0.10862771 0.32711534]
 [0.18313084 0.23759754 0.16236423]]


### dimension inference

In [None]:
# dimension inference using any negative number (usually -1)
arr = np.array(range(16)).reshape((-1,4))
print(arr.shape)

(4, 4)


### find elements/indices by conditions

In [None]:
arr = np.arange(16).reshape(4,4)
print(arr)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [None]:
# find the elements greater than 5 and return a flattened array
print(arr[arr>5])    # or arr[np.where(arr>5)]

[ 6  7  8  9 10 11 12 13 14 15]


In [None]:
arr>5

array([[False, False, False, False],
       [False, False,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [None]:
# return values based on conditions 
# np.where(condition, true_return, false_return)
print(np.where(arr>5, -1, 10))

[[10 10 10 10]
 [10 10 -1 -1]
 [-1 -1 -1 -1]
 [-1 -1 -1 -1]]


In [None]:
# find the indices of the elements on conditions
print(np.argwhere(arr>5))

[[1 2]
 [1 3]
 [2 0]
 [2 1]
 [2 2]
 [2 3]
 [3 0]
 [3 1]
 [3 2]
 [3 3]]


# 6. Sort an Array

In [None]:
arr = np.random.rand(5,5)

In [None]:
arr

array([[0.91169557, 0.29277717, 0.85692548, 0.78851451, 0.75056571],
       [0.37734504, 0.25099337, 0.46330528, 0.89682457, 0.09558084],
       [0.91897495, 0.23727616, 0.50983315, 0.61333997, 0.60616218],
       [0.54290531, 0.66624501, 0.14897225, 0.06548913, 0.69762239],
       [0.60755347, 0.71118497, 0.6856014 , 0.26239314, 0.06973902]])

### sort an array along a specified axis

In [None]:
# sort along the row and return a copy
print(np.sort(arr, axis=0))   

[[0.49949874 0.10114412 0.03730588 0.19473393 0.54028614]
 [0.75465269 0.16251518 0.06401398 0.38301862 0.54257234]
 [0.76638132 0.37762709 0.08495801 0.4226051  0.6638128 ]
 [0.78288355 0.42644389 0.28094985 0.93550925 0.85587381]
 [0.95245677 0.64140882 0.34649155 0.98240687 0.97858938]]


In [None]:
np.sort(arr, axis=0)

array([[0.37734504, 0.23727616, 0.14897225, 0.06548913, 0.06973902],
       [0.54290531, 0.25099337, 0.46330528, 0.26239314, 0.09558084],
       [0.60755347, 0.29277717, 0.50983315, 0.61333997, 0.60616218],
       [0.91169557, 0.66624501, 0.6856014 , 0.78851451, 0.69762239],
       [0.91897495, 0.71118497, 0.85692548, 0.89682457, 0.75056571]])

In [None]:
# sort along the row in place
arr.sort(axis=0)
print(arr)

[[0.03200145 0.21824392 0.43363479 0.01299626 0.07382587]
 [0.03482672 0.36572993 0.48291756 0.21141667 0.4388661 ]
 [0.49834197 0.89345833 0.83614832 0.26832068 0.44781943]
 [0.84923019 0.97518668 0.89785999 0.86917503 0.56144938]
 [0.85243615 0.99676933 0.95927991 0.8885109  0.91266099]]


In [None]:
# sort along the column and return a copy
print(np.sort(arr, axis=1))    

[[0.02945699 0.05396756 0.06753657 0.11702945 0.17913374]
 [0.12910116 0.13630925 0.18249983 0.31981183 0.41165849]
 [0.36056504 0.37163583 0.42053876 0.51866089 0.56232065]
 [0.42945962 0.43148605 0.44498496 0.62847058 0.70682921]
 [0.55147061 0.74238635 0.75572196 0.7740198  0.86890785]]


In [None]:
# sort along the column in place
arr.sort(axis=1)    
print(arr)

[[0.02945699 0.05396756 0.06753657 0.11702945 0.17913374]
 [0.12910116 0.13630925 0.18249983 0.31981183 0.41165849]
 [0.36056504 0.37163583 0.42053876 0.51866089 0.56232065]
 [0.42945962 0.43148605 0.44498496 0.62847058 0.70682921]
 [0.55147061 0.74238635 0.75572196 0.7740198  0.86890785]]


### compute the indices that would sort an array along a specified axis

In [None]:
arr = np.random.rand(5,5)

In [None]:
arr

array([[0.35767739, 0.10637874, 0.76643072, 0.16535391, 0.48921577],
       [0.12843449, 0.97189223, 0.41895177, 0.05428855, 0.74241403],
       [0.30840827, 0.84660395, 0.5274792 , 0.53478826, 0.25774221],
       [0.73053262, 0.90301376, 0.40051013, 0.37936952, 0.81514234],
       [0.57069183, 0.11940937, 0.42919012, 0.04422195, 0.20799519]])

In [None]:
# along the row
print(np.argsort(arr, axis=1))

[[1 3 0 4 2]
 [3 0 2 4 1]
 [4 0 2 3 1]
 [3 2 0 4 1]
 [3 1 4 2 0]]


In [None]:
# along the column
print(np.argsort(arr, axis=1))

[[1 2 4 3 0]
 [0 3 1 4 2]
 [1 2 0 3 4]
 [0 4 2 1 3]
 [3 0 1 4 2]]


In [None]:
# if axis=None, return the indices of a flattened array
print(np.argsort(arr, axis=None))

[23  8  1 21  5  3 24 14 10  0 18 17  7 22  4 12 13 20 15  9  2 19 11 16
  6]


In [None]:
arr = np.random.rand(3,4)

In [None]:
arr

array([[0.89907216, 0.93781722, 0.05547097, 0.55021473],
       [0.02591797, 0.27955247, 0.38201433, 0.92903076],
       [0.9841483 , 0.31755225, 0.54264515, 0.23812465]])

# 7. Manipulate an Array

### transpose an array

In [None]:
# the following methods return a copy
print(arr.T)
# or 
print(np.transpose(arr))
# or
print(arr.transpose())

[[0.35767739 0.12843449 0.30840827 0.73053262 0.57069183]
 [0.10637874 0.97189223 0.84660395 0.90301376 0.11940937]
 [0.76643072 0.41895177 0.5274792  0.40051013 0.42919012]
 [0.16535391 0.05428855 0.53478826 0.37936952 0.04422195]
 [0.48921577 0.74241403 0.25774221 0.81514234 0.20799519]]
[[0.35767739 0.12843449 0.30840827 0.73053262 0.57069183]
 [0.10637874 0.97189223 0.84660395 0.90301376 0.11940937]
 [0.76643072 0.41895177 0.5274792  0.40051013 0.42919012]
 [0.16535391 0.05428855 0.53478826 0.37936952 0.04422195]
 [0.48921577 0.74241403 0.25774221 0.81514234 0.20799519]]
[[0.35767739 0.12843449 0.30840827 0.73053262 0.57069183]
 [0.10637874 0.97189223 0.84660395 0.90301376 0.11940937]
 [0.76643072 0.41895177 0.5274792  0.40051013 0.42919012]
 [0.16535391 0.05428855 0.53478826 0.37936952 0.04422195]
 [0.48921577 0.74241403 0.25774221 0.81514234 0.20799519]]


### transpose of a high dimensional array with specified order of axes

In [None]:
arr1 = np.arange(16).reshape((2,2,4))
print(arr1)

arr1.transpose((1,0,2))
print(arr1)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]


### swap axes

In [None]:
arr1 = np.arange(16).reshape((2,8))


In [None]:
arr1

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [None]:
print(arr1.swapaxes(1,0))

[[ 0  8]
 [ 1  9]
 [ 2 10]
 [ 3 11]
 [ 4 12]
 [ 5 13]
 [ 6 14]
 [ 7 15]]


### change the shape of an array

In [None]:
arr1 = np.arange(12).reshape((3,4))


In [None]:
arr1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
# change the shape of an array and return a copy
arr1.reshape((2,6))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

In [None]:
arr1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [None]:
# change the shape of an array in place
arr1.resize((2,6))

In [None]:
arr1

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

### flatten an array

In [None]:
print(arr)
print(arr.flatten())    

[[0.35767739 0.10637874 0.76643072 0.16535391 0.48921577]
 [0.12843449 0.97189223 0.41895177 0.05428855 0.74241403]
 [0.30840827 0.84660395 0.5274792  0.53478826 0.25774221]
 [0.73053262 0.90301376 0.40051013 0.37936952 0.81514234]
 [0.57069183 0.11940937 0.42919012 0.04422195 0.20799519]]
[0.35767739 0.10637874 0.76643072 0.16535391 0.48921577 0.12843449
 0.97189223 0.41895177 0.05428855 0.74241403 0.30840827 0.84660395
 0.5274792  0.53478826 0.25774221 0.73053262 0.90301376 0.40051013
 0.37936952 0.81514234 0.57069183 0.11940937 0.42919012 0.04422195
 0.20799519]


In [None]:
# return a view
# change any element in the view will change the initial array
arr.ravel()      

array([0.35767739, 0.10637874, 0.76643072, 0.16535391, 0.48921577,
       0.12843449, 0.97189223, 0.41895177, 0.05428855, 0.74241403,
       0.30840827, 0.84660395, 0.5274792 , 0.53478826, 0.25774221,
       0.73053262, 0.90301376, 0.40051013, 0.37936952, 0.81514234,
       0.57069183, 0.11940937, 0.42919012, 0.04422195, 0.20799519])

In [None]:
arr

array([[0.89907216, 0.93781722, 0.05547097, 0.55021473],
       [0.02591797, 0.27955247, 0.38201433, 0.92903076],
       [0.9841483 , 0.31755225, 0.54264515, 0.23812465]])

### append elements to an array

In [None]:
arr = np.array([1,2,3])

In [None]:
# append a scalar and return a copy
arr1 = np.append(arr, 4)    
print(arr1)

[1 2 3 4]


In [None]:
# append an array and return a copy
arr2 = np.append(arr, [4,5,6])    
print(arr2)

[1 2 3 4 5 6]


### insert elements into an array

In [None]:
# np.insert(array, position, element)

# insert a scalar at a certain position
arr3 = np.insert(arr, 0, 100)    
print(arr3)

[100   1   2   3]


In [None]:
# insert multiple values at a certain position
arr3 = np.insert(arr, 0, [1,2,3])    
print(arr3)

[1 2 3 1 2 3]


### delete elements from an array

In [None]:
# remove the element at position 0
arr4 = np.delete(arr, 0)    
print(arr4)

[2 3]


In [None]:
# remove the element at multiple positions
arr4 = np.delete(arr, [0,2])    
print(arr4)

[2]


### copy an array

In [None]:
arr = np.array([1,2,3])

In [None]:
# the following methods are all deep copy
arr1 = np.copy(arr)
# or 
arr1 = arr.copy()
# or 
arr1 = np.array(arr, copy=True)

# 8. Combine & Split an Array

In [None]:
arr1 = np.array([[1,2,3,4], [1,2,3,4]])
arr2 = np.array([[5,6,7,8], [5,6,7,8]])

### ```np.concatenate((a, b), axis=0)```

In [None]:
# concat along the row
cat = np.concatenate((arr1, arr2), axis=0)        
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [None]:
# concat along the column
cat = np.concatenate((arr1, arr2), axis=1)    
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### ```np.vstack((a, b))``` 
### ```np.r_[a, b]```

In [None]:
# stack arrays vertically
cat = np.vstack((arr1, arr2))
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [None]:
# stack arrays vertically
cat = np.r_[arr1, arr2]
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


### ```np.hstack((a, b))```
### ```np.c_[a, b]```

In [None]:
# stack arrays horizontally
cat = np.hstack((arr1, arr2, arr1))
print(cat)

[[1 2 3 4 5 6 7 8 1 2 3 4]
 [1 2 3 4 5 6 7 8 1 2 3 4]]


In [None]:
# stack arrays horizontally
cat = np.c_[arr1, arr2]
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### split an array 

In [None]:
arr = np.random.rand(6,6)

In [None]:
arr

array([[0.6537239 , 0.47938311, 0.95319082, 0.80541801, 0.74688231,
        0.46837922],
       [0.04136431, 0.84251041, 0.72456949, 0.03665223, 0.5392612 ,
        0.76615779],
       [0.60377753, 0.66001107, 0.46490851, 0.36188459, 0.00268207,
        0.42850551],
       [0.49694336, 0.36718673, 0.25000665, 0.83955185, 0.85294555,
        0.23640027],
       [0.17911718, 0.05019692, 0.2250341 , 0.34613613, 0.24691299,
        0.0834099 ],
       [0.37781053, 0.85144396, 0.34361003, 0.99759618, 0.21153787,
        0.23472937]])

In [None]:
# split the array vertically into n evenly spaced chunks
arr1 = np.vsplit(arr, 2)
print(arr1[0])
print(arr1[1])

[[0.6537239  0.47938311 0.95319082 0.80541801 0.74688231 0.46837922]
 [0.04136431 0.84251041 0.72456949 0.03665223 0.5392612  0.76615779]
 [0.60377753 0.66001107 0.46490851 0.36188459 0.00268207 0.42850551]]
[[0.49694336 0.36718673 0.25000665 0.83955185 0.85294555 0.23640027]
 [0.17911718 0.05019692 0.2250341  0.34613613 0.24691299 0.0834099 ]
 [0.37781053 0.85144396 0.34361003 0.99759618 0.21153787 0.23472937]]


In [None]:
# split the array horizontally into n evenly spaced chunks
arr2 = np.hsplit(arr, 2)
print(arr2)

[array([[9.35431074e-01, 7.03568004e-01, 1.05792715e-01],
       [2.38611695e-01, 3.45434817e-01, 9.65192673e-01],
       [8.27724063e-01, 7.56202899e-01, 2.11173652e-04],
       [9.35942805e-02, 6.52133176e-01, 7.77493195e-01],
       [1.09480977e-01, 7.36418157e-01, 8.11244180e-01],
       [3.80482062e-01, 6.72549071e-01, 8.23691442e-01]]), array([[0.75634414, 0.22580275, 0.95021515],
       [0.50898923, 0.46511813, 0.83173463],
       [0.78650936, 0.49745417, 0.1353171 ],
       [0.95445125, 0.50717022, 0.68843276],
       [0.31637543, 0.96422325, 0.72422793],
       [0.40686265, 0.88798957, 0.14567514]])]


# 9. Set Operations

### select the unique elements from an array

In [None]:
arr = np.array([1,1,2,2,3,3,4,5,6])
print(np.unique(arr))

[1 2 3 4 5 6]


In [None]:
# return the number of times each unique item appears
arr = np.array([1,1,2,2,3,3,4,5,6])
uniques, counts = np.unique(arr, return_counts=True)
print(uniques)
print(counts)

[1 2 3 4 5 6]
[2 2 2 1 1 1]


### compute the intersection & union of two arrays

In [None]:
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([3,4,5,6,7])

In [None]:
# intersection
print(np.intersect1d(arr1, arr2))

[3 4 5]


In [None]:
# union
print(np.union1d(arr1, arr2))

[1 2 3 4 5 6 7]


### compute whether each element of an array is contained in another

In [None]:
print(np.in1d(arr1, arr2))

[False False  True  True  True]


In [None]:
# preserve the shape of the array in the output, if the array is of higher dimensions
print(np.isin(arr1, arr2))

[False False  True  True  True]


### compute the elements in an array that are not in another

In [None]:
print(np.setdiff1d(arr1, arr2))

[1 2]


### compute the elements in either of two arrays, but not both

In [None]:
print(np.setxor1d(arr1, arr2))

[1 2 6 7]


# 10. Linear Algebra

In [None]:
arr1 = np.random.rand(5,5)
arr2 = np.random.rand(5,5)

### matrix multiplication

In [None]:
print(arr1.dot(arr2))
# or
print(np.dot(arr1, arr2))
# or
print(arr1 @ arr2)

[[2.51623511 2.86218675 2.46149074 2.04223873 1.68511463]
 [0.97887139 1.12545821 1.08799709 0.91490076 0.71161775]
 [1.76553731 2.31931894 1.70683443 1.69936475 1.24178866]
 [1.16873119 1.51462574 1.20311322 1.18600669 0.82424827]
 [1.15498347 1.31753805 1.11707168 0.8826558  0.70418125]]
[[2.51623511 2.86218675 2.46149074 2.04223873 1.68511463]
 [0.97887139 1.12545821 1.08799709 0.91490076 0.71161775]
 [1.76553731 2.31931894 1.70683443 1.69936475 1.24178866]
 [1.16873119 1.51462574 1.20311322 1.18600669 0.82424827]
 [1.15498347 1.31753805 1.11707168 0.8826558  0.70418125]]
[[2.51623511 2.86218675 2.46149074 2.04223873 1.68511463]
 [0.97887139 1.12545821 1.08799709 0.91490076 0.71161775]
 [1.76553731 2.31931894 1.70683443 1.69936475 1.24178866]
 [1.16873119 1.51462574 1.20311322 1.18600669 0.82424827]
 [1.15498347 1.31753805 1.11707168 0.8826558  0.70418125]]


### QR factorization 

In [None]:
arr = np.random.rand(5,5)

q, r = np.linalg.qr(arr)
print(q)
print(r)

[[-0.4432654   0.32513554  0.61941338 -0.50166373  0.24992646]
 [-0.53313979 -0.84083639  0.01631061 -0.07809998 -0.04889265]
 [-0.497832    0.2725315   0.2186724   0.71963447 -0.33496042]
 [-0.00757871  0.09648464 -0.05319421 -0.440347   -0.89100964]
 [-0.52094492  0.32202228 -0.75194002 -0.17451256  0.17043949]]
[[-1.71197975 -1.721771   -0.91763767 -1.14596917 -1.48828016]
 [ 0.          0.18511396  0.15670431 -0.3105988  -0.02561779]
 [ 0.          0.         -0.56886488  0.09863229 -0.28981768]
 [ 0.          0.          0.         -0.31671114 -0.28469988]
 [ 0.          0.          0.          0.         -0.67088305]]


### singular value decomposition (SVD)

In [None]:
arr = np.random.rand(5,5)

u, s, v = np.linalg.svd(arr)
print(u)
print(s)
print(v)

[[-0.31707494  0.50745837  0.27489595 -0.52708908 -0.53717669]
 [-0.467027   -0.43107604  0.5391023  -0.31541122  0.4538102 ]
 [-0.53255432  0.56212351 -0.33998784  0.16799359  0.50654662]
 [-0.34108705  0.00379167  0.4323183   0.76786776 -0.32729992]
 [-0.53046657 -0.49057337 -0.57559542 -0.06964278 -0.37654055]]
[2.83737775 0.69055824 0.47487856 0.23943839 0.10871122]
[[-0.42059018 -0.38307359 -0.21951733 -0.6476369  -0.45687757]
 [-0.63976954  0.71850762 -0.25227551  0.00271298  0.10388191]
 [ 0.20378291  0.23608185 -0.1886435   0.38886392 -0.84613023]
 [-0.40137463 -0.52295215 -0.41054126  0.61598638  0.13204589]
 [ 0.45953212  0.08824872 -0.82706633 -0.22339365  0.21702294]]


### compute eigen values

In [None]:
arr = np.random.rand(5,5)
print(np.linalg.eigvals(arr))

[ 2.85678022 -0.67311818  0.37430308  0.1524331  -0.07643027]


### eigen value decomposition

In [None]:
arr = np.random.rand(5,5)

w, v = np.linalg.eig(arr)
print(w)    # eigen values
print(v)    # eigen vectors

[ 2.76395772+0.j          0.10174612+0.23654886j  0.10174612-0.23654886j
 -0.52935513+0.05320233j -0.52935513-0.05320233j]
[[ 0.27758037+0.j         -0.55997738+0.j         -0.55997738-0.j
   0.22379469-0.00692696j  0.22379469+0.00692696j]
 [ 0.45939229+0.j          0.17076292+0.34885444j  0.17076292-0.34885444j
  -0.70496349+0.j         -0.70496349-0.j        ]
 [ 0.48172485+0.j          0.21554976-0.3991528j   0.21554976+0.3991528j
   0.0228234 -0.0005529j   0.0228234 +0.0005529j ]
 [ 0.48237473+0.j          0.37310246-0.2373618j   0.37310246+0.2373618j
   0.12243854-0.18721414j  0.12243854+0.18721414j]
 [ 0.49715556+0.j         -0.28133211+0.23470715j -0.28133211-0.23470715j
   0.60718075+0.18347867j  0.60718075-0.18347867j]]


### compute the trace & determinant

In [None]:
# notice this is not a function in linalg!!!
print(np.trace(arr))    

1.9087397083691204


In [None]:
print(np.linalg.det(arr))

0.05187451362412388


### calculate the inverse/psedo-inverse of a matrix

In [None]:
arr = np.random.rand(5,5)

In [None]:
# compute the inverse of a matrix
print(np.linalg.inv(arr))

[[  2.37704915  -2.18278595  14.14976233 -17.3879446   -4.13238747]
 [ -2.65572368   2.85561316   6.39432678 -11.58940717   1.15627517]
 [ -0.64187733   2.8106897   11.3550277  -20.17896096  -0.58449384]
 [  4.52924162  -5.63978854  -0.64934041   4.47682269  -1.75308404]
 [ -2.16914098   1.49607223 -25.1450202   37.98516176   4.2574866 ]]


In [None]:
# compute the psudo-inverse of a matrix
print(np.linalg.pinv(arr))

[[  2.37704915  -2.18278595  14.14976233 -17.3879446   -4.13238747]
 [ -2.65572368   2.85561316   6.39432678 -11.58940717   1.15627517]
 [ -0.64187733   2.8106897   11.3550277  -20.17896096  -0.58449384]
 [  4.52924162  -5.63978854  -0.64934041   4.47682269  -1.75308404]
 [ -2.16914098   1.49607223 -25.1450202   37.98516176   4.2574866 ]]


### solve a linear system

In [None]:
# solve a linear system in closed form
arr = np.random.rand(5,5)

y = [1,2,3,4,5]
print(np.linalg.solve(arr, y))

[-49.75295153 -18.33776982 -44.59372785   0.44351389  98.61602289]


In [None]:
# calculate the least-squares solution of a linear system
y = [1,2,3,4,5]
solution, residuals, rank, singular = np.linalg.lstsq(arr, y)
print(solution)
print(residuals)
print(rank)
print(singular)

[-49.75295153 -18.33776982 -44.59372785   0.44351389  98.61602289]
[]
5
[2.21809418 0.85765544 0.52823685 0.09333081 0.0173482 ]


  This is separate from the ipykernel package so we can avoid doing imports until
