<a href="https://colab.research.google.com/github/sakunisgithub/machine_learning/blob/master/Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

$$\textbf{NumPy - Numerical Python}$$

### Creating NumPy arrays

In [12]:
import numpy as np

In [13]:
# A one-dimensional array (a vector) built from a plain Python list
a = np.array(
  [1, 2, 3, 4]
)
print(a)

[1 2 3 4]


In [14]:
print(type(a))

<class 'numpy.ndarray'>


In [15]:
# A two-dimensional array (a matrix): every inner list becomes one row
b = np.array([
  [1, 2, 3],
  [4, 5, 6],
])
print(b)

[[1 2 3]
 [4 5 6]]


In [16]:
# A three-dimensional array (a tensor): a stack of two 2x2 matrices
c = np.array([
  [[5, 6], [15, 16]],
  [[1, 2], [11, 12]],
])
print(c)

[[[ 5  6]
  [15 16]]

 [[ 1  2]
  [11 12]]]


In [17]:
## NumPy arrays with an explicitly requested element type (dtype)

# float
a1 = np.array([1, 2, 3], dtype=float)
# boolean: 0 becomes False, every non-zero value becomes True
a2 = np.array([0, 1, 2, 3], dtype=bool)
# complex: the imaginary part is filled with 0
a3 = np.array([5, 7, 10], dtype=complex)

for typed_array in (a1, a2, a3):
  print(typed_array)

[1. 2. 3.]
[False  True  True  True]
[ 5.+0.j  7.+0.j 10.+0.j]


In [18]:
# arange(start, stop, step) mirrors Python's range(): the stop value itself is excluded
a4 = np.arange(1, 11)     # integers 1, 2, ..., 10
a5 = np.arange(1, 11, 3)  # every third integer starting at 1
arr = np.arange(5)        # a single argument is the stop; counting starts at 0

for generated in (a4, a5, arr):
  print(generated)

[ 1  2  3  4  5  6  7  8  9 10]
[ 1  4  7 10]
[0 1 2 3 4]


In [19]:
# reshape() arranges the same elements, row by row, into the requested shape
a6 = np.arange(1, 51).reshape((10, 5))    # 50 elements -> 10 rows x 5 columns
a7 = np.arange(1, 28).reshape((3, 3, 3))  # 27 elements -> three 3x3 matrices

print(a6)
print(a7)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]
 [26 27 28 29 30]
 [31 32 33 34 35]
 [36 37 38 39 40]
 [41 42 43 44 45]
 [46 47 48 49 50]]
[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[10 11 12]
  [13 14 15]
  [16 17 18]]

 [[19 20 21]
  [22 23 24]
  [25 26 27]]]


In [20]:
# ones(): an array of the requested shape, here (5, 2), in which every element is 1
a8 = np.ones(shape=(5, 2))
print(a8)

[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]


In [21]:
# ones() with an integer dtype instead of the default float
a9 = np.ones(shape=(3, 4), dtype=int)
print(a9)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [22]:
# zeros(): an array of the requested shape, here (2, 3), in which every element is 0
a10 = np.zeros(shape=(2, 3))
print(a10)

[[0. 0. 0.]
 [0. 0. 0.]]


In [23]:
# random()
a11 = np.random.random((2, 3)) # creates an array of shape (2, 3) filled with random floats drawn uniformly from the half-open interval [0, 1)
print(a11) # no seed is set, so the values differ on every run

[[0.89630535 0.71846678 0.96850137]
 [0.91929458 0.97222693 0.05019539]]


np.ones(), np.zeros(), np.random.random() are mainly useful in array initialization.

In [24]:
# linspace(): linearly spaced values; both end points are included
a12 = np.linspace(start=-20, stop=20, num=15)  # 15 equally spaced numbers from -20 to 20
print(a12)

[-20.         -17.14285714 -14.28571429 -11.42857143  -8.57142857
  -5.71428571  -2.85714286   0.           2.85714286   5.71428571
   8.57142857  11.42857143  14.28571429  17.14285714  20.        ]


In [25]:
# identity(): square matrix with ones on the main diagonal and zeros elsewhere
a13 = np.identity(n=4)  # identity matrix of order 4
print(a13)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


### Array Attributes

In [26]:
# 1D sample array for the attribute examples: integers 0 through 10
arr1 = np.arange(11)
print(arr1)

[ 0  1  2  3  4  5  6  7  8  9 10]


In [27]:
# 2D sample array: floats 1.0 through 12.0 arranged as 3 rows x 4 columns
arr2 = np.arange(1, 13, dtype=float).reshape((3, 4))
print(arr2)

[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]]


In [28]:
# 3D sample array: 32-bit integers 0 through 7 arranged as two 2x2 matrices
arr3 = np.arange(8, dtype=np.int32).reshape((2, 2, 2))
print(arr3)

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [29]:
# ndim - the number of dimensions (axes) of each sample array
for sample in (arr1, arr2, arr3):
  print(sample.ndim)

1
2
3


In [30]:
# shape - a tuple holding the length of the array along every axis
for sample in (arr1, arr2, arr3):
  print(sample.shape)

(11,)
(3, 4)
(2, 2, 2)


In [31]:
# size - the total number of elements stored in the array
for sample in (arr2, arr3):
  print(sample.size)

12
8


In [32]:
# itemsize - refers to the number of bytes used to store a single element of an array
print(arr1.itemsize)
print(arr2.itemsize)
print(arr3.itemsize)
# recall that 8 bits = 1 byte
# arr1 was created without a dtype, so it uses NumPy's default integer type; in this
# Colab session that is a 64-bit integer, i.e. 8 bytes per element (the default is platform dependent)
# arr2 was created with dtype = float, which is a 64-bit float here: 8 bytes per element
# arr3 was created with dtype = np.int32: 32-bit integers take 4 bytes per element

8
8
4


In [33]:
# dtype - the data type shared by all elements of the array
for sample in (arr1, arr2, arr3):
  print(sample.dtype)

int64
float64
int32


### Changing datatype

In [34]:
print(arr1.dtype)

int64


In [35]:
# astype() returns a converted copy; the name arr1 is rebound to the 32-bit integer version
arr1 = arr1.astype(np.int32)

In [36]:
print(arr1)

[ 0  1  2  3  4  5  6  7  8  9 10]


In [37]:
print(arr1.dtype)

int32


### Array Operations

In [38]:
# first operand for the arithmetic examples: integers 1..12 as a 3x4 matrix
b1 = np.arange(1, 13).reshape((3, 4))
print(b1)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [39]:
# second operand for the arithmetic examples: integers 13..24 as a 3x4 matrix
b2 = np.arange(13, 25).reshape((3, 4))
print(b2)

[[13 14 15 16]
 [17 18 19 20]
 [21 22 23 24]]


In [40]:
# scalar operation - applying an arithmetic operator between an array and a scalar;
# the scalar is combined with every element of the array
print(b1 + 2)

[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]


In [41]:
print(b1 * 2)

[[ 2  4  6  8]
 [10 12 14 16]
 [18 20 22 24]]


In [42]:
print(b1 / 2) # true division (/) of an integer array gives a float array, even where the quotient is a whole number

[[0.5 1.  1.5 2. ]
 [2.5 3.  3.5 4. ]
 [4.5 5.  5.5 6. ]]


In [43]:
print(b1 ** 2)

[[  1   4   9  16]
 [ 25  36  49  64]
 [ 81 100 121 144]]


In [44]:
# relational operation
print(b1 > 5)

[[False False False False]
 [False  True  True  True]
 [ True  True  True  True]]


In [45]:
print(b1 == 7)

[[False False False False]
 [False False  True False]
 [False False False False]]


In [46]:
print(b1 != 3)

[[ True  True False  True]
 [ True  True  True  True]
 [ True  True  True  True]]


In [47]:
# vector operations
print(b1 + b2) # itemwise addition

[[14 16 18 20]
 [22 24 26 28]
 [30 32 34 36]]


In [48]:
print(b1 * b2) # itemwise multiplication

[[ 13  28  45  64]
 [ 85 108 133 160]
 [189 220 253 288]]


### Common Array Functions

In [49]:
# 3x3 matrix of random floats, scaled from [0, 1) up to [0, 100)
c1 = 100 * np.random.random((3, 3))
print(c1)

[[33.32601397 60.49139693 61.37360049]
 [53.59212965 58.47741608 33.09910732]
 [ 1.30319695 27.1249524  29.46891774]]


In [50]:
# round(): round every element to the given number of decimal places (2 here)
c1_rounded = np.round(c1, decimals=2)
print(c1_rounded)

[[33.33 60.49 61.37]
 [53.59 58.48 33.1 ]
 [ 1.3  27.12 29.47]]


In [51]:
# floor()
print(np.floor(c1))

[[33. 60. 61.]
 [53. 58. 33.]
 [ 1. 27. 29.]]


In [52]:
# ceil()
print(np.ceil(c1))

[[34. 61. 62.]
 [54. 59. 34.]
 [ 2. 28. 30.]]


In [53]:
# max()
print(np.max(c1))

61.373600492988025


In [54]:
# min()
print(np.min(c1))

1.3031969491867468


In [55]:
# sum()
print(np.sum(c1))

358.2567315327666


In [56]:
# prod()
print(np.prod(c1))

13369257387215.867


In [57]:
print(np.max(c1, axis = 0)) # maximum element in each column

[53.59212965 60.49139693 61.37360049]


In [58]:
print(np.max(c1, axis = 1)) # maximum element in each row

[61.37360049 58.47741608 29.46891774]


In [59]:
print(np.sum(c1, axis = 0)) # columnsums

[ 88.22134057 146.09376541 123.94162555]


In [60]:
print(np.sum(c1, axis = 1)) # rowsums

[155.19101139 145.16865305  57.89706709]


In [61]:
# mean()
print(np.mean(c1))

39.806303503640734


In [62]:
print(np.mean(c1, axis = 1)) # rowmeans

[51.73033713 48.38955102 19.29902236]


In [63]:
# median()
print(np.median(c1))

33.32601396752918


In [64]:
# std()
print(np.std(c1)) # gives population standard deviation

19.0578910303188


In [65]:
print(np.std(c1, ddof = 1)) # gives sample standard deviation

20.213945973979055


In [66]:
# var()
print(np.var(c1)) # gives population variance

363.2032105235058


In [67]:
print(np.var(c1, ddof = 1)) # gives sample variance

408.603611838944


In [68]:
# trigonometric functions are applied element-wise: sin(), cos(), tan() in that order
for trig in (np.sin, np.cos, np.tan):
  print(trig(c1))

[[ 0.94299084 -0.71814745 -0.99367363]
 [-0.18400015  0.93661632  0.99369151]
 [ 0.96440844  0.91251111 -0.93006284]]
[[-0.33281869 -0.69589097  0.11230632]
 [-0.98292622 -0.35035677 -0.11214803]
 [ 0.26441702 -0.40905192 -0.36740047]]
[[-2.83334696  1.0319827  -8.8478873 ]
 [ 0.1871963  -2.67332164 -8.86053484]
 [ 3.64730091 -2.23079529  2.53146884]]


In [69]:
# dot() - matrix multiplication; first build a (3, 4) and a (4, 3) operand
c2 = np.arange(12).reshape((3, 4))          # 0..11 in increasing order
c3 = np.arange(11, -1, -1).reshape((4, 3))  # 11..0 in decreasing order

for operand in (c2, c3):
  print(operand)
  print(operand.shape)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
(3, 4)
[[11 10  9]
 [ 8  7  6]
 [ 5  4  3]
 [ 2  1  0]]
(4, 3)


In [70]:
# (3, 4) dot (4, 3): the inner dimensions match (4 and 4), so the product has shape (3, 3)
print(np.dot(c2, c3))

[[ 24  18  12]
 [128 106  84]
 [232 194 156]]


In [71]:
# log()
print(np.log(c1))

[[3.50633829 4.10250116 4.11697978]
 [3.98140222 4.06864063 3.49950631]
 [0.26482044 3.30045406 3.38333607]]


In [72]:
# exp()
print(np.exp(c1))

[[2.97374668e+14 1.86672305e+26 4.51041008e+26]
 [1.88263523e+23 2.49126036e+25 2.37006176e+14]
 [3.68104599e+00 6.02860945e+11 6.28330820e+12]]


### Indexing

In [73]:
# 1D array 0..9 used in the indexing, slicing and iterating examples
d1 = np.arange(0, 10)
print(d1)

[0 1 2 3 4 5 6 7 8 9]


In [74]:
print(d1[0]) # first item
print(d1[-1]) # last item

0
9


In [75]:
# 2D array: integers 0..11 arranged as 3 rows x 4 columns
d2 = np.arange(0, 12).reshape((3, 4))
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [76]:
print(d2[1, 2]) # gives the element in the 2nd row and 3rd column; remember that indexing in Python starts from 0

6


In [77]:
print(d2[2, 2]) # element in the 3rd row (index 2) and 3rd column (index 2)

10


In [78]:
print(d2[1, 3]) # element in the 2nd row (index 1) and 4th column (index 3)

7


In [79]:
# 3D array: integers 0..7 arranged as two 2x2 matrices
d3 = np.arange(0, 8).reshape((2, 2, 2))
print(d3)

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [80]:
# a 3D index reads as [matrix, row, column]: here 2nd matrix (index 1), 1st row (index 0), 2nd column (index 1)
print(d3[1, 0, 1])

5


In [81]:
print(d3[0, 1, 1]) # 1st matrix, 2nd row, 2nd column

3


In [82]:
print(d3[1, 1, 1]) # 2nd matrix, 2nd row, 2nd column

7


In [83]:
print(d3[0, 1, 0]) # 1st matrix, 2nd row, 1st column

2


In [84]:
print(d3[1, 1, 0]) # 2nd matrix, 2nd row, 1st column

6


### Slicing

In [85]:
print(d1)
print(d1[1:7]) # gives elements from index 1 to index 6 of d1

[0 1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6]


In [86]:
print(d1[1:7:2]) # gives alternate elements from index 1 to index 6 of d1

[1 3 5]


In [87]:
print(d1[1:8:3]) # gives every 3rd element from index 1 up to index 7 of d1

[1 4 7]


In [88]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [89]:
print(d2[0, :]) # gives first row of d2
# print(d2[0]) # also works

[0 1 2 3]


In [90]:
print(d2[2, :]) # gives the third row (index 2) of d2

[ 8  9 10 11]


In [91]:
print(d2[0:2, ]) # gives rows with indices 0 and 1

[[0 1 2 3]
 [4 5 6 7]]


In [92]:
print(d2[[0, 2], :]) # gives rows with indices 0 and 2

[[ 0  1  2  3]
 [ 8  9 10 11]]


In [93]:
print(d2[:, 3]) # gives 4th column of d2

[ 3  7 11]


In [94]:
print(d2[:, 0:3]) # gives columns with indices 0, 1, 2

[[ 0  1  2]
 [ 4  5  6]
 [ 8  9 10]]


In [95]:
print(d2[:, [0, 3]]) # gives columns with indices 0 and 3

[[ 0  3]
 [ 4  7]
 [ 8 11]]


In [96]:
print(d2[1:, 1:3]) # gives a submatrix with 2nd, 3rd row and 2nd, 3rd column

[[ 5  6]
 [ 9 10]]


In [97]:
print(d2[::2, ::3]) # every 2nd row (indices 0 and 2) and every 3rd column (indices 0 and 3)

[[ 0  3]
 [ 8 11]]


In [98]:
print(d2[::2, 1:4:2]) # rows with indices 0 and 2, columns with indices 1 and 3

[[ 1  3]
 [ 9 11]]


In [99]:
print(d2[1, 0:4:3]) # from the row with index 1, the columns with indices 0 and 3

[4 7]


In [100]:
print(d2[0:2, 1:4]) # rows with indices 0 and 1, columns with indices 1, 2 and 3

[[1 2 3]
 [5 6 7]]


In [101]:
# 3D array: integers 0..26 arranged as three 3x3 matrices
d4 = np.arange(0, 27).reshape((3, 3, 3))
print(d4)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [102]:
print(d4[1, :, :]) # the whole 2nd matrix (index 1 along the first axis)

[[ 9 10 11]
 [12 13 14]
 [15 16 17]]


In [103]:
print(d4[0:3:2, :, :]) # the 1st and 3rd matrices (indices 0 and 2 along the first axis)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [104]:
print(d4[0, 1, :]) # 1st matrix, 2nd row, all columns

[3 4 5]


In [105]:
print(d4[1, :, 1]) # 2nd matrix, all rows, 2nd column

[10 13 16]


In [106]:
print(d4[2, 1:3, 1:3]) # 3rd matrix, rows with indices 1 and 2, columns with indices 1 and 2

[[22 23]
 [25 26]]


In [107]:
print(d4[0:3:2, 0, 0:3:2]) # from the 1st and 3rd matrices, the 1st row, columns with indices 0 and 2

[[ 0  2]
 [18 20]]


### Iterating

In [108]:
print(d1)

[0 1 2 3 4 5 6 7 8 9]


In [109]:
for i in d1:
  print(i)

0
1
2
3
4
5
6
7
8
9


In [110]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [111]:
for i in d2:
  print(i) # prints a 1D array

[0 1 2 3]
[4 5 6 7]
[ 8  9 10 11]


In [112]:
# np.nditer() visits the elements one by one instead of row by row
for i in np.nditer(d2):
  print(i)

0
1
2
3
4
5
6
7
8
9
10
11


In [113]:
print(d4)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [114]:
for i in d4:
  print(i) # prints a 2D array

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 9 10 11]
 [12 13 14]
 [15 16 17]]
[[18 19 20]
 [21 22 23]
 [24 25 26]]


In [115]:
# np.nditer() visits the elements one by one, even for a 3D array
for i in np.nditer(d4):
  print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


### Reshaping

In [116]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [117]:
# reshape(): the same 12 elements, laid out as 4 rows x 3 columns instead of 3 x 4
d2_reshaped = np.reshape(d2, (4, 3))
print(d2_reshaped)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [118]:
# transpose - swaps rows and columns, so the (3, 4) array d2 is shown as a (4, 3) array
d2_transposed = np.transpose(d2)
print(d2_transposed)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [119]:
print(d2.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [120]:
# ravel() - flattens an array of any dimensionality into a 1D array
d4_raveled = d4.ravel()
print(d4_raveled)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26]


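$\bullet$ None of these functions changes the existing array: each returns a new array object. Whenever possible that object is a *view* sharing the original's data, so writing into the result can also modify the original; call `.copy()` when an independent array is needed.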

### Stacking

$\bullet$ Stacking is useful in merging data.

In [121]:
# three (3, 4) matrices holding consecutive integers, used in the stacking examples
e1 = np.arange(0, 12).reshape((3, 4))
e2 = np.arange(12, 24).reshape((3, 4))
e3 = np.arange(24, 36).reshape((3, 4))

for block in (e1, e2, e3):
  print(block)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
[[24 25 26 27]
 [28 29 30 31]
 [32 33 34 35]]


In [122]:
# horizontal stacking - hstack() joins the arrays side by side (column-wise)
print(np.hstack((e1, e2, e3))) # remember that the arrays are passed together as a single tuple, not as separate arguments

[[ 0  1  2  3 12 13 14 15 24 25 26 27]
 [ 4  5  6  7 16 17 18 19 28 29 30 31]
 [ 8  9 10 11 20 21 22 23 32 33 34 35]]


In [123]:
# vertical stacking - vstack() places the arrays on top of each other (row-wise)
print(np.vstack((e1, e2, e3))) # the arrays are again passed as a single tuple

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]
 [32 33 34 35]]


### Splitting

In [124]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [125]:
# horizontal splitting - hsplit() cuts the array along its columns
print(np.hsplit(d2, 2)) # 2 equal parts: the 4 columns become two (3, 2) arrays, returned in a list

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]


In [126]:
# vertical splitting - vsplit()
print(d2_reshaped)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [127]:
print(np.vsplit(d2_reshaped, 2)) # 2 equal parts: the 4 rows become two (2, 3) arrays, returned in a list

[array([[0, 1, 2],
       [3, 4, 5]]), array([[ 6,  7,  8],
       [ 9, 10, 11]])]


In [128]:
print(np.vsplit(d2, 3)) # 3 equal parts: each of the 3 rows becomes its own (1, 4) array

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]


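$\bullet$ When the second argument is an integer, $\textit{hsplit()}$ and $\textit{vsplit()}$ support equal splitting only (an uneven division raises an error). To obtain unequal parts, pass a list of split indices instead, or use `np.array_split()`.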

### NumPy array vs Python list

In [129]:
# speed: element-wise addition of 10 million integers, Python list loop vs NumPy
# NOTE: this cell rebinds a, b, c, a1, b1 and c1, which earlier cells used for other arrays.

import time

# on python list

a = [i for i in range(10000000)]
b = [i for i in range(10000000, 20000000)]

c = []

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time, which can be coarse or be
# adjusted while the measurement is running.
start = time.perf_counter()

for i in range(len(a)):
  c.append(a[i] + b[i])

stop = time.perf_counter()

print(f"time on python list :{stop - start}")


# on numpy array

a1 = np.arange(10000000)
b1 = np.arange(10000000, 20000000)

start = time.perf_counter()

c1 = a1 + b1 # a single vectorized operation replaces the Python-level loop

stop = time.perf_counter()

print(f"time on Numpy array :{stop - start}")

time on python list :1.9005169868469238
time on Numpy array :0.020281314849853516


In [130]:
# memory: compare the size reported for a Python list with that of NumPy arrays

import sys
# python list
# getsizeof() is shallow: for a list it counts the list object and its array of
# references, NOT the integer objects those references point to, so the real
# footprint of the list is larger than the number printed here.

print(sys.getsizeof(a)) # tells the number of bytes occupied by the list object 'a' itself (its elements are not counted)

# numpy array created without an explicit dtype (default integer type); the
# reported size includes the buffer holding the elements
print(sys.getsizeof(a1))

# the same values stored as 32-bit integers need 4 bytes per element
a1 = np.arange(10000000, dtype = np.int32)
print(sys.getsizeof(a1))

89095160
80000112
40000112


In [131]:
# convenience
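# python lists have no element-wise arithmetic: list + list concatenates the two lists, so element-wise addition needs an explicit loop or comprehension.
# numpy arrays support element-wise addition, subtraction etc. directly through the arithmetic operators.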

### Advanced Indexing

In [135]:
# Fancy indexing: select rows / columns by passing a list of indices
a = np.arange(24).reshape((6, 4))
print(a)

row_picks = [0, 1, 4]
print(a[row_picks])        # rows with indices 0, 1 and 4

column_picks = [0, 2, 3]
print(a[:, column_picks])  # columns with indices 0, 2 and 3, for every row

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [16 17 18 19]]
[[ 0  2  3]
 [ 4  6  7]
 [ 8 10 11]
 [12 14 15]
 [16 18 19]
 [20 22 23]]


In [137]:
# Boolean indexing: start from a 6x4 matrix of random integers in the range 1..99
a = np.random.randint(1, 100, size=(6, 4))
print(a)

[[ 2 48 87 56]
 [63 89 41 67]
 [15  4  4 71]
 [10 48 39 18]
 [81 56 46 61]
 [29 69 81 16]]


In [147]:
# find all the numbers greater than 50
print(a > 50) # output will be a boolean array (a mask) with the same shape as a
print(a[a > 50]) # indexing with the mask keeps the elements where it is True, returned as a 1D array

[[False False  True  True]
 [ True  True False  True]
 [False False False  True]
 [False False False False]
 [ True  True False  True]
 [False  True  True False]]
[87 56 63 89 67 71 81 56 61 69 81]


In [148]:
# find all even numbers
print(a[a % 2 == 0])

[ 2 48 56  4  4 10 48 18 56 46 16]


In [150]:
# find the numbers greater than 50 and even
# boolean masks are combined with & (element-wise AND); each comparison needs its own
# parentheses because & binds more tightly than > and ==
print(a[(a > 50) & (a % 2 == 0)])

[56 56]


In [151]:
# find the numbers divisible by 7
print(a[a % 7 == 0])

[56 63 56]


In [155]:
# find the numbers not divisible by 7
print(a[~(a % 7 == 0)]) # ~ is the bitwise NOT operator; on a boolean array it flips every True/False (element-wise negation)
# print(a[a % 7 != 0]) # equivalent alternative

[ 2 48 87 89 41 67 15  4  4 71 10 48 39 18 81 46 61 29 69 81 16]
