<a href="https://colab.research.google.com/github/sakunisgithub/machine_learning/blob/master/Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

$$\textbf{NumPy - Numerical Python}$$

### Creating NumPy arrays

In [5]:
import numpy as np

In [6]:
# 1D (a vector)
a = np.array([1, 2, 3, 4])
print(a)

[1 2 3 4]


In [7]:
print(type(a))

<class 'numpy.ndarray'>


In [8]:
# 2D (a matrix)
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b)

[[1 2 3]
 [4 5 6]]


In [9]:
# 3D (a tensor)
c = np.array([[[5, 6], [15, 16]], [[1, 2], [11, 12]]])
print(c)

[[[ 5  6]
  [15 16]]

 [[ 1  2]
  [11 12]]]


In [10]:
## creating a numpy array of a particular data type

# float
a1 = np.array([1, 2, 3], dtype = float)
print(a1)

# boolean
a2 = np.array([0, 1, 2, 3], dtype = bool)
print(a2)

# complex
a3 = np.array([5, 7, 10], dtype = complex)
print(a3)

[1. 2. 3.]
[False  True  True  True]
[ 5.+0.j  7.+0.j 10.+0.j]


In [11]:
# by using arange() function - arange() is similar to range()
a4 = np.arange(1, 11)
print(a4) # np.arange(a, b) yields integers from a to b-1

a5 = np.arange(1, 11, 3)
print(a5)

arr = np.arange(5)
print(arr)

[ 1  2  3  4  5  6  7  8  9 10]
[ 1  4  7 10]
[0 1 2 3 4]


In [12]:
# reshape() function
a6 = np.arange(1, 51).reshape(10, 5)
print(a6)

a7 = np.arange(1, 28).reshape(3, 3, 3)
print(a7)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]
 [26 27 28 29 30]
 [31 32 33 34 35]
 [36 37 38 39 40]
 [41 42 43 44 45]
 [46 47 48 49 50]]
[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[10 11 12]
  [13 14 15]
  [16 17 18]]

 [[19 20 21]
  [22 23 24]
  [25 26 27]]]


In [13]:
# ones()
a8 = np.ones((5, 2)) # creates an array of shape (5, 2) with all the elements being 1
print(a8)

[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]


In [14]:
a9 = np.ones((3, 4), dtype = int)
print(a9)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [15]:
# zeros()
a10 = np.zeros((2, 3)) # creates an array of shape (2, 3) with all the elements being 0
print(a10)

[[0. 0. 0.]
 [0. 0. 0.]]


In [16]:
# random()
a11 = np.random.random((2, 3)) # creates an array of shape(2, 3) with all the elements being random numbers in between 0 and 1
print(a11)

[[0.24429224 0.52346316 0.44635045]
 [0.22928993 0.09695481 0.33988635]]


np.ones(), np.zeros(), np.random.random() are mainly useful in array initialization.

In [17]:
# linspace() linearly spaced
a12 = np.linspace(-20, 20, 15) # generates 15 equispaced numbers from -20 to 20
print(a12)

[-20.         -17.14285714 -14.28571429 -11.42857143  -8.57142857
  -5.71428571  -2.85714286   0.           2.85714286   5.71428571
   8.57142857  11.42857143  14.28571429  17.14285714  20.        ]


In [18]:
# identity()
a13 = np.identity(4) # creates an identity matrix of order 4
print(a13)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


### Array Attributes

In [19]:
arr1 = np.arange(0, 11)
print(arr1)

[ 0  1  2  3  4  5  6  7  8  9 10]


In [20]:
arr2 = np.arange(1, 13, dtype = float).reshape(3, 4)
print(arr2)

[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]]


In [21]:
arr3 = np.arange(8, dtype = np.int32).reshape(2, 2, 2)
print(arr3)

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [22]:
# ndim - gives number of dimension(s) of an array
print(arr1.ndim)
print(arr2.ndim)
print(arr3.ndim)

1
2
3


In [23]:
# shape - gives dimension of an array
print(arr1.shape)
print(arr2.shape)
print(arr3.shape)

(11,)
(3, 4)
(2, 2, 2)


In [24]:
# size - gives number of elements in an object
print(arr2.size)
print(arr3.size)

12
8


In [25]:
# itemsize - refers to the number of bytes used to store a single element of an array
print(arr1.itemsize)
print(arr2.itemsize)
print(arr3.itemsize)
# recall that 8 bits = 1 byte
# by default colab uses 64-bit integers and they take 8 bytes of space
# also floates in colab take 8 bytes of space
# 32-bit integers take 4 bytes of space

8
8
4


In [26]:
print(arr1.dtype)
print(arr2.dtype)
print(arr3.dtype)

int64
float64
int32


### Changing datatype

In [27]:
print(arr1.dtype)

int64


In [28]:
arr1 = arr1.astype(np.int32)

In [29]:
print(arr1)

[ 0  1  2  3  4  5  6  7  8  9 10]


In [30]:
print(arr1.dtype)

int32


### Array Operations

In [31]:
b1 = np.arange(1, 13).reshape(3, 4)
print(b1)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [32]:
b2 = np.arange(13, 25).reshape(3, 4)
print(b2)

[[13 14 15 16]
 [17 18 19 20]
 [21 22 23 24]]


In [33]:
# scalar operation - operating in an array with a scalar
print(b1 + 2)

[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]


In [34]:
print(b1 * 2)

[[ 2  4  6  8]
 [10 12 14 16]
 [18 20 22 24]]


In [35]:
print(b1 / 2) # in division output datatype is always float

[[0.5 1.  1.5 2. ]
 [2.5 3.  3.5 4. ]
 [4.5 5.  5.5 6. ]]


In [36]:
print(b1 ** 2)

[[  1   4   9  16]
 [ 25  36  49  64]
 [ 81 100 121 144]]


In [37]:
# relational operation
print(b1 > 5)

[[False False False False]
 [False  True  True  True]
 [ True  True  True  True]]


In [38]:
print(b1 == 7)

[[False False False False]
 [False False  True False]
 [False False False False]]


In [39]:
print(b1 != 3)

[[ True  True False  True]
 [ True  True  True  True]
 [ True  True  True  True]]


In [40]:
# vector operations
print(b1 + b2) # itemwise addition

[[14 16 18 20]
 [22 24 26 28]
 [30 32 34 36]]


In [41]:
print(b1 * b2) # itemwise multiplication

[[ 13  28  45  64]
 [ 85 108 133 160]
 [189 220 253 288]]


### Common Array Functions

In [42]:
c1 = np.random.random((3, 3))
c1 = c1 * 100
print(c1)

[[41.00561791  2.99383327 68.54109825]
 [80.065099   94.37959936 82.85333646]
 [68.48179213 12.04916623  4.20013526]]


In [43]:
# round()
c1_rounded = np.round(c1, 2)
print(c1_rounded)

[[41.01  2.99 68.54]
 [80.07 94.38 82.85]
 [68.48 12.05  4.2 ]]


In [44]:
# floor()
print(np.floor(c1))

[[41.  2. 68.]
 [80. 94. 82.]
 [68. 12.  4.]]


In [45]:
# ceil()
print(np.ceil(c1))

[[42.  3. 69.]
 [81. 95. 83.]
 [69. 13.  5.]]


In [46]:
# max()
print(np.max(c1))

94.37959935872746


In [47]:
# min()
print(np.min(c1))

2.9938332692090452


In [48]:
# sum()
print(np.sum(c1))

454.5696778642884


In [49]:
# prod()
print(np.prod(c1))

18257814718790.223


In [50]:
print(np.max(c1, axis = 0)) # maximum element in each column

[80.065099   94.37959936 82.85333646]


In [51]:
print(np.max(c1, axis = 1)) # maximum element in each row

[68.54109825 94.37959936 68.48179213]


In [52]:
print(np.sum(c1, axis = 0)) # columnsums

[189.55250904 109.42259886 155.59456997]


In [53]:
print(np.sum(c1, axis = 1)) # rowsums

[112.54054942 257.29803483  84.73109362]


In [54]:
# mean()
print(np.mean(c1))

50.50774198492093


In [55]:
print(np.mean(c1, axis = 1)) # rowmeans

[37.51351647 85.76601161 28.24369787]


In [56]:
# median()
print(np.median(c1))

68.48179212676293


In [57]:
# std()
print(np.std(c1)) # gives population standard deviation

34.09822656723899


In [58]:
print(np.std(c1, ddof = 1)) # gives sample standard deviation

36.16663084819497


In [59]:
# var()
print(np.var(c1)) # gives population variance

1162.6890550307626


In [60]:
print(np.var(c1, ddof = 1)) # gives sample variance

1308.025186909608


In [61]:
# trigonometric functions

print(np.sin(c1)) # sin()
print(np.cos(c1)) # cos()
print(np.tan(c1)) # tan()

[[-0.16416692  0.1472223  -0.54294505]
 [-0.99896443  0.13143832  0.92150089]
 [-0.59176471 -0.49445211 -0.87164208]]
[[-0.98643257 -0.98910343  0.83976822]
 [-0.04549795  0.99132435  0.38837624]
 [ 0.80611074  0.86920487 -0.49014293]]
[[ 0.16642487 -0.14884419 -0.64654155]
 [21.95625038  0.13258862  2.37270151]
 [-0.73409853 -0.56885566  1.77834266]]


In [62]:
# dot() - matrix multiplication
c2 = np.arange(12).reshape(3, 4)
c3 = np.arange(11, -1, -1).reshape(4, 3)
print(c2)
print(c2.shape)
print(c3)
print(c3.shape)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
(3, 4)
[[11 10  9]
 [ 8  7  6]
 [ 5  4  3]
 [ 2  1  0]]
(4, 3)


In [63]:
print(np.dot(c2, c3))

[[ 24  18  12]
 [128 106  84]
 [232 194 156]]


In [64]:
# log()
print(np.log(c1))

[[3.71370908 1.0965546  4.22743354]
 [4.38284004 4.54732494 4.41707201]
 [4.2265679  2.48899547 1.43511673]]


In [65]:
# exp()
print(np.exp(c1))

[[6.43448191e+17 1.99620560e+01 5.84818028e+29]
 [5.91331058e+34 9.73955705e+40 9.61051883e+35]
 [5.51143172e+29 1.70956810e+05 6.66953516e+01]]


### Indexing

In [66]:
d1 = np.arange(10)
print(d1)

[0 1 2 3 4 5 6 7 8 9]


In [67]:
print(d1[0]) # first item
print(d1[-1]) # last item

0
9


In [68]:
d2 = np.arange(12).reshape(3, 4)
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [69]:
print(d2[1, 2]) # gives element of 2nd row and 3rd column, remember than indexing in python starts from 0

6


In [70]:
print(d2[2, 2])

10


In [71]:
print(d2[1, 3])

7


In [72]:
d3 = np.arange(8).reshape(2, 2, 2)
print(d3)

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


In [73]:
print(d3[1, 0, 1])

5


In [74]:
print(d3[0, 1, 1])

3


In [75]:
print(d3[1, 1, 1])

7


In [76]:
print(d3[0, 1, 0])

2


In [77]:
print(d3[1, 1, 0])

6


### Slicing

In [78]:
print(d1)
print(d1[1:7]) # gives elements from index 1 to index 6 of d1

[0 1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6]


In [79]:
print(d1[1:7:2]) # gives alternate elements from index 1 to index 6 of d1

[1 3 5]


In [80]:
print(d1[1:8:3])

[1 4 7]


In [81]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [82]:
print(d2[0, :]) # gives first row of d2
# print(d2[0]) # also works

[0 1 2 3]


In [83]:
print(d2[2, :])

[ 8  9 10 11]


In [84]:
print(d2[0:2, ]) # gives rows with indices 0 and 1

[[0 1 2 3]
 [4 5 6 7]]


In [85]:
print(d2[[0, 2], :]) # gives rows with indices 0 and 2

[[ 0  1  2  3]
 [ 8  9 10 11]]


In [86]:
print(d2[:, 3]) # gives 4th column of d2

[ 3  7 11]


In [87]:
print(d2[:, 0:3]) # gives columns with indices 0, 1, 2

[[ 0  1  2]
 [ 4  5  6]
 [ 8  9 10]]


In [88]:
print(d2[:, [0, 3]]) # gives columns with indices 0 and 3

[[ 0  3]
 [ 4  7]
 [ 8 11]]


In [89]:
print(d2[1:, 1:3]) # gives a submatrix with 2nd, 3rd row and 2nd, 3rd column

[[ 5  6]
 [ 9 10]]


In [96]:
print(d2[::2, ::3])

array([[ 0,  3],
       [ 8, 11]])

In [97]:
print(d2[::2, 1:4:2])

[[ 1  3]
 [ 9 11]]


In [98]:
print(d2[1, 0:4:3])

[4 7]


In [99]:
print(d2[0:2, 1:4])

[[1 2 3]
 [5 6 7]]


In [102]:
d4 = np.arange(27).reshape(3, 3, 3)
print(d4)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [106]:
print(d4[1, :, :])

[[ 9 10 11]
 [12 13 14]
 [15 16 17]]


In [105]:
print(d4[0:3:2, :, :])

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [107]:
print(d4[0, 1, :])

[3 4 5]


In [108]:
print(d4[1, :, 1])

[10 13 16]


In [109]:
print(d4[2, 1:3, 1:3])

[[22 23]
 [25 26]]


In [110]:
print(d4[0:3:2, 0, 0:3:2])

[[ 0  2]
 [18 20]]


### Iterating

In [111]:
print(d1)

[0 1 2 3 4 5 6 7 8 9]


In [115]:
for i in d1:
  print(i)

0
1
2
3
4
5
6
7
8
9


In [112]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [116]:
for i in d2:
  print(i) # prints a 1D array

[0 1 2 3]
[4 5 6 7]
[ 8  9 10 11]


In [118]:
for i in np.nditer(d2):
  print(i)

0
1
2
3
4
5
6
7
8
9
10
11


In [114]:
print(d4)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


In [117]:
for i in d4:
  print(i) # prints a 2D array

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 9 10 11]
 [12 13 14]
 [15 16 17]]
[[18 19 20]
 [21 22 23]
 [24 25 26]]


In [119]:
for i in np.nditer(d4):
  print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


### Reshaping

In [120]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [123]:
# reshape()
d2_reshaped = d2.reshape(4, 3)
print(d2_reshaped)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [124]:
# transpose
d2_transposed = np.transpose(d2)
print(d2_transposed)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [125]:
print(d2.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


In [126]:
# ravel() - converts any dimensional array to 1D array
d4_raveled = np.ravel(d4)
print(d4_raveled)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26]


$\bullet$ All these functions create a new array, do not change the existing array.

### Stacking

$\bullet$ Stacking is useful in merging data.

In [131]:
e1 = np.arange(12).reshape(3, 4)
print(e1)
e2 = np.arange(12, 24).reshape(3, 4)
print(e2)
e3 = np.arange(24, 36).reshape(3, 4)
print(e3)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]
[[24 25 26 27]
 [28 29 30 31]
 [32 33 34 35]]


In [132]:
# horizontal stacking - hstack()
print(np.hstack((e1, e2, e3))) # remember that its inputs is a tuple

[[ 0  1  2  3 12 13 14 15 24 25 26 27]
 [ 4  5  6  7 16 17 18 19 28 29 30 31]
 [ 8  9 10 11 20 21 22 23 32 33 34 35]]


In [133]:
# vertical stacking - vstack()
print(np.vstack((e1, e2, e3)))

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]
 [32 33 34 35]]


### Splitting

In [136]:
print(d2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [137]:
# horizontal splitting - hsplit()
print(np.hsplit(d2, 2))

[array([[0, 1],
       [4, 5],
       [8, 9]]), array([[ 2,  3],
       [ 6,  7],
       [10, 11]])]


In [142]:
# vertical splitting - vsplit()
print(d2_reshaped)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [141]:
print(np.vsplit(d2_reshaped, 2))

[array([[0, 1, 2],
       [3, 4, 5]]), array([[ 6,  7,  8],
       [ 9, 10, 11]])]


In [143]:
print(np.vsplit(d2, 3))

[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]


$\bullet$ $\textit{hsplit()}$ and $\textit{vsplit()}$ support equal splitting only.