# Advanced Numpy Course

Link: https://youtu.be/pQt8yQuPOGo


In [3]:
import numpy as np

## Broadcasting
 

In [4]:
# A length-3 array plus a length-1 array: the length-1 axis is stretched to
# match, so 4 is added to every element of `a`.
a = np.array([1,2,3])       # (3,)
b = np.array([4])    # (1,)
print(a+b)

[5 6 7]


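Two dimensions are **compatible** when they are equal or when one of them is 1. Shapes are compared starting from the trailing (rightmost) dimension, and a missing leading dimension is treated as 1.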

In [5]:
# Row (1,3) + column (2,1): each size-1 axis is stretched, giving a (2,3) result.
a = np.array([[1,2,3]])       # (1,3)
b = np.array([[4],[5]])    # (2,1)
print(a+b)

[[5 6 7]
 [6 7 8]]


In [6]:
"""
(5, 1, 6, 7, 9, 10, 2, 1)
        || (compatible with)
(5, 9, 1, 7, 1, 1, 2, 19)
"""

'\n(5, 1, 6, 7, 9, 10, 2, 1)\n        || (compatible with)\n(5, 9, 1, 7, 1, 1, 2, 19)\n'

In [7]:
# Seven-axis example: on every axis the two sizes are equal or one of them is 1.
a = np.random.random((5,7,1,4,8,1,5))
b = np.random.random((5,1,8,4,1,5,1))

# The result takes the larger size on each axis. Only the shape is printed,
# so the random values themselves do not affect the output.
print((a+b).shape)

(5, 7, 8, 4, 8, 5, 5)


## Advanced Indexing

In [8]:
a = np.array([1,2,3,4,5,6,7,8,9])

print(a[0])             # single element at index 0
print(a[0:4])           # indices 0..3 (the end index is exclusive)
print(a[0:9:2])         # a[start_idx : end_idx : stride]

1
[1 2 3 4]
[1 3 5 7 9]


In [4]:
a = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])


# Basic indexing uses integers and slices (a[0,1] indexes with the tuple (0, 1)).
# Advanced indexing happens when a list or array is passed instead, e.g. a[[0,1]].

print(a[0:2])       # basic slice: rows at index 0 and 1
print(a[[1,2]])     # advanced indexing: rows at index 1 and 2 (the 2nd and 3rd rows)

[[1 2 3]
 [4 5 6]]
[[4 5 6]
 [7 8 9]]


In [6]:
# Dimensional indexing: a bare `:` selects everything along that dimension.
print(a[:,:])    # first `:` = every row | second `:` = every column


[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [8]:
print(a[:,2])    # every row, column index 2 -> 1-D result

[3 6 9]


In [10]:
print(a[1,:])    # row index 1, every column

[4 5 6]


In [11]:
# Column index 1, with np.newaxis adding a length-1 axis so the result is a (3,1) column.
print(a[:,1, np.newaxis])

[[2]
 [5]
 [8]]


In [16]:
print(a[[0,2]])             # rows at index 0 and 2
print(a[:,[0,2]])     # give every single row of columns 0,2
print(a[[0,2], [0,2]])      # 0th row 0th column and 2nd row - 2nd column
print(a[[0,1,2], [0,2,2]])  # index lists are paired element-wise: (0,0), (1,2), (2,2)

[[1 2 3]
 [7 8 9]]
[[1 3]
 [4 6]
 [7 9]]
[1 9]
[1 6 9]


#### Boolean Indexing

In [24]:
print(a[[True, False, True]])       # one boolean per row: keep rows 0 and 2
print(a[:, [True, False, True]])    # one boolean per column: keep columns 0 and 2
# A full 2-D mask selects individual elements; the result is a flat 1-D array.
print(a[[[True, False, True], [True, False, True], [True, False, True]]])

[[1 2 3]
 [7 8 9]]
[[1 3]
 [4 6]
 [7 9]]
[1 3 4 6 7 9]


## Sorting and Search

In [27]:
a = np.array([[5,9,8],
              [4,1,6],
              [7,3,2]])
print(np.sort(a))     # returns a sorted copy (each row sorted; last axis by default) | `a` itself is unchanged
print(a)              # still in the original order
a.sort()              # sorts `a` in place along the last axis and returns None
print(a)              # `a` now holds the same values that np.sort(a) produced above

[[5 8 9]
 [1 4 6]
 [2 3 7]]
[[5 9 8]
 [4 1 6]
 [7 3 2]]
[[5 8 9]
 [1 4 6]
 [2 3 7]]


In [29]:
# `a` is still the row-sorted array left behind by a.sort() in the previous cell.
print(np.sort(a, axis=0))     # sort down each column
print(np.sort(a, axis=1))     # sort along each row
print(np.sort(np.sort(a, axis=0), axis=1))     # columns first, then rows

# Global sort: flatten to 1-D, sort, then restore the original 2-D shape.
print(np.sort(a.flatten()).reshape(a.shape))

[[1 3 6]
 [2 4 7]
 [5 8 9]]
[[5 8 9]
 [1 4 6]
 [2 3 7]]
[[1 3 6]
 [2 4 7]
 [5 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


### Searching for values

In [34]:
# NOTE: `a` is reset here, but the search examples that follow only use `outputs`.
a = np.array([[5,9,8],
              [4,1,6],
              [7,3,2]])

# Eight example scores searched by the cells below.
outputs = np.array([0.05, 0.01, 0.87, 0.02, 0.01, 0.03, 0.01, 0.01])


In [35]:
print(np.argmax(outputs))       # returns the index of the highest value
# 0.01 occurs several times; argmin reports the index of its first occurrence.
print(np.argmin(outputs))       # give the index of the lowest value (lowest activation)

2
1


In [36]:
# Returns a tuple with one index array per dimension. Every entry of `outputs`
# is non-zero, so all eight indices are listed.
print(np.nonzero(outputs))      # gives indices of non-zero values

(array([0, 1, 2, 3, 4, 5, 6, 7]),)


In [37]:
# np.where(condition, x, y): keep the value where it exceeds 0.01, otherwise use 0.
print(np.where(outputs > 0.01, outputs, 0))

[0.05 0.   0.87 0.02 0.   0.03 0.   0.  ]


In [38]:
# Integers 0..11 laid out as 3 rows x 4 columns; used by the iteration examples below.
a = np.arange(12).reshape(3,4)
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [43]:
# Plain Python iteration: the outer loop yields rows, the inner loop yields elements.
for row in a:
    for element in row:
        print(element, end=' ')

0 1 2 3 4 5 6 7 8 9 10 11 

In [46]:
# np.nditer visits every element with a single loop, whatever the number of dimensions.
for element in np.nditer(a):   
    print(element, end=' ')

0 1 2 3 4 5 6 7 8 9 10 11 

In [48]:
# order='C': the last index changes fastest, so elements come out row by row.
for element in np.nditer(a, order='C'):     # iterate in row-major order like in C   
    print(element, end=' ')

0 1 2 3 4 5 6 7 8 9 10 11 

In [49]:
# order='F': the first index changes fastest, so elements come out column by column.
for element in np.nditer(a, order='F'):     # iterate in column-major order like in Fortran   
    print(element, end=' ')

0 4 8 1 5 9 2 6 10 3 7 11 

In [52]:
# op_flags=['readwrite'] makes the iterated elements writable. Using the iterator
# as a context manager lets NumPy write any buffered changes back to `a` on exit.
with np.nditer(a, op_flags=['readwrite']) as it:
    for element in it:   
        element[...] = element ** 2     # assign through [...] so `a` itself is modified

print(a)

[[  0   1   4   9]
 [ 16  25  36  49]
 [ 64  81 100 121]]


## Masking

In [53]:
import numpy as np
import numpy.ma as ma


In [58]:
arr = np.array([1,2,3,np.nan, 4, np.inf])
# mask entry 1 hides the element, 0 keeps it; here 2, nan and inf are hidden.
masked_arr = ma.masked_array(arr, mask=[0,1,0,1,0,1])
print(masked_arr)
print(masked_arr.mean())    # statistics skip masked entries: (1 + 3 + 4) / 3
print(masked_arr.sum())     # 1 + 3 + 4


[1.0 -- 3.0 -- 4.0 --]
2.6666666666666665
8.0


In [60]:
arr = np.array([[1,2,3],[4,5,6]])
# The mask has the same shape as the data; 3 and 4 are hidden.
masked_arr = ma.masked_array(arr, mask = [[0,0,1],[1,0,0]])
print(masked_arr)
print(masked_arr.sum())             # sum of the visible entries: 1 + 2 + 5 + 6
print(ma.getmask(masked_arr))       # the mask as a boolean array

[[1 2 --]
 [-- 5 6]]
14
[[False False  True]
 [ True False False]]


In [68]:
# Condition-based masks built from `arr` ([[1,2,3],[4,5,6]] from the previous cell).
print(ma.masked_greater(arr, value=4))      # hide values greater than 4
print(ma.masked_inside(arr, 2, 4))          # hide values inside [2, 4] (bounds included)
print(ma.masked_outside(arr, 2, 4))         # hide values outside [2, 4]
print(ma.masked_where(arr % 2 == 0, arr))   # hide values where the condition is True (even numbers)


[[1 2 3]
 [4 -- --]]
[[1 -- --]
 [-- 5 6]]
[[-- 2 3]
 [4 -- --]]
[[1 -- 3]
 [-- 5 --]]


In [69]:
arr = np.array([[1,np.nan,3],[4,5,np.inf]])
# masked_invalid hides the NaN and inf entries automatically.
print(ma.masked_invalid(arr))

[[1.0 -- 3.0]
 [4.0 5.0 --]]


## Views and Copy

In [71]:
import numpy as np

arr = np.array([1,2,3,4,5])
new_arr = arr[0:3]      # basic slice -> a view onto arr's data
print(new_arr)

new_arr[1] = 100        # written through the view...
print(new_arr)
print(arr)              # ...so the change is visible in `arr` as well

[1 2 3]
[  1 100   3]
[  1 100   3   4   5]


In the example above, `new_arr` is a *view*: a different window onto the same underlying data. It shares exactly the same memory as `arr`, only with different metadata. Any change made through the view is therefore reflected in the original array as well, because both refer to the same data.

To get an independent copy of an array, request it explicitly with the `copy()` method.


In [72]:
new_arr = arr.copy()    # independent copy with its own data
print(new_arr)

new_arr[2] = 1000       # modifies only the copy
print(new_arr)
print(arr)              # the original is unchanged by the assignment above

[  1 100   3   4   5]
[   1  100 1000    4    5]
[  1 100   3   4   5]


Basic `slicing` always returns a `view`, whereas `advanced indexing` (indexing with a list or array of integers or booleans) always returns a `copy`.

In [73]:
arr = np.array([[1,2,3], [4,5,6], [7,8,9]])

new_arr = arr[[0,1]]    # advanced (list) indexing -> a copy of rows 0 and 1
new_arr[0,1] = 1000     # modifies only the copy

print(new_arr)
print(arr)              # the original is untouched

[[   1 1000    3]
 [   4    5    6]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [75]:
# `.base` is None when an array owns its data; for a view it is the array the data came from.
print(arr.base)             # created directly -> None
print(arr.copy().base)      # a copy owns its data -> None
print(arr[0:2].base)        # a slice is a view -> prints the original `arr`

None
None
[[1 2 3]
 [4 5 6]
 [7 8 9]]


## Vectorization

In [None]:
# 3x3 integer input used by the vectorization examples below.
arr = np.array([[1,2,3], [4,5,6], [7,8,9]])

In [77]:
def square_if_even(x):
    """Return ``x`` squared when ``x`` is even, otherwise return ``x`` unchanged.

    Written for a single scalar: the parity test is used directly as a
    condition, so a multi-element array raises ``ValueError``.
    """
    return x ** 2 if x % 2 == 0 else x

In [80]:
# The scalar function cannot take a whole array: `x % 2 == 0` produces a boolean
# array, and using that array as an `if` condition raises ValueError. The error is
# the point of this cell, so it is caught and displayed rather than left to halt
# a full "Run All".
try:
    print(square_if_even(arr))
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [79]:
# np.vectorize wraps the scalar function so it is applied to each element of an array.
# It is a convenience wrapper (essentially a Python-level loop), not a speed-up.
vectorized_square_if_even = np.vectorize(square_if_even)
print(vectorized_square_if_even(arr))

[[ 1  4  3]
 [16  5 36]
 [ 7 64  9]]


In [82]:
# Two equivalent spellings of the 2x2 matrix product: the function and the `@` operator.
A = np.array([[1, 2], [3, 4]])
B = np.array([[0, 5], [10, 15]])

for product in (np.matmul(A, B), A @ B):
    print(product)

[[20 35]
 [40 75]]
[[20 35]
 [40 75]]


## Custom Data Types in Numpy

In [83]:
# Numbers, a string and a dict have no common numeric or string type, so the
# array stores them as generic Python objects.
arr = np.array([1,2,3.2, 'hello', {'key': 'value'}])
print(arr.dtype)

object


In [90]:
dt = np.dtype('U10')    # unicode string of at most 10 characters
print(dt.name)          # 'str320': 10 characters x 32 bits
print(dt.type)          # scalar type used for the elements
print(dt.byteorder)     # '=' means native byte order
print(dt.itemsize)      # bytes per element: 10 characters x 4 bytes

str320
<class 'numpy.str_'>
=
40


In [93]:
# Every string here fits within the 10-character limit of `dt`, so nothing is truncated.
arr = np.array(['hello', 'world', 'how', 'are', 'you'], dtype=dt)
print(arr)
print(arr.dtype)

['hello' 'world' 'how' 'are' 'you']
<U10


In [95]:
arr = np.array(['hello12345678909876543212345678', 'world', 'how', 'are', 'you'], dtype=dt)    # Strings longer than 10 characters are truncated to 10, because dt = np.dtype('U10')
print(arr)
print(arr.dtype)

['hello12345' 'world' 'how' 'are' 'you']
<U10


In [96]:
# Structured dtype with three unnamed fields (auto-named f0, f1, f2):
# a 32-bit int, a 2x3 block of 64-bit floats, and a 32-bit float.
dt = np.dtype('i4, (2,3)f8, f4')

# Both rows hold the same values, so the record is built once and reused.
record = (16, np.array([[2.7, 1.2, 8.7], [1.1, 9.2, 12.32]]), 19.3)
arr = np.array([record, record], dtype=dt)

print(arr)
print(arr.dtype)

[(16, [[ 2.7 ,  1.2 ,  8.7 ], [ 1.1 ,  9.2 , 12.32]], 19.3)
 (16, [[ 2.7 ,  1.2 ,  8.7 ], [ 1.1 ,  9.2 , 12.32]], 19.3)]
[('f0', '<i4'), ('f1', '<f8', (2, 3)), ('f2', '<f4')]


In [97]:
# One shared list of (label, value) records, interpreted three different ways.
dt = np.dtype("U5, f4")
records = [('hello', 29.1), ('world', 32.1), ('how', 1231.1)]

arr1 = np.array(records)                  # no dtype: everything is coerced to one string type
arr2 = np.array(records, dtype=dt)        # structured: each tuple becomes one (U5, f4) record
arr3 = np.array(records, dtype=object)    # generic Python object references

for array in (arr1, arr2, arr3):
    print(array.dtype)



<U32
[('f0', '<U5'), ('f1', '<f4')]
object


In [100]:
# nbytes = number of elements x bytes per element of the array's own buffer.
print(arr1.nbytes)    # plain string array: 6 elements of the wide string dtype
print(arr2.nbytes)    # structured array: 3 records of (U5, f4)
# For the object array this counts only the stored references, not the Python
# objects they point to, so it understates the real memory used.
print(arr3.nbytes)

768
72
48
