In [1]:
import numpy as np
print(np.__version__)

1.18.1


### Topics covered in this notebook
* Missing data representation
* Data type conversion
* Sliding Windows
* Einstein summation
* Convolution

### Missing data representation

### None : a Python object often used to represent missing data in Python code.
Because None is a Python object, it cannot be stored in an arbitrary NumPy array,
only in arrays with data type 'object'.

In [2]:
# A Python None cannot live in a numeric array, so the dtype is spelled out as object
arr = np.array([5, 7, None, 9], dtype=object)
arr

array([5, 7, None, 9], dtype=object)

<b>Because such an array holds Python objects, aggregations like sum() or min() across an array that contains a None value raise an error.</b>

In [3]:
# Summing an object array falls back to Python's `+`, which is undefined for
# int + None. The TypeError is the point of this cell, so it is caught and
# shown instead of aborting a Restart & Run All.
try:
    np.sum(arr)
except TypeError as exc:
    print("TypeError:", exc)

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

### NaN : acronym for Not a Number. It represents missing numerical data and is a special floating-point value recognized by all systems that use the standard IEEE floating-point representation.

In [4]:
arr2 = np.array([5, 6, 7, np.nan, 9])

In [5]:
arr2, arr2.dtype

(array([ 5.,  6.,  7., nan,  9.]), dtype('float64'))

In [6]:
# NaN propagates through ordinary reductions, so each of these evaluates to nan
arr2.min(), arr2.max(), arr2.sum()

(nan, nan, nan)

### Special aggregations to ignore missing data

In [7]:
arr2

array([ 5.,  6.,  7., nan,  9.])

In [8]:
np.nansum(arr2), np.nanmin(arr2), np.nanmax(arr2)

(27.0, 5.0, 9.0)

### Find nan elements in array

In [9]:
arr2 = np.array([7, 6, np.nan, 8, 9, np.nan])
arr2

array([ 7.,  6., nan,  8.,  9., nan])

In [10]:
# True as soon as at least one entry of arr2 is NaN
np.isnan(arr2).any()

True

In [11]:
# count the NaN entries: summing the boolean mask counts each True as 1
np.isnan(arr2).sum()

2

In [12]:
# locate the NaN values: boolean mask that is True exactly where arr2 holds NaN
np.isnan(arr2)

array([False, False,  True, False, False,  True])

In [13]:
# Overwrite every NaN in place with the sentinel value -9999
nan_positions = np.isnan(arr2)
arr2[nan_positions] = -9999
arr2

array([ 7.000e+00,  6.000e+00, -9.999e+03,  8.000e+00,  9.000e+00,
       -9.999e+03])

### Data type conversion

In [14]:
# float to integer
x = np.array([1, 1.5, 2, 3.0, 4.2])
x.dtype

dtype('float64')

In [15]:
# The cast drops the fractional part (1.5 -> 1, 4.2 -> 4); it does not round
x_int = x.astype(int)
x_int

array([1, 1, 2, 3, 4])

In [16]:
x_int.dtype

dtype('int32')

In [17]:
# string to float

y = np.array(['1.5', '2.6', '3.7'])
y.dtype

dtype('<U3')

In [18]:
# Each numeric string is parsed while the array is cast to float
y_float = y.astype(float)
y_float

array([1.5, 2.6, 3.7])

In [19]:
y_float.dtype

dtype('float64')

### Arrays as Sliding Windows

In [20]:
from numpy.lib.stride_tricks import as_strided

In [21]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
a.strides[-1]

4

In [23]:
# create array of 2 columns: row i is the window a[i:i+2]
# Strides are given in bytes, so the step is read from the array instead of
# being hard-coded as 4: that literal is only right for 4-byte integers, and
# with 8-byte integers it would make the view read misaligned data.
step = a.strides[-1]
as_strided(a, shape=(8, 2), strides=(step, step))

array([[0, 1],
       [1, 2],
       [2, 3],
       [3, 4],
       [4, 5],
       [5, 6],
       [6, 7],
       [7, 8]])

In [24]:
# Change strides to form a different sliding array: advancing two elements
# per row yields non-overlapping pairs. The byte steps are derived from the
# array's own element stride rather than the literals 8 and 4, which are only
# valid for 4-byte integers.
step = a.strides[-1]
as_strided(a, shape=(5, 2), strides=(2 * step, step))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [25]:
# create array of 3 columns: row i is the window a[i:i+3]
step = a.strides[-1]  # bytes per element; avoids assuming 4-byte integers
as_strided(a, shape=(8, 3), strides=(step, step))

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6],
       [5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])

In [26]:
# Another array: row r holds the pair (10*r, 10*r + 1)
#
# Required output (three consecutive rows of x laid side by side):
#
# [[ 0,  1, 10, 11, 20, 21],
#  [10, 11, 20, 21, 30, 31],
#  [20, 21, 30, 31, 40, 41],
#  [30, 31, 40, 41, 50, 51]]
x = np.array([[10 * r, 10 * r + 1] for r in range(6)])
x

array([[ 0,  1],
       [10, 11],
       [20, 21],
       [30, 31],
       [40, 41],
       [50, 51]])

In [27]:
x.flatten()

array([ 0,  1, 10, 11, 20, 21, 30, 31, 40, 41, 50, 51])

In [28]:
x.strides[-1]

4

In [29]:
# Each output row starts one row of x further on, and x.strides already holds
# exactly the byte steps needed (one row of x, one element), so it is reused
# instead of hard-coding (8, 4), which is only valid for 4-byte integers.
as_strided(x, shape=(4, 6), strides=x.strides)

array([[ 0,  1, 10, 11, 20, 21],
       [10, 11, 20, 21, 30, 31],
       [20, 21, 30, 31, 40, 41],
       [30, 31, 40, 41, 50, 51]])

### einsum(): Einstein summation

In [30]:
A = np.arange(1,10).reshape(3,3)
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [31]:
# sum along columns via einsum(): index j is summed out, leaving one total per row
np.einsum('ij ->i', A)

array([ 6, 15, 24])

In [32]:
# equivalent 
np.sum(A, axis=1)

array([ 6, 15, 24])

In [33]:
# sum along rows via einsum(): index i is summed out, leaving one total per column
np.einsum('ij -> j', A)

array([12, 15, 18])

In [34]:
# Equivalent
np.sum(A, axis=0)

array([12, 15, 18])

In [35]:
# Transpose via einsum()
np.einsum('ij -> ji', A)

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [36]:
# equivalent
A.T

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [37]:
# main-diagonal elements via einsum() (a 1-D array, not a diagonal matrix)
np.einsum('ii->i', A)

array([1, 5, 9])

In [38]:
# equivalent: np.diag() applied to the 2-D array A extracts its main diagonal
np.diag(A)

array([1, 5, 9])

In [39]:
# trace (sum of the main-diagonal elements) via einsum()
np.einsum('ii->', A)

15

In [40]:
# equivalent of sum of diagonal elements
np.diag(A).sum()

15

In [41]:
A = np.array([[1, 1, 1],
              [2, 2, 2],
              [5, 5, 5]])

B = np.array([[0, 1, 0],
              [1, 1, 0],
              [1, 1, 1]])

In [42]:
# Multiplication of elements of matrices 
A*B

array([[0, 1, 0],
       [2, 2, 0],
       [5, 5, 5]])

In [43]:
# multiplication via einsum()
np.einsum('ij, ij -> ij', A, B)

array([[0, 1, 0],
       [2, 2, 0],
       [5, 5, 5]])

In [44]:
## Matrix product via einsum(): the shared index j is summed out
np.einsum('ij, jk -> ik', A,B)

array([[ 2,  3,  1],
       [ 4,  6,  2],
       [10, 15,  5]])

In [45]:
## Equivalent matrix product with np.dot()
np.dot(A,B)

array([[ 2,  3,  1],
       [ 4,  6,  2],
       [10, 15,  5]])

### Convolution

In [46]:
# 5x5 grid holding 0..24 in row-major order
matrix = np.arange(5 * 5).reshape(5, 5)
print(matrix)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [47]:
# 3x3 kernel, written one row per line
conv_filter = np.array([
    [1, 1, 0],
    [1, 2, 3],
    [0, 1, 1],
])
print(conv_filter)

[[1 1 0]
 [1 2 3]
 [0 1 1]]


In [48]:
matrix.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [49]:
# Shape and strides of a 4-D sliding-window view over `matrix`:
#   axes 0-1 select the top-left corner of a window (only positions where the
#   filter fits entirely inside the matrix),
#   axes 2-3 walk inside that window.
filter_shape = conv_filter.shape
# Plain Python ints keep conv_shape a homogeneous tuple; going through
# np.subtract() mixed NumPy scalars with Python ints, and the printed form of
# NumPy scalars differs between NumPy versions.
out_shape    = tuple(m - f + 1 for m, f in zip(matrix.shape, filter_shape))
conv_shape   = out_shape + filter_shape
# Moving the window by one row/column costs the same byte step as moving
# inside it, so the matrix strides are simply repeated.
conv_strides = matrix.strides * 2
print(conv_shape)
print(conv_strides)

(3, 3, 3, 3)
(20, 4, 20, 4)


In [50]:
# The windows overlap and share memory with `matrix`, so the view is made
# read-only: a write through it would silently change several windows at once.
sub_matrices = as_strided(matrix, shape=conv_shape, strides=conv_strides,
                          writeable=False)

In [51]:
sub_matrices

array([[[[ 0,  1,  2],
         [ 5,  6,  7],
         [10, 11, 12]],

        [[ 1,  2,  3],
         [ 6,  7,  8],
         [11, 12, 13]],

        [[ 2,  3,  4],
         [ 7,  8,  9],
         [12, 13, 14]]],


       [[[ 5,  6,  7],
         [10, 11, 12],
         [15, 16, 17]],

        [[ 6,  7,  8],
         [11, 12, 13],
         [16, 17, 18]],

        [[ 7,  8,  9],
         [12, 13, 14],
         [17, 18, 19]]],


       [[[10, 11, 12],
         [15, 16, 17],
         [20, 21, 22]],

        [[11, 12, 13],
         [16, 17, 18],
         [21, 22, 23]],

        [[12, 13, 14],
         [17, 18, 19],
         [22, 23, 24]]]])

In [52]:
sub_matrices.shape

(3, 3, 3, 3)

In [53]:
# sub_matrices[i, j] is the window whose top-left corner is matrix[i, j], and
# (k, l) index positions inside that window. The filter therefore has to be
# paired with the LAST two axes and summed out, leaving the window position
# (i, j) as the output index. The earlier 'ij, ijkl->kl' spelling paired the
# filter with the window-position axes instead; it gave the same numbers only
# because the output grid and the filter both happen to be 3x3, and it breaks
# for any other matrix/filter size.
# Note: the filter is not flipped, so strictly this is a cross-correlation.
convolved = np.einsum('kl, ijkl->ij', conv_filter, sub_matrices)

print(convolved)

[[ 62  72  82]
 [112 122 132]
 [162 172 182]]
