## LAYERS

In [None]:
import numpy as np
from sklearn import datasets
import pandas as pd
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt

# Global learning rate. The layers' backward_pass methods read the module-level
# name `lr` each time they update their parameters, so rebinding `lr` later
# changes the step size used by every layer from that point on.
lr = 10 ** -6

### Flatten Layer

In [None]:
class flatten:
  """Layer that collapses an array of any shape into a 1-D vector.

  The shape seen by the most recent forward pass is stored on the instance
  so the backward pass can give the gradient that same shape again.
  """

  def forward_pass(self, input):
    """Return all elements of `input` as a single 1-D array.

    Records `input_shape` (shape received) and `output_shape` (a 1-tuple with
    the total element count) on the instance.
    """
    original_shape = input.shape
    flat_shape = (np.prod(original_shape),)
    self.input_shape = original_shape
    self.output_shape = flat_shape
    return input.reshape(flat_shape)

  def backward_pass(self, d_out):
    """Reshape the incoming gradient `d_out` to the last forward input shape."""
    return d_out.reshape(self.input_shape)

### Matrix Multiplication Layer / Dense Layer

In [None]:
# matrix multiplication XW

class matrix_multiplication_layer:
  """Dense layer without bias: N = XW, with W trained by gradient descent.

  W has shape (features_count, neurons_count) and starts at zero.

  Args:
    neurons_count: number of output columns of W.
    features_count: number of input features (rows of W).
    learning_rate: optional step size for this layer. When left as None the
      module-level `lr` is looked up at update time, as before.
  """

  def __init__(self, neurons_count, features_count, learning_rate=None):
    self.X = np.array([])
    # Float zeros (rather than a nested list of Python ints) so the weights
    # have a floating dtype from the start.
    self.W = np.zeros((features_count, neurons_count))
    self.learning_rate = learning_rate

  def forward_pass(self, X):
    """Store X for the backward pass and return N = XW."""
    self.X = X
    return np.dot(self.X, self.W)

  def backward_pass(self, dL_dN):
    """Update W in place of the old value and return dL/dX.

    Args:
      dL_dN: gradient of the loss with respect to the output of forward_pass.

    Returns:
      dL/dX, computed with the weights that were used in the forward pass.
    """
    dL_dW = np.dot(self.X.T, dL_dN)

    # The input gradient must be taken with the pre-update weights; computing
    # it after the update would mix in weights the forward pass never used.
    dL_dX = np.dot(dL_dN, self.W.T)

    step = lr if self.learning_rate is None else self.learning_rate
    self.W = self.W - (step * dL_dW)
    return dL_dX

### Bias Addition Layer

In [None]:
# bias addition layer

class bias_addition_layer:
  """Adds a trainable bias vector B to every row of its input: Z = N + B.

  Args:
    neurons_count: length of the bias vector.
    learning_rate: optional step size for this layer. When left as None the
      module-level `lr` is looked up at update time, as before.
  """

  def __init__(self, neurons_count, learning_rate=None):
    self.N = np.array([])
    # Float zeros (rather than a list of Python ints) so B has a floating
    # dtype from the start.
    self.B = np.zeros(neurons_count)
    self.learning_rate = learning_rate

  def forward_pass(self, N):
    """Store N and return Z = N + B (B is broadcast across rows)."""
    self.N = N
    return self.N + self.B

  def backward_pass(self, dL_dZ):
    """Update B by gradient descent and return dL/dN.

    Args:
      dL_dZ: gradient of the loss with respect to the output of forward_pass.

    Returns:
      dL/dN, which is `dL_dZ` itself because dZ/dN is the identity.
    """
    # Each row contributes to the same bias, so its gradient is the
    # column-wise sum over rows.
    dL_dB = dL_dZ.sum(axis=0)
    step = lr if self.learning_rate is None else self.learning_rate
    self.B = self.B - (step * dL_dB)
    return dL_dZ

### Softmax Layer

In [None]:
# softmax layer

class softmax_layer:
  """Row-wise softmax activation over a 2-D array of scores."""

  def __init__(self):
    self.Z = np.array([])
    self.softmax = np.array([])

  def forward_pass(self, Z):
    """Return the softmax of each row of Z and cache it for the backward pass.

    Args:
      Z: 2-D array of scores, one row per sample.

    Returns:
      Array of the same shape as Z whose rows each sum to 1.
    """
    self.Z = Z
    # Subtracting the row maximum leaves the softmax unchanged mathematically
    # but keeps np.exp from overflowing to inf (and the ratio from becoming
    # nan) when the scores are large.
    shifted = Z - np.max(Z, axis=1, keepdims=True)
    exponentials = np.exp(shifted)
    self.softmax = exponentials / exponentials.sum(axis=1, keepdims=True)
    return self.softmax

  def backward_pass(self, dL_dSoftmax):
    """Return dL/dZ given the gradient with respect to the softmax output.

    For one row with softmax S the Jacobian is J[j][k] = S[j] * (d_jk - S[k]),
    so (g . J)[k] = S[k] * (g[k] - sum_j g[j] * S[j]). This is evaluated for
    all rows at once instead of building each Jacobian explicitly.

    Args:
      dL_dSoftmax: 2-D array with the same shape as the last forward output.

    Returns:
      2-D array of the same shape holding dL/dZ.
    """
    S = self.softmax
    upstream = np.asarray(dL_dSoftmax)
    weighted_sum = np.sum(upstream * S, axis=1, keepdims=True)
    return S * (upstream - weighted_sum)

### Sigmoid Layer

In [None]:
# sigmoid layer

class sigmoid_layer:
  """Element-wise logistic sigmoid activation, 1 / (1 + exp(-Z))."""

  def __init__(self):
    self.Z = np.array([])  # last input received by forward_pass
    self.sigmoid = np.array([])  # last output produced by forward_pass

  def forward_pass(self, Z):
    """Apply the sigmoid to Z, cache input and output, and return the output."""
    self.Z = Z
    decay = np.exp(-1 * Z)
    self.sigmoid = 1 / (1 + decay)
    return self.sigmoid

  def backward_pass(self, dL_dSigmoid):
    """Return dL/dZ from the gradient with respect to the sigmoid output.

    Uses the cached output s of the last forward pass: ds/dZ = s * (1 - s).
    """
    activation = self.sigmoid
    local_slope = activation * (1 - activation)
    return local_slope * dL_dSigmoid

### Mean Squared Loss Layer

In [None]:
#mean squared loss

class mse_layer:
  """Squared-error loss between predictions P and targets Y.

  NOTE: despite the name, forward_pass does not divide by the number of
  samples; it returns the product (P - Y)^T (P - Y). backward_pass returns
  P - Y with no factor of 2 and no 1/n.
  """

  def __init__(self):
    self.P = np.array([])  # predicted values
    self.Y = np.array([])  # actual values

  def forward_pass(self, P, Y):
    """Cache P and Y and return (P - Y)^T (P - Y)."""
    self.P, self.Y = P, Y
    residual = self.P - self.Y
    return np.dot(residual.T, residual)

  def backward_pass(self):
    """Return the residual P - Y of the last forward pass as dL/dP."""
    return self.P - self.Y

### Cross Entropy Loss Layer

In [None]:
# cross entropy loss layer

class cross_entropy_loss_layer:
  """Cross-entropy loss between predicted probabilities P and labels Y."""

  # Lower bound applied to probabilities before log / division. Without it a
  # probability of exactly 0 yields log(0) = -inf, 0 * -inf = nan in the loss,
  # and a division by zero in the gradient.
  EPSILON = 1e-12

  def __init__(self):
    self.P = np.array([])  # predictions
    self.Y = np.array([])  # actual labels

  def forward_pass(self, P, Y):
    """Cache P and Y and return the total loss as a 1x1 array.

    Args:
      P: 2-D array of predicted probabilities, one row per sample.
      Y: 2-D array of labels with the same shape as P.

    Returns:
      np.array([[loss]]) where loss is -sum(Y * log(P)) over all rows, with
      P clamped from below at EPSILON.
    """
    self.P = P
    self.Y = Y
    safe_P = np.maximum(P, self.EPSILON)
    # Loss of each row first, then the losses of all rows added together.
    cross_entropy_loss = (-1 * Y * np.log(safe_P)).sum(axis=1).sum(axis=0)
    return np.array([[cross_entropy_loss]])

  def backward_pass(self):
    """Return dL/dP = -Y / P for the last forward pass (P clamped at EPSILON)."""
    safe_P = np.maximum(self.P, self.EPSILON)
    return -self.Y / safe_P

### Tanh Activation Layer

In [None]:
#tanh layer

class tanh_layer:
    """Element-wise hyperbolic tangent activation."""

    def __init__(self):
        self.Z = np.array([])  # last input received by forward_pass

    def forward_pass(self,Z):
        """Cache Z and return tanh(Z)."""
        self.Z = Z
        return np.tanh(Z)

    def backward_pass(self, outGradient):
        """Return dL/dZ from the gradient with respect to the tanh output.

        The local derivative is evaluated from the cached input as
        4 / (e^Z + e^-Z)^2 and multiplied element-wise by `outGradient`.
        """
        pre_activation = self.Z
        denominator = np.exp(pre_activation) + np.exp(-pre_activation)
        local_slope = 4 / denominator ** 2
        return local_slope * outGradient


Implement the convolution layer for a 1-channel input and an n-channel output (n >= 1). Implement both the forward and the backward pass. Implement the flatten operation.
https://towardsdatascience.com/forward-and-backward-propagation-in-convolutional-neural-networks-64365925fdfa

## Convolution Layer (generalized). To create a layer with a 1-channel input, simply pass C=1 during object creation.

In [None]:
# Rebinds the global learning rate (it was 10 ** -6 above). Layers look up
# `lr` when backward_pass runs, not when they are defined, so this value also
# applies to the dense and bias layers defined earlier once this line has run.
lr = 0.001
class convolution_layer:
  """2-D convolution layer with C input channels and F output channels.

  Filters and biases are initialised from a standard normal distribution.
  Stride and zero padding default to 1 and are stored as the attributes
  `stride` and `pad`.

  Args:
    H: height of each input image.
    W: width of each input image.
    F: number of filters (= number of output channels).
    C: number of input channels.
    HH: height of each filter.
    WW: width of each filter.
    learning_rate: optional step size for this layer. When left as None the
      module-level `lr` is looked up at update time, as before.
  """

  def __init__(self, H, W, F, C, HH, WW, learning_rate=None):
    self.N = 1 # num of input images; refreshed from the data in forward_pass
    self.H = H # height of input image
    self.W = W # width of input image
    self.F = F # number of filters / output channels
    self.C = C # number of input channels
    self.HH = HH # height of kernel
    self.WW = WW # width of kernel
    self.w = np.random.randn(F, C, HH, WW)
    self.b = np.random.randn(F,)
    self.stride = 1
    self.pad = 1
    self.x = np.array([])
    self.learning_rate = learning_rate

  def _zero_pad(self, x):
    """Return x zero-padded by `self.pad` on both spatial axes."""
    p = self.pad
    return np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant", constant_values=0)

  def forward_pass(self, x):
    """Convolve x with every filter and add the per-filter bias.

    Args:
      x: array whose elements can be reshaped to (N, C, H, W); N is inferred
        from the number of elements.

    Returns:
      Array of shape (N, F, H_output, W_output).
    """
    x = np.asarray(x).reshape(-1, self.C, self.H, self.W)
    self.N = x.shape[0]
    self.x = x
    H_output = 1 + (self.H + 2 * self.pad - self.HH) // self.stride
    W_output = 1 + (self.W + 2 * self.pad - self.WW) // self.stride
    output = np.zeros((self.N, self.F, H_output, W_output))
    padded_x = self._zero_pad(x)

    for i in range(H_output):
      top = i * self.stride
      for j in range(W_output):
        left = j * self.stride
        window = padded_x[:, :, top:top + self.HH, left:left + self.WW]
        # Contract channel and kernel axes: (N, C, HH, WW) x (F, C, HH, WW)
        # -> (N, F), i.e. every sample against every filter at this position.
        output[:, :, i, j] = np.tensordot(window, self.w, axes=([1, 2, 3], [1, 2, 3])) + self.b
    return output

  def backward_pass(self, derivative_out):
    """Update filters and biases and return the gradient for the input.

    Args:
      derivative_out: gradient with respect to the forward output, of shape
        (N, F, H_output, W_output).

    Returns:
      Gradient with respect to the (unpadded) input, shape (N, C, H, W).
    """
    _, _, height_out, width_out = derivative_out.shape

    x_padded = self._zero_pad(self.x)
    # Float buffer regardless of the input dtype, so accumulating float
    # gradients never fails on integer-typed inputs.
    dx_padded = np.zeros(x_padded.shape, dtype=float)
    dw = np.zeros_like(self.w)

    # Iterate over OUTPUT positions (one gradient value per position) and map
    # each back to its input window through the stride.
    for i in range(height_out):
      top = i * self.stride
      for j in range(width_out):
        left = j * self.stride
        grad = derivative_out[:, :, i, j]  # (N, F)
        window = x_padded[:, :, top:top + self.HH, left:left + self.WW]
        # (N, F) x (F, C, HH, WW) -> (N, C, HH, WW)
        dx_padded[:, :, top:top + self.HH, left:left + self.WW] += np.tensordot(grad, self.w, axes=([1], [0]))
        # (N, F) x (N, C, HH, WW) -> (F, C, HH, WW)
        dw += np.tensordot(grad, window, axes=([0], [0]))

    db = derivative_out.sum(axis=(0, 2, 3))

    # Strip exactly `pad` rows/columns; explicit bounds also work for pad == 0.
    dx = dx_padded[:, :, self.pad:self.pad + self.H, self.pad:self.pad + self.W]

    # weights and biases update
    step = lr if self.learning_rate is None else self.learning_rate
    self.w = self.w - (step * dw)
    self.b = self.b - (step * db)
    return dx

Train this CNN on the MNIST dataset. Layer 1: convolution layer with 16 output
channels + flatten + tanh activation. Layer 2: 10 output neurons with linear
activation. Loss: softmax cross-entropy.

Convolution Layer --> Flatten Layer --> Tanh Activation --> Matrix Multiplication / Dense Layer --> Bias Addition --> Softmax Activation --> Cross Entropy Loss Layer

In [None]:
# loading dataset
from sklearn.datasets import load_digits

# load_digits(return_X_y=True) yields (training data, integer labels)
X, Y_mnist = load_digits(return_X_y=True)
print(X.shape)
num_of_features = X.shape[1]
num_of_labels = 10
print(num_of_features)

# one hot encoding on true labels: put a 1 in the column given by each label
Y = np.zeros((len(X), num_of_labels))
Y[np.arange(len(X)), Y_mnist] = 1
print("Y =", Y.shape)

(1797, 64)
64
Y = (1797, 10)


In [None]:
# def sgd(X, Y, num_of_iterations, model)
def sgd_q6(X, Y, num_of_iterations):
  """Train the convolutional classifier one sample at a time and print losses.

  The model is: convolution (16 filters of 3x3 on an 8x8 single-channel
  image) -> flatten -> tanh -> dense (1024 -> 10) -> bias -> softmax ->
  cross-entropy loss. Every layer updates its own parameters inside its
  backward pass, so one forward/backward pair is one SGD step.

  Args:
    X: indexable collection of samples; each X[j] must hold the 64 values of
      one 8x8 image.
    Y: indexable collection of label rows matching the 10 network outputs
      (one-hot rows in this notebook).
    num_of_iterations: number of full passes over X.

  Returns:
    None. After training, the per-sample loss is recomputed with the final
    weights and the first five and last five values are printed.
  """
  # defining the model
  conv = convolution_layer(8, 8, 16, 1, 3, 3)
  flat = flatten()
  activation = tanh_layer()
  dense = matrix_multiplication_layer(10, 64*16)
  bias = bias_addition_layer(10)
  softmax = softmax_layer()
  loss_layer = cross_entropy_loss_layer()

  def model_forward_pass(curr_training_input, curr_training_label):
    """Run one sample through the network; return (probabilities, loss)."""
    features = activation.forward_pass(flat.forward_pass(conv.forward_pass(curr_training_input)))
    # The dense layer expects a 2-D (1, features) row, not a 1-D vector.
    features = features.reshape(1, features.shape[0])
    P = softmax.forward_pass(bias.forward_pass(dense.forward_pass(features)))
    L = loss_layer.forward_pass(P, curr_training_label)
    return P, L

  def model_backward_pass():
    """Propagate the loss gradient back through every layer, updating each."""
    gradient = loss_layer.backward_pass()
    for layer in (softmax, bias, dense, activation, flat, conv):
      gradient = layer.backward_pass(gradient)

  def sample(j):
    """Return sample j and its label, each wrapped as a one-row array."""
    return np.array([X[j]]), np.array([Y[j]])

  # training the model
  for epoch in range(num_of_iterations):
    print("iter =", epoch)
    for j in range(len(X)): # taking each sample one by one
      model_forward_pass(*sample(j))
      model_backward_pass()

  # calculating the loss of every sample after the weights have been updated
  loss_values = []
  for j in range(len(X)):
    _, loss = model_forward_pass(*sample(j))
    loss_values.append(loss[0][0])
  print("LOSS VALUES =")
  print(loss_values[:5])
  # Last five samples; the previous slice [-6:-1] left out the final one.
  print(loss_values[-5:])

  print("Training done.")

In [None]:
# Train on the loaded digits data for 2 full passes over all samples; the
# function prints its progress and a summary of the per-sample losses.
sgd_q6(X,Y,2)

iter = 0
iter = 1
LOSS VALUES =
[0.005572199435324734, 0.005554576308231158, 0.2674621561841253, 0.014307564291114407, 0.017306181611098245]
[0.01334367980728656, 0.027961862761544786, 0.0078003480787037964, 0.02631689970462639, 0.07024985350835393]
Training done.
