<a href="https://colab.research.google.com/github/rujuldwivedi/Projects/blob/main/NumPy_Models/LabAssignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [61]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing , load_iris, load_digits
from sklearn.preprocessing import Normalizer,OneHotEncoder
from sklearn.model_selection import train_test_split
from tqdm import trange
import matplotlib.pyplot as plt

In [62]:
class MultiplicationLayer :
    def __init__(self, X, W) :
        self.X = X
        self.W = W

    def __str__(self,):
        return " An instance of Muliplication Layer."

    def forward(self):
        self.Z = np.dot(self.X, self.W)

    def backward(self):
        self.dZ_dW = (self.X).T
        self.dZ_daZ_prev = self.W

In [63]:
class BiasAdditionLayer :
    def __init__(self, Z : np.ndarray , bias : np.ndarray ):
        self.B = bias
        self.Z = Z

    def __str__(self,):
        return "An instance of Bias Addition Layer."

    def forward(self,):
        self.Z = self.Z + self.B

    def backward(self,):
        self.dZ_dB = np.identity( self.B.shape[1] )

In [64]:
class MeanSquaredLossLayer :
    def __init__(self, Y : np.ndarray , Y_hat : np.ndarray):
        self.Y = Y
        self.aZ = Y_hat

    def __str__(self,):
        return "An instance of Mean Squared Loss Layer"

    def forward(self, ):
        self.L = np.mean( ( self.aZ - self.Y)**2 )

    def backward(self,):
        self.dL_daZ = (2/len(self.Y))*(self.aZ - self.Y).T

In [65]:
class SoftMaxActivation :
    def __init__(self, Z):
        self.Z = Z

    def __str__(self,):
        return "An instance of Softmax Activation Layer"

    def forward(self,):
        self.aZ = self.softmax(self.Z)

    def backward(self,):
        self.daZ_dZ = np.diag( self.aZ.reshape(-1) ) - (self.aZ.T)@( (self.aZ))

    @staticmethod
    def softmax(Z : np.ndarray):
        max_Z = np.max( Z, axis=1 ,keepdims=True )
        return (np.exp(Z - max_Z ))/np.sum( np.exp(Z - max_Z), axis=1 , keepdims=True)

In [66]:
class SigmoidActivation :
    def __init__(self,Z ):
        self.Z = Z

    def __str__(self,):
        return "An instance of Sigmoid Activation Layer"

    def forward(self,):
        self.aZ = self.sigmoid( self.Z )

    def backward(self,):
        diag_entries = np.multiply(self.aZ, 1-self.aZ).reshape(-1)
        self.daZ_dZ = np.diag(diag_entries)

    @staticmethod
    def sigmoid( Z : np.ndarray ) :
        return  1./(1 + np.exp(-Z) )

In [67]:
class CrossEntropyLossLayer :
    def __init__(self, Y , Y_pred):
        self.Y = Y
        self.aZ = Y_pred
        self.epsilon = 1e-40


    def __str__(self, ):
        return "An instance of Cross Entropy Loss Layer"

    def forward(self, ):
        self.L = - np.sum( self.Y * np.log(self.aZ+self.epsilon) )

    def backward(self, ):
        self.dL_daZ = -1*(self.Y/(self.aZ + self.epsilon)).T

In [68]:
class LinearActivation :
    def __init__(self, Z):
        self.Z = Z

    def __str__(self,):
        return "An instance of Linear Activation."

    def forward(self, ):
        self.aZ = self.Z

    def backward(self,):
        self.daZ_dZ = np.identity( self.Z.shape[1] )

In [69]:
class tanhActivation:
    def __init__(self, Z):
        self.Z = Z

    def __str__(self,):
        return "An instance of tanhActivation class."

    def forward(self,):
        self.aZ = np.tanh(self.Z)

    def backward(self,):
        self.daZ_dZ = np.diag(1 - self.aZ.reshape(-1)**2)

In [70]:
class ReLUActivation:
    def __init__(self, Z):
        self.Z = Z
        self.Leak = 0.01

    def __str__(self,):
        return "An instance of ReLU activation"

    def forward(self,):
        self.aZ = np.maximum(self.Z,0)

    def backward(self,):
        self.daZ_dZ = np.diag( [1. if x>=0 else self.Leak for x in self.aZ.reshape(-1)])

In [71]:
def load_data(dataset_name='california',
             normalize_X=False,
             normalize_y=False,
             one_hot_encode_y = False,
             test_size=0.2):
    if dataset_name == 'california' :
        data = fetch_california_housing()
    elif dataset_name == 'iris' :
        data = load_iris()
    elif dataset_name == 'mnist':
        data = load_digits()
        data['data'] = 1*(data['data']>=8)

    X = data['data']
    y = data['target'].reshape(-1,1)

    if normalize_X == True :
        normalizer = Normalizer()
        X  = normalizer.fit_transform(X)

    if normalize_y == True :
        normalizer = Normalizer()
        y = normalizer.fit_transform(y)

    if one_hot_encode_y == True:
        encoder = OneHotEncoder()
        y = encoder.fit_transform(y).toarray()

    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=test_size)
    return X_train, y_train, X_test, y_test

In [72]:
class Layer:
    def __init__(self, n_inp, n_out, activation_name="linear", seed=42):
        np.random.seed(seed)

        self.n_inp = n_inp
        self.n_out = n_out

        self.X = np.random.random((1, n_inp))
        self.Z = np.random.random((1, n_out))

        self.W = np.random.random((n_inp, n_out)) * \
            np.sqrt(2 / (n_inp + n_out))
        self.B = np.random.random((1, n_out))*np.sqrt(2 / (1 + n_out))

        self.multiply_layer = MultiplicationLayer(self.X, self.W)
        self.bias_add_layer = BiasAdditionLayer(self.B, self.B)

        if activation_name == 'linear':
            self.activation_layer = LinearActivation(self.Z)
        elif activation_name == 'sigmoid':
            self.activation_layer = SigmoidActivation(self.Z)
        elif activation_name == 'softmax':
            self.activation_layer = SoftMaxActivation(self.Z)
        elif activation_name == 'tanh':
            self.activation_layer = tanhActivation(self.Z)
        elif activation_name == 'relu':
            self.activation_layer = ReLUActivation(self.Z)

    def forward(self,):
        self.multiply_layer.X = self.X
        self.multiply_layer.forward()

        self.bias_add_layer.Z = self.multiply_layer.Z
        self.bias_add_layer.forward()

        self.activation_layer.Z = self.bias_add_layer.Z
        self.activation_layer.forward()

        self.Z = self.activation_layer.aZ

    def backward(self,):
        self.activation_layer.backward()
        self.bias_add_layer.backward()
        self.multiply_layer.backward()

In [73]:
class NeuralNetwork(Layer):
    def __init__(self, layers, loss_name="mean_squared", learning_rate=0.01, seed=42):
        np.random.seed(seed)

        self.layers = layers
        self.n_layers = len(layers)
        self.learning_rate = learning_rate

        self.inp_shape = self.layers[0].X.shape
        self.out_shape = self.layers[-1].Z.shape

        self.X = np.random.random(self.inp_shape)
        self.Y = np.random.random(self.out_shape)

        if loss_name == "mean_squared":
            self.loss_layer = MeanSquaredLossLayer(self.Y, self.Y)
        if loss_name == "cross_entropy":
            self.loss_layer = CrossEntropyLossLayer(self.Y, self.Y)

    def forward(self,):
        self.layers[0].X = self.X
        self.loss_layer.Y = self.Y

        self.layers[0].forward()
        for i in range(1, self.n_layers):
            self.layers[i].X = self.layers[i-1].Z
            self.layers[i].forward()

        self.loss_layer.aZ = self.layers[-1].Z
        self.loss_layer.forward()

    def backward(self,):

        self.loss_layer.Z = self.Y
        self.loss_layer.backward()
        self.grad_nn = self.loss_layer.dL_daZ
        for i in range(self.n_layers-1, -1, -1):
            self.layers[i].backward()

            dL_dZ = np.dot(
                self.layers[i].activation_layer.daZ_dZ, self.grad_nn)
            dL_dW = np.dot(self.layers[i].multiply_layer.dZ_dW, dL_dZ.T)
            dL_dB = np.dot(self.layers[i].bias_add_layer.dZ_dB, dL_dZ).T

            self.layers[i].W -= self.learning_rate*dL_dW
            self.layers[i].B -= self.learning_rate*dL_dB

            self.grad_nn = np.dot(
                self.layers[i].multiply_layer.dZ_daZ_prev, dL_dZ)

            del dL_dZ, dL_dW, dL_dB

In [74]:
def createLayers(inp_shape, layers_sizes, layers_activations):
    layers = []
    n_layers = len(layers_sizes)
    layer_0 = Layer(inp_shape, layers_sizes[0], layers_activations[0])
    layers.append(layer_0)
    inp_shape_next = layers_sizes[0]
    for i in range(1, n_layers):
        layer_i = Layer(inp_shape_next, layers_sizes[i], layers_activations[i])
        layers.append(layer_i)
        inp_shape_next = layers_sizes[i]

    out_shape = inp_shape_next
    return inp_shape, out_shape, layers

In [75]:
def SGD_NeuralNetwork(X_train,
                      y_train,
                      X_test,
                      y_test,
                      nn,
                      inp_shape=1,
                      out_shape=1,
                      n_iterations=1000,
                      task="regression"
                      ):
    iterations = trange(n_iterations, desc="Training ...", ncols=100)

    for iteration, _ in enumerate(iterations):
        randomIndx = np.random.randint(len(X_train))

        X_sample = X_train[randomIndx, :].reshape(1, inp_shape)
        Y_sample = y_train[randomIndx, :].reshape(1, out_shape)

        nn.X = X_sample
        nn.Y = Y_sample

        nn.forward()
        nn.backward()

    if task == "regression":

        nn.X = X_train
        nn.Y = y_train
        nn.forward()
        train_error = nn.loss_layer.L

        nn.X = X_test
        nn.Y = y_test
        nn.forward()
        test_error = nn.loss_layer.L

        if isinstance(nn.loss_layer, MeanSquaredLossLayer):
            print("Mean Squared Loss Error (Train Data)  : %0.5f" % train_error)
            print("Mean Squared Loss Error (Test Data)  : %0.5f" % test_error)

    if task == "classification":

        nn.X = X_train
        nn.Y = y_train
        nn.forward()
        y_true = np.argmax(y_train, axis=1)
        y_pred = np.argmax(nn.loss_layer.aZ, axis=1)
        acc = 1*(y_true == y_pred)
        print("Classification Accuracy (Training Data ): {0}/{1} = {2} %".format(
            sum(acc), len(acc), sum(acc)*100/len(acc)))

        nn.X = X_test
        nn.Y = y_test
        nn.forward()
        y_true = np.argmax(y_test, axis=1)
        y_pred = np.argmax(nn.loss_layer.aZ, axis=1)
        acc = 1*(y_true == y_pred)
        print("Classification Accuracy (Testing Data ): {0}/{1} = {2} %".format(
            sum(acc), len(acc), sum(acc)*100/len(acc)))


In [76]:
X_train, y_train, X_test, y_test = load_data('california', normalize_X=True, normalize_y=False, test_size=0.2)


In [77]:
inp_shape = X_train.shape[1]
layers_sizes = [1]
layers_activations = ['linear']

inp_shape, out_shape, layers = createLayers(inp_shape, layers_sizes, layers_activations)
loss_nn = 'mean_squared'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.1)

SGD_NeuralNetwork(X_train,y_train,X_test,y_test,nn,inp_shape, out_shape,n_iterations=10000,task="regression")

Training ...: 100%|████████████████████████████████████████| 10000/10000 [00:00<00:00, 12921.16it/s]

Mean Squared Loss Error (Train Data)  : 1.29365
Mean Squared Loss Error (Test Data)  : 1.37316





In [78]:
inp_shape = X_train.shape[1]
layers_sizes = [13,1]
layers_activations = ['sigmoid','linear']

inp_shape, out_shape, layers = createLayers(inp_shape, layers_sizes, layers_activations)
loss_nn = 'mean_squared'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.01)

SGD_NeuralNetwork(X_train,y_train,X_test,y_test,nn,inp_shape, out_shape,n_iterations=1000,task="regression")

Training ...: 100%|███████████████████████████████████████████| 1000/1000 [00:00<00:00, 4942.31it/s]

Mean Squared Loss Error (Train Data)  : 1.33544
Mean Squared Loss Error (Test Data)  : 1.41885





In [79]:
inp_shape = X_train.shape[1]
layers_sizes = [13,13,1]
layers_activations = ['sigmoid','sigmoid','linear']

inp_shape, out_shape, layers = createLayers(inp_shape, layers_sizes, layers_activations)
loss_nn = 'mean_squared'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.001)

SGD_NeuralNetwork(X_train,y_train,X_test,y_test,nn,inp_shape, out_shape,n_iterations=1000,task="regression")

Training ...: 100%|███████████████████████████████████████████| 1000/1000 [00:00<00:00, 3488.39it/s]


Mean Squared Loss Error (Train Data)  : 1.31529
Mean Squared Loss Error (Test Data)  : 1.39820


In [80]:
X_train, y_train, X_test, y_test = load_data('mnist', one_hot_encode_y=True, test_size=0.3)

In [81]:
inp_shape = X_train.shape[1]
layers_sizes = [89,10]
layers_activations = ['tanh','sigmoid']

inp_shape, out_shape, layers = createLayers(inp_shape, layers_sizes, layers_activations)
loss_nn = 'mean_squared'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.1)

SGD_NeuralNetwork(X_train,y_train,X_test,y_test,nn,inp_shape, out_shape,n_iterations=10000,task="classification")

Training ...: 100%|█████████████████████████████████████████| 10000/10000 [00:02<00:00, 4317.27it/s]

Classification Accuracy (Training Data ): 1188/1257 = 94.5107398568019 %
Classification Accuracy (Testing Data ): 482/540 = 89.25925925925925 %





In [82]:
inp_shape = X_train.shape[1]
layers_sizes = [89,10]
layers_activations = ['tanh','softmax']

inp_shape, out_shape, layers = createLayers(inp_shape, layers_sizes, layers_activations)
loss_nn = 'cross_entropy'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.01)

SGD_NeuralNetwork(X_train,y_train,X_test,y_test,nn,inp_shape, out_shape,n_iterations=10000,task="classification")

Training ...: 100%|█████████████████████████████████████████| 10000/10000 [00:01<00:00, 5216.58it/s]

Classification Accuracy (Training Data ): 1203/1257 = 95.70405727923628 %
Classification Accuracy (Testing Data ): 496/540 = 91.85185185185185 %





In [83]:
def convolutional_layer(zero_pad_input, l_filter):
    inp = zero_pad_input
    l = len(inp)
    m = len(l_filter)
    c = len(zero_pad_input)
    s = (c - m) + 1
    out = np.zeros((l, l))

    for i in range(s):
        for j in range(s):
            temp = np.zeros((m, m))
            row, col = np.indices((m, m))
            temp = np.multiply(zero_pad_input[row+i, col+j], l_filter)

            out[i][j] = np.sum(temp)
    return out

def Forward_pass(inp, l_filter):
    l = len(inp)
    zero_pad_input = np.zeros((l+2, l+2))
    zero_pad_input[1:l+1, 1:l+1] = inp

    f_out = convolutional_layer(zero_pad_input, l_filter)
    return f_out

def rotateMatrix(mat):
    N = len(mat)
    rot_mat = np.zeros((N, N))
    k = N - 1
    t1 = 0
    while (k >= 0 and t1 < 3):
        j = N - 1
        t2 = 0
        while (j >= 0 and t2 < N):
            rot_mat[t1][t2] = mat[k][j]
            j = j - 1
            t2 = t2 + 1
        k = k - 1
        t1 = t1 + 1

    return rot_mat

def Backward_pass(inp, output, l_filter):
    l = len(inp)
    zero_pad_input = np.zeros((l+2, l+2))
    zero_pad_input[1:l+1, 1:l+1] = inp

    grad_filter = convolutional_layer(zero_pad_input, output)

    rotated_filter = rotateMatrix(l_filter)
    zero_pad_output = np.zeros((l+2, l+2))
    zero_pad_output[1:l+1, 1:l+1] = output
    grad_X = convolutional_layer(zero_pad_output, rotated_filter)

    return grad_filter, grad_X

def flatten(inp_mat):
    flatten_vector = []

    for i in range(len(inp_mat)):
        for j in range(len(inp_mat[0])):
            flatten_vector.append(inp_mat[i][j])

    flatten_vector = np.array(flatten_vector)
    return flatten_vector

In [84]:
class ConvolutionalLayer:
    def __init__(self, inp_shape, activation='tanh', filter_shape=(1, 1), lr=0.01, Co=1, seed=42):

        inp = np.random.rand(*inp_shape)
        np.random.seed(seed)
        assert (inp_shape[1] >= filter_shape[0] and inp_shape[2] >= filter_shape[1]), \
            "Error : Input {} incompatible with filter {}".format(
                inp.shape, filter_shape)

        self.inp = np.random.rand(*inp_shape)
        self.inp_shape = inp_shape

        self.Ci = self.inp.shape[0]
        self.Co = Co
        self.filters_shape = (self.Co, self.Ci,  *filter_shape)
        self.out_shape = (self.Co, self.inp.shape[1] - filter_shape[0] + 1, self.inp.shape[2] - filter_shape[1] + 1)
        self.flatten_shape = np.prod(self.out_shape)
        self.lr = lr

        self.filters = np.random.rand(*self.filters_shape)
        self.biases = np.random.rand(*self.out_shape)
        self.out = np.random.rand(*self.out_shape)
        self.flatten_out = np.random.rand(1, self.flatten_shape)

        if activation == 'tanh':
            self.activation_layer = tanhActivation(self.out)

    def forward(self, ):
        self.out = np.copy(self.biases)
        for i in range(self.Co):
            for j in range(self.Ci):
                self.out[i] += self.convolve(self.inp[j], self.filters[i, j])

        self.flatten()
        self.activation_layer.Z = self.flatten_out
        self.activation_layer.forward()

    def backward(self, grad_nn):

        self.activation_layer.backward()
        loss_gradient = np.dot(self.activation_layer.daZ_dZ, grad_nn)
        loss_gradient = np.reshape(loss_gradient, self.out_shape)

        self.filters_gradient = np.zeros(self.filters_shape)
        self.input_gradient = np.zeros(self.inp_shape)
        self.biases_gradient = loss_gradient
        padded_loss_gradient = np.pad(loss_gradient, ((0, 0), (self.filters_shape[2]-1, self.filters_shape[2]-1), (self.filters_shape[3]-1, self.filters_shape[3]-1)))

        for i in range(self.Co):
            for j in range(self.Ci):
                self.filters_gradient[i, j] = self.convolve(self.inp[j], loss_gradient[i])
                rot180_Kij = np.rot90(np.rot90(self.filters[i, j], axes=(0, 1)), axes=(0, 1))
                self.input_gradient[j] += self.convolve(padded_loss_gradient[i], rot180_Kij)

        self.filters -= self.lr*self.filters_gradient
        self.biases -= self.lr*self.biases_gradient

    def flatten(self, ):
        self.flatten_out = self.out.reshape(1, -1)
        x_conv_y = np.zeros((x.shape[0] - y.shape[0] + 1, x.shape[1] - y.shape[1] + 1))
        for i in range(x.shape[0]-y.shape[0] + 1):
            for j in range(x.shape[1] - y.shape[1] + 1):
                tmp = x[i:i+y.shape[0], j:j+y.shape[1]]
                tmp = np.multiply(tmp, y)
                x_conv_y[i, j] = np.sum(tmp)
        return x_conv_y


In [85]:
class CNN :
    def __init__(self,  convolutional_layer, nn, seed = 42):
        self.nn = nn
        self.convolutional_layer = convolutional_layer
        self.X = np.random.rand(*self.convolutional_layer.inp_shape)
        self.Y = np.random.rand(*self.nn.out_shape)

    def forward(self,):
        self.convolutional_layer.inp = self.X
        self.convolutional_layer.forward()

        self.nn.X = self.convolutional_layer.activation_layer.aZ
        self.nn.Y = self.Y
        self.nn.forward()

    def backward(self,):
        self.nn.backward()
        self.convolutional_layer.backward( self.nn.grad_nn )

In [89]:
def SGD_CNN(X_train,
            y_train,
            X_test,
            y_test,
            cnn,
            inp_shape,
            out_shape,
            n_iterations=1000,
            task="classification"): # This function is used to train the convolutional neural network model using stochastic gradient descent

    iterations = trange(n_iterations, desc="Training ...", ncols=100) # progress bar

    for iteration, _ in enumerate(iterations): # train the model for each iteration
        randomIndx = np.random.randint(len(X_train)) # randomly choose a sample subset of the data
        X_sample = X_train[randomIndx, :].reshape(inp_shape) # input data
        Y_sample = y_train[randomIndx, :].reshape(out_shape) # output data

        cnn.X = X_sample # initialize the input data to the sample subset of the data
        cnn.Y = Y_sample # initialize the output data to the sample subset of the data

        cnn.forward()  # Forward Pass
        cnn.backward()  # Backward Pass

    # We'll run only forward pass for train and test data and check accuracy/error because we have already updated the weights and biases in the backward pass

    if task == "classification": # check the accuracy for classification problems
        X_train = X_train.reshape(-1, 8, 8) # reshape the input data
        y_true = np.argmax(y_train, axis=1) # true output
        acc = 0 # accuracy
        for i in range(len(X_train)): # for each sample in the training data
            cnn.X = X_train[i][np.newaxis, :, :] # input data
            cnn.Y = y_train[i] # true output
            cnn.forward() # forward pass of the convolutional neural network
            y_pred_i = np.argmax(cnn.nn.loss_layer.aZ, axis=1) # predicted output
            if (y_pred_i == y_true[i]): # check if the predicted output is equal to the true output
                acc += 1 # increment the accuracy

        print("Classification Accuracy (Training Data ):" + str(acc) + "/" + str(len(y_true)) + " = " + str(acc*100/len(y_true)) + " %" ) #str is used to convert the output to a string

        X_test = X_test.reshape(-1, 8, 8) # reshape the input data
        y_true = np.argmax(y_test, axis=1) # true output
        acc = 0 # accuracy
        for i in range(len(X_test)): # for each sample in the testing data
            cnn.X = X_test[i][np.newaxis, :, :] # input data
            cnn.Y = y_test[i] # true output
            cnn.forward() # forward pass of the convolutional neural network
            y_pred_i = np.argmax(cnn.nn.loss_layer.aZ, axis=1) # predicted output
            if (y_pred_i == y_true[i]): # check if the predicted output is equal to the true output
                acc += 1 # increment the accuracy

        print("Classification Accuracy (Testing Data ):" + str(acc) + "/" + str(len(y_true)) + " = " + str(acc*100/len(y_true)) + " %" ) #str is used to convert the output to a string


In [90]:
X_train, y_train, X_test, y_test = load_data('mnist', one_hot_encode_y=True)


In [91]:
conv_inp_shape = (1,8,8)
Co = 16
conv_filter_shape = (3,3)
conv_activation = 'tanh'
convolutional_layer = ConvolutionalLayer(conv_inp_shape,  filter_shape = conv_filter_shape,  Co = Co,  activation = conv_activation, lr = 0.01)
nn_inp_shape = convolutional_layer.flatten_shape
layers_sizes = [10]
layers_activations = ['softmax']

nn_inp_shape, nn_out_shape, layers = createLayers(nn_inp_shape, layers_sizes, layers_activations)
loss_nn = 'cross_entropy'

nn = NeuralNetwork(layers, loss_nn, learning_rate=0.01)

cnn = CNN( convolutional_layer, nn)
out_shape =  (1, layers_sizes[-1])

SGD_CNN(X_train,y_train,X_test,y_test, cnn,conv_inp_shape, out_shape,n_iterations=5000)

Training ...:   0%|                                                        | 0/5000 [00:00<?, ?it/s]


AttributeError: 'ConvolutionalLayer' object has no attribute 'convolve'