In [280]:
import numpy as np
from matplotlib import pyplot as plt
import torch

In [281]:
def linear(x, w, b):
        return np.dot(x, w) + b

In [282]:

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def tanh(x):
    return np.tanh(x)

def softmax(x):
    return np.exp(x)/np.sum(np.exp(x), axis=0)

In [283]:
def linear_backward(dz, x, w, b):
    #dz is the gradient of the loss function with respect to the output of the linear layer which happens to be same as the input of the activation function
    dw = np.dot(x.T, dz)
    db = np.sum(dz, axis=0)
    dx = np.dot(dz, w.T)
    return dx, dw, db

def activation_backward(dz, x, activation):
    if activation == 'sigmoid':
        print(dz.shape, sigmoid(x).shape)
        return dz*sigmoid(x)*(1-sigmoid(x))
    elif activation == 'relu':
        print(dz.shape, relu(x).shape)
        return np.where(x>0, dz, 0)
    elif activation == 'tanh':
        print(dz.shape, tanh(x).shape)
        return 1 - np.tanh(x)**2
    elif activation == 'softmax':
        print(dz.shape, softmax(x).shape)
        return dz*softmax(x)*(1-softmax(x))

In [284]:

class optimiser:
    #This will take care of the collection of optimisation algorithms that can be used to update the weights and biases of the neural network. Later, we will compare the performance of different optimisation algorithms on the same neural network and make some conclusions.
    def __init__(self, learning_rate):
        self.learning_rate = learning_rate

    def gradient_descent(self, weights, biases, dw, db):
        weights -= self.learning_rate*dw
        biases -= self.learning_rate*db
        return weights, biases
    
    def momentum(self, weights, biases, dw, db, beta):
        v_w = beta*v_w + (1-beta)*dw
        v_b = beta*v_b + (1-beta)*db
        weights -= self.learning_rate*v_w
        biases -= self.learning_rate*v_b
        return weights, biases
    
    def rmsprop(self, weights, biases, dw, db, beta):
        s_w = beta*s_w + (1-beta)*dw**2
        s_b = beta*s_b + (1-beta)*db**2
        weights -= self.learning_rate*dw/np.sqrt(s_w + 1e-8)
        biases -= self.learning_rate*db/np.sqrt(s_b + 1e-8)
        return weights, biases
    
    def adam(self, weights, biases, dw, db, beta1, beta2):
        v_w = beta1*v_w + (1-beta1)*dw
        v_b = beta1*v_b + (1-beta1)*db
        s_w = beta2*s_w + (1-beta2)*dw**2
        s_b = beta2*s_b + (1-beta2)*db**2
        v_w_corrected = v_w/(1-beta1)
        v_b_corrected = v_b/(1-beta1)
        s_w_corrected = s_w/(1-beta2)
        s_b_corrected = s_b/(1-beta2)
        weights -= self.learning_rate*v_w_corrected/np.sqrt(s_w_corrected + 1e-8)
        biases -= self.learning_rate*v_b_corrected/np.sqrt(s_b_corrected + 1e-8)
        return weights, biases
    

class plot:
    #This class will take care of the plotting of the neural network. This class will have the following methods:
    #1. plot_loss : This will plot the loss function of the neural network
    #2. plot_accuracy : This will plot the accuracy of the neural network
    #3. plot : This will plot both the loss function and the accuracy of the neural network
    #4. plot_decision_boundary : This will plot the decision boundary of the neural network. This will be useful for the classification problems. This will help us to visualise how the neural network is making the decision on the basis of the input features.
    #5. plot_confusion_matrix : This will plot the confusion matrix of the neural network. This will help us to visualise how the neural network is performing on the basis of the true labels and the predicted labels.
    def __init__(self, x, y, parameters):
        self.x = x
        self.y = y
        self.parameters = parameters

    def plot_loss(self, loss):
        plt.plot(loss)
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Loss Function')
        plt.show()

    def plot_accuracy(self, accuracy):
        plt.plot(accuracy)
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('Accuracy')
        plt.show()

    def plot(self, loss, accuracy):
        plt.plot(loss, label='Loss')
        plt.plot(accuracy, label='Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Loss/Accuracy')
        plt.title('Loss/Accuracy')
        plt.legend()
        plt.show()

    def plot_decision_boundary(self, x, y, parameters):
        pass

    def plot_confusion_matrix(self, y_true, y_pred):
        pass



def mean_squared_error(y_pred, y_true):
    return np.mean((y_pred - y_true)**2)

def cross_entropy_error(y_pred, y_true):
    return -np.sum(y_true*np.log(y_pred))


def predict(x):
    return np.argmax(x, axis=1)

def accuracy(y_pred, y_true):
    return np.mean(y_pred == y_true)
 

In [285]:
def one_hot_encoding(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [286]:

def update_parameters(x, y, weights, biases, a, z, nodes_num, layers_num, activations):
    for i in range(layers_num+1):
        a['a_'+str(i)] = linear(z['z_'+str(i)], weights['w_'+str(i)], biases['b_'+str(i)])
        if activations[i] == 'sigmoid':
            z['z_'+str(i+1)] = sigmoid(a['a_'+str(i)])
        elif activations[i] == 'relu':
            z['z_'+str(i+1)] = relu(a['a_'+str(i)])
        elif activations[i] == 'tanh':
            z['z_'+str(i+1)] = tanh(a['a_'+str(i)])
        elif activations[i] == 'softmax':
            z['z_'+str(i+1)] = softmax(a['a_'+str(i)])
    print('done with update parameters')
    return a, z

def finding_gradients(x, y, weights, biases, a, z, nodes_num, layers_num, activations, optimisers, dw, dz, da, db):
    # Assuming the intention was to calculate the gradient of the loss with respect to z at the last layer
    dz['dz_'+str(layers_num+1)] = z['z_'+str(layers_num+1)] - y
    # If x.shape[0] was meant to be used for normalization or another purpose, it should be assigned or used separately
    batch_size = x.shape[0]  # Example of assigning x.shape[0] to a variable for clarity
    for i in range(layers_num, -1, -1):
        da['da_'+str(i)] = linear_backward(dz['dz_'+str(i+1)], z['z_'+str(i)], weights['w_'+str(i)], biases['b_'+str(i)])[0]
        if i!=0:
            dz['dz_'+str(i)] = activation_backward(da['da_'+str(i)], a['a_'+str(i)], activations[i])
        else:
            dz['dz_'+str(i)] = np.dot(da['da_'+str(i)], weights['w_'+str(i)])
        dw['dw_'+str(i)], db['db_'+str(i)] = linear_backward(dz['dz_'+str(i)], z['z_'+str(i)], weights['w_'+str(i)], biases['b_'+str(i)])[1:]
    return dw, dz, da, db

In [287]:
def train(x, y ,weights, biases, nodes_num, layers_num, activations, loss, optimiser, epochs, learning_rate,a, z, dw, dz, da, db):
    #This function will take care of the training of the neural network. This function will have the following steps:
    #1. Forward pass : This will take care of the forward pass of the neural network. This will take the input features and the initial weights and biases of the neural network and will return the output of the neural network.
    #2. Loss function : This will take care of the loss function of the neural network. This will take the output of the neural network and the true labels and will return the loss of the neural network.
    #3. Backward pass : This will
    #4. Update parameters : This will take care of the updating of the weights and biases of the neural network. This will take the gradients of the weights and biases and will update the weights and biases of the neural network.
    #5. Training loop : This will take care of the training of the neural network. This will take the input features, true labels, initial weights and biases, number of nodes in each layer, number of layers, activation functions, loss function, optimisation algorithm, number of epochs and learning rate as input and will return the trained weights and biases of the neural network.
    #6. Plot : This will plot the loss function and the accuracy of the neural network.
    #7. Return : This will return the trained weights and biases of the neural network.
    
    for i in range(epochs):
        a, z = update_parameters(x, y, weights, biases, a, z, nodes_num, layers_num, activations)
        dw, dz, da, db = finding_gradients(x, y, weights, biases, a, z, nodes_num, layers_num, activations, optimiser, dw, dz, da, db)
        for j in range(layers_num+1):
            weights['w_'+str(j)], biases['b_'+str(j)] = optimiser.gradient_descent(weights['w_'+str(j)], biases['b_'+str(j)], dw['dw_'+str(j)], db['db_'+str(j)])
        loss, accuracy = test(x, y, weights, biases, nodes_num, layers_num, activations, loss, a, z)
        print('Epoch :', i, 'Loss :', loss, 'Accuracy :', accuracy)
    return weights, biases

def test(x, y, weights, biases, nodes_num, layers_num, activations, loss, a, z):
    #This function will take care of the testing of the neural network. This function will take the input features, true labels, trained weights and biases, number of nodes in each layer, number of layers and activation functions as input and will return the loss and accuracy of the neural network.
    for i in range(layers_num+1):
        a['a_'+str(i)] = linear(z['z_'+str(i)], weights['w_'+str(i)], biases['b_'+str(i)])
        if activations[i] == 'sigmoid':
            z['z_'+str(i+1)] = sigmoid(a['a_'+str(i)])
        elif activations[i] == 'relu':
            z['z_'+str(i+1)] = relu(a['a_'+str(i)])
        elif activations[i] == 'tanh':
            z['z_'+str(i+1)] = tanh(a['a_'+str(i)])
        elif activations[i] == 'softmax':
            z['z_'+str(i+1)] = softmax(a['a_'+str(i)])
    loss = cross_entropy_error(z['z_'+str(layers_num+1)], y)
    y_pred = predict(z['z_'+str(layers_num+1)])
    acc = accuracy(y_pred, y)
    return loss, acc


In [288]:

def weights_and_biases_initialisation(x, y, nodes_num, layers_num):
    weights = {}
    biases = {}
    for i in range(layers_num+1):
        print('Layer:', i)
        if i == 0:
            weights['w_0'] = np.random.randn(x.shape[1], nodes_num)
            biases['b_0'] = np.random.randn(1, nodes_num)
        elif i == layers_num:
            weights['w_'+str(i)] = np.random.randn(nodes_num, y.shape[1])
            biases['b_'+str(i)] = np.random.randn(1, y.shape[1])
        else:
            weights['w_'+str(i)] = np.random.randn(nodes_num, nodes_num)
            biases['b_'+str(i)] = np.random.randn(1, nodes_num)
    print('done with weights and biases initialisation')
    return weights, biases

def z_initialisation(x, y, nodes_num, layers_num):
    z={}
    for i in range(layers_num+2):
        print('Layer:', i)
        if i == 0:
            z['z_0'] = x
        else:
            z['z_'+str(i)] = np.zeros((x.shape[0], nodes_num))
    print('done with z initialisation')
    return z


def layer_forward_linear_initialisation(x, y, nodes_num, layers_num):
    a={}

    for i in range(layers_num+1):
        print('Layer:', i)
        a['a_'+str(i)] = np.zeros((x.shape[0], nodes_num)) #we will evaluate in network class using forward class
    print('done with layer forward linear initialisation')
    return a

def activations_initialisation(x, y, nodes_num, layers_num):
    activations = []
    for i in range(layers_num):
        print('Layer:', i)
        activations.append('sigmoid')
    activations.append('softmax')
    print('done with activations initialisation')
    return activations

def dz_initialisation(x, y, nodes_num, layers_num):
    dz = {}
    for i in range(layers_num+1):
        print('Layer:', i)
        dz['dz_'+str(i)] = np.zeros((x.shape[0], nodes_num))
    print('done with dz initialisation')
    return dz

def da_initialisation(x, y, nodes_num, layers_num):
    da = {}
    for i in range(layers_num+1):
        print('Layer:', i)
        da['da_'+str(i)] = np.zeros((x.shape[0], nodes_num))
    print('done with da initialisation')
    return da

def dw_initialisation(x, y, nodes_num, layers_num):
    dw = {}
    for i in range(layers_num+1):
        print('Layer:', i)
        dw['dw_'+str(i)] = np.zeros((x.shape[1], nodes_num))
    print('done with dw initialisation')
    return dw

def db_initialisation(x, y, nodes_num, layers_num):
    db = {}
    for i in range(layers_num+1):
        print('Layer:', i)
        db['db_'+str(i)] = np.zeros((1, nodes_num))
    print('done with db initialisation')
    return db


In [289]:

#Get the MNIST DATASET
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
x, y = mnist['data'], mnist['target']
x = x/255
y = np.array(y, dtype='int')
y = np.eye(10)[y]
x_train, x_test, y_train, y_test = x[:60000], x[60000:], y[:60000], y[60000:]


In [290]:

#Initialise the parameters
nodes_num = 10
layers_num = 2
learning_rate = 0.01
epochs = 100
weights, biases = weights_and_biases_initialisation(x_train, y_train, nodes_num, layers_num)
a = layer_forward_linear_initialisation(x_train, y_train, nodes_num, layers_num)
z = z_initialisation(x_train, y_train, nodes_num, layers_num)
activations = activations_initialisation(x_train, y_train, nodes_num, layers_num)
dz = dz_initialisation(x_train, y_train, nodes_num, layers_num)
da = da_initialisation(x_train, y_train, nodes_num, layers_num)
dw = dw_initialisation(x_train, y_train, nodes_num, layers_num)
db = db_initialisation(x_train, y_train, nodes_num, layers_num)


Layer: 0
Layer: 1
Layer: 2
done with weights and biases initialisation
Layer: 0
Layer: 1
Layer: 2
done with layer forward linear initialisation
Layer: 0
Layer: 1
Layer: 2
Layer: 3
done with z initialisation
Layer: 0
Layer: 1
done with activations initialisation
Layer: 0
Layer: 1
Layer: 2
done with dz initialisation
Layer: 0
Layer: 1
Layer: 2
done with da initialisation
Layer: 0
Layer: 1
Layer: 2
done with dw initialisation
Layer: 0
Layer: 1
Layer: 2
done with db initialisation


In [291]:
#Train the neural network
weights, biases = train(x_train, y_train, weights, biases, nodes_num, layers_num, activations, cross_entropy_error, optimiser(learning_rate), epochs, learning_rate, a, z, dw, dz, da, db)

#Test the neural network
loss, acc = test(x_test, y_test, weights, biases, nodes_num, layers_num, activations, cross_entropy_error, a, z)
print('Loss:', loss)
print('Accuracy:', acc)

#Plot the loss and accuracy of the neural network
plot = plot(x_train, y_train, weights)
plot.plot_loss(loss)
plot.plot_accuracy(acc)
plot.plot(loss, acc)


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return 1/(1 + np.exp(-x))


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with update parameters
(60000, 10) (60000, 10)
(60000, 10) (60000, 10)
done with up

KeyboardInterrupt: 