<a href="https://colab.research.google.com/github/sheldor07/understainding-transformers/blob/main/multilayer-perceptron-from-scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np

# Fetch the Fashion-MNIST train/test splits via the Keras dataset helper
fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Report the image-array shapes, a divider, then the label-array shapes
print(x_train.shape, x_test.shape, sep='\n')
print('----------------')
print(y_train.shape, y_test.shape, sep='\n')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
(60000, 28, 28)
(10000, 28, 28)
----------------
(60000,)
(10000,)


In [2]:
# Flatten every image into one row per sample and divide the values by 255.0
# so the arrays match the width of the network's input layer.
# NOTE: this cell reassigns x_train / x_test from themselves, so running it a
# second time divides by 255.0 again -- re-run the loading cell first.
x_train = x_train.reshape(len(x_train), -1) / 255.0
x_test = x_test.reshape(len(x_test), -1) / 255.0

print(x_train.shape, x_test.shape, sep='\n')

(60000, 784)
(10000, 784)


In [3]:
# Turn the integer class labels into one-hot rows (one column per class)
from keras.utils import to_categorical

y_train, y_test = to_categorical(y_train), to_categorical(y_test)
print(y_train.shape, y_test.shape, sep='\n')

(60000, 10)
(10000, 10)


In [4]:
class NN:
    """Single-hidden-layer perceptron with sigmoid activations.

    The network is trained with plain gradient descent on the squared error
    E = 0.5 * sum((target - output) ** 2). Every call to ``backprop`` performs
    one update using all samples it is given; the gradient is summed (not
    averaged) over those samples.

    Attributes:
        wih: weights input -> hidden, shape (hidden_neurons, input_neurons).
        bih: hidden-layer bias, shape (hidden_neurons, 1).
        who: weights hidden -> output, shape (output_neurons, hidden_neurons).
        bho: output-layer bias, shape (output_neurons, 1).
        lr: learning rate used for every update.
        epochs: number of ``backprop`` calls performed by ``fit``.
    """

    def __init__(self, input_neurons, hidden_neurons, output_neurons, learning_rate, epochs):
        """Store the layer sizes / hyper-parameters and initialise the parameters.

        Args:
            input_neurons: number of input features.
            hidden_neurons: number of units in the hidden layer.
            output_neurons: number of output units.
            learning_rate: step size applied to every gradient update.
            epochs: number of update steps ``fit`` runs.
        """
        # initializing the instance variables
        self.input_neurons = input_neurons
        self.hidden_neurons = hidden_neurons
        self.output_neurons = output_neurons
        self.epochs = epochs
        self.lr = learning_rate  # Learning rate

        # Links of weights from input layer to hidden layer
        # (normal init, std = 1/sqrt(fan-in))
        self.wih = np.random.normal(0.0, pow(self.input_neurons, -0.5), (self.hidden_neurons, self.input_neurons))
        # Column-vector bias: one value per neuron, broadcast over any batch size.
        self.bih = np.zeros((self.hidden_neurons, 1))

        # Links of weights from hidden layer to output layer
        self.who = np.random.normal(0.0, pow(self.hidden_neurons, -0.5), (self.output_neurons, self.hidden_neurons))
        self.bho = np.zeros((self.output_neurons, 1))

    def activation(self, z):
        """Returns the sigmoid of z"""
        z = np.clip(z, -500, 500)  # Avoid overflow error
        return 1 / (1 + np.exp(-z))

    def sigmoid_derivative(self, z):
        """Returns the derivative of the sigmoid evaluated at pre-activation z."""
        s = self.activation(z)
        return s * (1 - s)

    def _feed(self, inputs):
        """Run both layers on column-major inputs of shape (input_neurons, n_samples).

        Returns:
            (hidden_outputs, final_outputs) -- the activations of each layer,
            one column per sample.
        """
        # Passing inputs to the hidden layer and applying the activation
        hidden_outputs = self.activation(np.dot(self.wih, inputs) + self.bih)
        # Passing hidden activations to the output layer and applying the activation
        final_outputs = self.activation(np.dot(self.who, hidden_outputs) + self.bho)
        return hidden_outputs, final_outputs

    # Forward propagation
    def forward(self, input_list):
        """Return the output activations, shape (output_neurons, n_samples).

        Args:
            input_list: a single sample (1-D) or a batch with one sample per row.
        """
        inputs = np.array(input_list, ndmin=2).T
        return self._feed(inputs)[1]

    # Back propagation
    def backprop(self, inputs_list, targets_list):
        """Perform one gradient-descent update on the given samples.

        Args:
            inputs_list: a single sample (1-D) or a batch with one sample per row.
            targets_list: matching targets, one row per sample.
        """
        inputs = np.array(inputs_list, ndmin=2).T
        tj = np.array(targets_list, ndmin=2).T  # Targets

        hidden_outputs, yj = self._feed(inputs)

        # dE/dy for the squared-error loss
        output_errors = yj - tj

        # The sigmoid derivative is expressed through the activation itself,
        # y * (1 - y); applying sigmoid_derivative() to an already-activated
        # value would squash it a second time.
        output_delta = output_errors * yj * (1.0 - yj)

        # Error reaching the hidden layer: propagate the output *delta* through
        # the (not yet updated) hidden->output weights.
        hidden_errors = np.dot(self.who.T, output_delta)
        hidden_delta = hidden_errors * hidden_outputs * (1.0 - hidden_outputs)

        # Updating the weights using Update Rule (the dot product sums over samples)
        self.who -= self.lr * np.dot(output_delta, hidden_outputs.T)
        self.wih -= self.lr * np.dot(hidden_delta, inputs.T)

        # Updating bias: reduce over the sample axis so each bias keeps shape
        # (neurons, 1) and stays usable with any later batch size.
        self.bho -= self.lr * output_delta.sum(axis=1, keepdims=True)
        self.bih -= self.lr * hidden_delta.sum(axis=1, keepdims=True)

    # Performing Gradient Descent Optimization using Backpropagation
    def fit(self, inputs_list, targets_list):
        """Run ``self.epochs`` full-batch updates, printing progress after each."""
        for epoch in range(self.epochs):
            self.backprop(inputs_list, targets_list)
            print(f"Epoch {epoch}/{self.epochs} completed.")

    def predict(self, X):
        """Return output activations with one row per sample, shape (n_samples, output_neurons)."""
        outputs = self.forward(X).T
        return outputs

In [7]:
# Build a 784-64-10 network and run the configured number of training epochs
nn = NN(
    input_neurons=784,
    hidden_neurons=64,
    output_neurons=10,
    learning_rate=0.01,
    epochs=100,
)
nn.fit(x_train, y_train)

Epoch 0/100 completed.
Epoch 1/100 completed.
Epoch 2/100 completed.
Epoch 3/100 completed.
Epoch 4/100 completed.
Epoch 5/100 completed.
Epoch 6/100 completed.
Epoch 7/100 completed.
Epoch 8/100 completed.
Epoch 9/100 completed.
Epoch 10/100 completed.
Epoch 11/100 completed.
Epoch 12/100 completed.
Epoch 13/100 completed.
Epoch 14/100 completed.
Epoch 15/100 completed.
Epoch 16/100 completed.
Epoch 17/100 completed.
Epoch 18/100 completed.
Epoch 19/100 completed.
Epoch 20/100 completed.
Epoch 21/100 completed.
Epoch 22/100 completed.
Epoch 23/100 completed.
Epoch 24/100 completed.
Epoch 25/100 completed.
Epoch 26/100 completed.
Epoch 27/100 completed.
Epoch 28/100 completed.
Epoch 29/100 completed.
Epoch 30/100 completed.
Epoch 31/100 completed.
Epoch 32/100 completed.
Epoch 33/100 completed.
Epoch 34/100 completed.
Epoch 35/100 completed.
Epoch 36/100 completed.
Epoch 37/100 completed.
Epoch 38/100 completed.
Epoch 39/100 completed.
Epoch 40/100 completed.
Epoch 41/100 completed.
Ep

In [None]:
# Predicting probabilities
# predict() accepts a 2-D batch (one sample per row) and transposes the network
# output back to one row per sample, so the whole test set is scored in one call
# instead of a Python loop over every image.
probs = nn.predict(x_test)

# Converting probabilities to one-hot vector format
# Row i gets a 1 in the column of its largest output. Indexing with
# (row, column) pairs is required here: each prediction is 2-D, so a bare
# `prediction[max_idx] = 1` would index the sample axis instead of the class axis.
predictions = np.zeros_like(probs)
predictions[np.arange(len(probs)), np.argmax(probs, axis=1)] = 1


In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but classification_report is not: with the arguments swapped,
# precision and recall are exchanged and support counts predictions.
# Coerce the predictions to a 2-D (n_samples, n_classes) array so both a list of
# per-sample (1, n_classes) rows and an already-stacked matrix are accepted.
y_pred = np.asarray(predictions).reshape(len(y_test), -1)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("CR:", classification_report(y_test, y_pred))