[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/khetansarvesh/CV/blob/main/Foundation_Model_Digit_Classification_CNN_scratch.ipynb)

In [None]:
import numpy as np
from scipy.signal import correlate2d
from scipy.signal import convolve2d

# Dataset

In [None]:
import tensorflow.keras as keras
import numpy as np
from keras.utils import to_categorical

# Fetch Fashion MNIST (28x28 grayscale images with integer class labels).
(train_images, train_labels), (test_images, test_labels) = keras.datasets.fashion_mnist.load_data()

# Both splits are carved out of the training images: the first 5000 samples are
# used for training and the following 5000 are held out for evaluation.
# Pixel values are rescaled from [0, 255] to [0, 1]; labels are one-hot encoded.
X_train, X_test = train_images[:5000] / 255.0, train_images[5000:10000] / 255.0
y_train = to_categorical(train_labels[:5000])
y_test = to_categorical(train_labels[5000:10000])

# Training

In [None]:
class Convolution:
    """Single-channel 2D convolution layer (valid cross-correlation) followed by ReLU.

    The layer holds ``num_filters`` square kernels and one bias per output
    position (untied biases, same shape as the output).
    """

    def __init__(self, input_shape, filter_size, num_filters):
        """Create the layer.

        input_shape : (height, width) of the 2D input
        filter_size : side length of each square kernel
        num_filters : number of kernels / output feature maps
        """
        input_height, input_width = input_shape
        self.num_filters = num_filters
        self.input_shape = input_shape

        # Shapes of the kernel bank and of the layer output (no padding, stride 1)
        self.filter_shape = (num_filters, filter_size, filter_size)
        self.output_shape = (num_filters, input_height - filter_size + 1, input_width - filter_size + 1)

        # Initializing parameters with standard-normal random numbers
        self.filters = np.random.randn(*self.filter_shape)
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input_data):
        """Return ReLU(correlate(input, filters) + biases), shape ``self.output_shape``."""
        self.input_data = input_data

        pre_activation = np.zeros(self.output_shape)

        # Performing 2D cross-correlation without any padding, one kernel at a time
        for i in range(self.num_filters):
            pre_activation[i] = correlate2d(self.input_data, self.filters[i], mode="valid")

        # The biases are trained in backward(), so they must take part in the output
        pre_activation += self.biases

        # Cached so that backward() can apply the ReLU derivative
        self.pre_activation = pre_activation

        # Applying the ReLU activation function on all feature maps
        return np.maximum(pre_activation, 0)

    def backward(self, inp_p_bp, lr):
        '''
        inp_p_bp : gradient of the loss w.r.t. this layer's output, received
                   from the next layer during back propagation
                   (same shape as the forward output)
        lr       : learning rate used for the in-place parameter update

        Returns the gradient of the loss w.r.t. this layer's input.
        Must be called after forward(), whose cached values it uses.
        '''

        # ReLU passes gradient only where the pre-activation was positive
        dL_dz = inp_p_bp * (self.pre_activation > 0)

        # initializing gradients
        opt_bp = np.zeros(self.input_data.shape)
        dL_dfilters = np.zeros_like(self.filters)

        for i in range(self.num_filters):

            # Gradient of the loss with respect to the kernel weights
            dL_dfilters[i] = correlate2d(self.input_data, dL_dz[i], mode="valid")

            # Gradient of the loss with respect to the inputs
            # (computed with the kernels as they were during the forward pass)
            opt_bp += convolve2d(dL_dz[i], self.filters[i], mode="full")

        # Updating the parameters with learning rate
        self.filters -= lr * dL_dfilters
        self.biases -= lr * dL_dz

        # returning the gradient for next layer back propagation
        return opt_bp

In [None]:
class MaxPool:
    """Non-overlapping max pooling over square windows of side ``pool_size``.

    Rows/columns that do not fill a complete window at the bottom/right edge
    are ignored in the forward pass and receive zero gradient in the backward pass.
    """

    def __init__(self, pool_size):
        self.pool_size = pool_size

    def _windows(self, data):
        """View the pooled region of ``data`` as (filters, out_h, pool, out_w, pool)."""
        size = self.pool_size
        covered = data[:, :self.output_height * size, :self.output_width * size]
        return covered.reshape(
            self.num_filters, self.output_height, size, self.output_width, size
        )

    def forward(self, input_data):
        """Return the per-window maxima of a (filters, height, width) array."""
        self.input_data = input_data
        self.num_filters, self.input_height, self.input_width = input_data.shape
        self.output_height = self.input_height // self.pool_size
        self.output_width = self.input_width // self.pool_size

        # Output buffer: one value per (filter, window row, window column)
        self.output = np.zeros((self.num_filters, self.output_height, self.output_width))

        # Nothing to reduce when no complete window fits
        if self.output.size:
            # Axes 2 and 4 run over the rows/columns inside each window
            self.output[...] = self._windows(input_data).max(axis=(2, 4))

        return self.output

    def backward(self, dL_dout, lr):
        """Route ``dL_dout`` back to the position(s) holding each window's maximum.

        Every element equal to its window maximum receives the full upstream
        gradient of that window (ties are not split). ``lr`` is unused: the
        layer has no trainable parameters.
        """
        # initializing output of backprop
        dL_dinput = np.zeros_like(self.input_data)

        if not (self.num_filters and self.output_height and self.output_width):
            return dL_dinput

        windows = self._windows(self.input_data)

        # True wherever an element equals the maximum of its own window
        mask = windows == windows.max(axis=(2, 4), keepdims=True)

        # One upstream gradient per window, broadcast across the window's cells
        upstream = dL_dout[:self.num_filters, :self.output_height, :self.output_width]
        routed = upstream[:, :, None, :, None] * mask

        pooled_rows = self.output_height * self.pool_size
        pooled_cols = self.output_width * self.pool_size
        dL_dinput[:, :pooled_rows, :pooled_cols] = routed.reshape(
            self.num_filters, pooled_rows, pooled_cols
        )

        return dL_dinput

In [None]:
class Fully_Connected:
    """Dense layer with a softmax activation, operating on one sample at a time.

    The input is flattened to a vector of length ``input_size``; the output is
    a column vector of ``output_size`` class probabilities.
    """

    def __init__(self, input_size, output_size):
        self.input_size = input_size # Size of the inputs coming
        self.output_size = output_size # Size of the output producing
        self.weights = np.random.randn(output_size, self.input_size)
        self.biases = np.random.rand(output_size, 1)

    def softmax(self, z):
        """Return the softmax of ``z`` taken along axis 0."""
        # Shift by the (per-column) maximum so np.exp cannot overflow;
        # softmax is invariant to this shift.
        shifted_z = z - np.max(z, axis=0, keepdims=True)
        exp_values = np.exp(shifted_z)
        return exp_values / np.sum(exp_values, axis=0, keepdims=True)

    def softmax_derivative(self, s):
        """Return the softmax Jacobian diag(s) - s s^T for a column vector ``s``."""
        return np.diagflat(s) - np.dot(s, s.T)

    def forward(self, input_data):
        """Flatten ``input_data``, apply the affine map and return softmax probabilities.

        Returns an array of shape (output_size, 1).
        """
        self.input_data = input_data

        # Flattening the inputs from the previous layer into a row vector
        flattened_input = input_data.flatten().reshape(1, -1)
        self.z = np.dot(self.weights, flattened_input.T) + self.biases

        # Applying Softmax
        self.output = self.softmax(self.z)
        return self.output

    def backward(self, dL_dout, lr):
        """Back-propagate through softmax and the affine map, then update parameters.

        dL_dout : gradient of the loss w.r.t. the softmax output, shape (output_size, 1)
        lr      : learning rate used for the in-place update

        Returns the gradient w.r.t. the layer input, reshaped to the shape of
        the array that was passed to forward().
        """
        # Gradient of the loss with respect to the pre-activation (z):
        # chain the upstream gradient through the softmax Jacobian
        dL_dy = np.dot(self.softmax_derivative(self.output), dL_dout)

        # Gradient of the loss with respect to the weights (outer product with the input)
        dL_dw = np.dot(dL_dy, self.input_data.flatten().reshape(1, -1))

        # Gradient of the loss with respect to the biases
        dL_db = dL_dy

        # Gradient of the loss with respect to the input data; computed before
        # the update so it uses the weights from the forward pass
        dL_dinput = np.dot(self.weights.T, dL_dy)
        dL_dinput = dL_dinput.reshape(self.input_data.shape)

        # Update the weights and biases based on the learning rate and gradients
        self.weights -= lr * dL_dw
        self.biases -= lr * dL_db

        # Return the gradient of the loss with respect to the input data
        return dL_dinput

In [None]:
def cross_entropy_loss(predictions, targets):
    """Return the cross-entropy between ``predictions`` and one-hot ``targets``.

    The sum is divided by the length of the first axis of ``targets`` (the
    number of classes for a single 1-D one-hot label), which is the same
    divisor ``cross_entropy_loss_gradient`` uses.
    """
    targets = np.asarray(targets)
    # Keep probabilities away from 0 and 1 so np.log stays finite
    predictions = np.clip(predictions, 1e-7, 1 - 1e-7)
    return -np.sum(targets * np.log(predictions)) / targets.shape[0]

def cross_entropy_loss_gradient(actual_labels, predicted_probs):
    """Return the gradient of the cross-entropy loss w.r.t. the predicted probabilities.

    Computes ``-y / (p + 1e-7)`` scaled by the length of the first axis of
    ``actual_labels``. Note the argument order: labels first, predictions second.
    """
    # Length of the leading axis of the label array
    scale = actual_labels.shape[0]
    # The small epsilon keeps the division finite when a probability is zero
    return -actual_labels / (predicted_probs + 1e-7) / scale

In [None]:
# Network: one 6x6 convolution filter -> 2x2 max pooling -> softmax classifier
conv = Convolution(X_train[0].shape, 6, 1)
pool = MaxPool(2)

# Size of the flattened pooling output, derived from the layer shapes instead
# of being hard-coded (28x28 input -> 1x23x23 conv output -> 1x11x11 = 121)
pooled_features = (
    conv.output_shape[0]
    * (conv.output_shape[1] // pool.pool_size)
    * (conv.output_shape[2] // pool.pool_size)
)
full = Fully_Connected(pooled_features, y_train.shape[1])
lr = 0.01
epochs = 200

for epoch in range(epochs):
    total_loss = 0.0
    correct_predictions = 0

    # Plain SGD: the parameters are updated after every single sample
    for image, label in zip(X_train, y_train):

        # Forward pass
        conv_out = conv.forward(image)
        pool_out = pool.forward(conv_out)
        full_out = full.forward(pool_out)
        loss = cross_entropy_loss(full_out.flatten(), label)
        total_loss += loss

        # The predicted class is the index of the largest probability;
        # compare it with the index of the 1 in the one-hot label
        if np.argmax(full_out) == np.argmax(label):
            correct_predictions += 1

        # Backward pass (each layer updates its own parameters in place)
        gradient = cross_entropy_loss_gradient(label, full_out.flatten()).reshape((-1, 1))
        full_back = full.backward(gradient, lr)
        pool_back = pool.backward(full_back, lr)
        conv_back = conv.backward(pool_back, lr)

    # Print epoch statistics
    average_loss = total_loss / len(X_train)
    accuracy = correct_predictions / len(X_train) * 100.0
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {average_loss:.4f} - Accuracy: {accuracy:.2f}%")

Epoch 1/200 - Loss: 1.0685 - Accuracy: 22.94%
Epoch 2/200 - Loss: 1.0122 - Accuracy: 35.06%
Epoch 3/200 - Loss: 0.9646 - Accuracy: 38.00%
Epoch 4/200 - Loss: 0.9350 - Accuracy: 40.18%
Epoch 5/200 - Loss: 0.9171 - Accuracy: 41.16%
Epoch 6/200 - Loss: 0.9076 - Accuracy: 42.04%
Epoch 7/200 - Loss: 0.9007 - Accuracy: 42.70%
Epoch 8/200 - Loss: 0.8980 - Accuracy: 42.74%
Epoch 9/200 - Loss: 0.8914 - Accuracy: 43.08%
Epoch 10/200 - Loss: 0.8850 - Accuracy: 43.66%
Epoch 11/200 - Loss: 0.8846 - Accuracy: 43.80%
Epoch 12/200 - Loss: 0.8825 - Accuracy: 44.00%
Epoch 13/200 - Loss: 0.8795 - Accuracy: 44.18%
Epoch 14/200 - Loss: 0.8796 - Accuracy: 44.30%
Epoch 15/200 - Loss: 0.8780 - Accuracy: 44.18%
Epoch 16/200 - Loss: 0.8749 - Accuracy: 44.70%
Epoch 17/200 - Loss: 0.8731 - Accuracy: 44.66%
Epoch 18/200 - Loss: 0.8729 - Accuracy: 44.74%
Epoch 19/200 - Loss: 0.8720 - Accuracy: 44.92%
Epoch 20/200 - Loss: 0.8715 - Accuracy: 44.86%
Epoch 21/200 - Loss: 0.8698 - Accuracy: 44.88%
Epoch 22/200 - Loss: 0

# Inference

In [None]:
# Run every held-out sample through the trained network and record the
# predicted class as a one-hot row, so it can be compared with y_test directly.
predictions = []
for data in X_test:
    pool_out = pool.forward(conv.forward(data))
    pred = full.forward(pool_out.flatten())

    # Mark the most probable class with a 1, everything else stays 0
    winner = np.zeros_like(pred)
    winner[np.argmax(pred)] = 1
    predictions.append(winner.ravel())
predictions = np.array(predictions)

In [None]:
from sklearn.metrics import accuracy_score
# scikit-learn's signature is accuracy_score(y_true, y_pred): ground truth first.
# Both arguments are one-hot matrices, so a sample counts as correct only when
# its whole row matches.
accuracy_score(y_test, predictions)

0.5766