# Step 1 : Load and preprocess data


In [1]:
## import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [2]:
# Read the MNIST training CSV; the digit for each row is in the 'label' column.
data = pd.read_csv('../dataset/mnist/mnist_train.csv')
labels = data['label'].values

# Every remaining column is a pixel: reshape each row to a 28x28 single-channel
# image and scale the intensities by 1/255.
images = data.drop(columns='label').values.reshape((-1, 28, 28, 1)) / 255.0

# Hold out the trailing 20% of the rows for validation (no shuffling is done).
split_ratio = 0.8
split_index = int(split_ratio * len(images))

train_images, train_labels = images[:split_index], labels[:split_index]
val_images, val_labels = images[split_index:], labels[split_index:]

## Step 2: Initialize parameters

In [3]:
# Optimisation settings
learning_rate, num_epochs, batch_size = 0.001, 10, 64

# Network layout: two 3x3 convolutions, one hidden dense layer, one output layer
num_classes = 10
conv1_filters, conv2_filters = 16, 32
fc1_units = 128

# Parameters: weights are Gaussian draws scaled by 0.01, biases start at zero.
# The draws are made in a fixed order after seeding, so the values are
# reproducible from run to run.
np.random.seed(42)
conv1_weights = 0.01 * np.random.randn(3, 3, 1, conv1_filters)
conv1_bias = np.zeros(shape=(1, 1, 1, conv1_filters))
conv2_weights = 0.01 * np.random.randn(3, 3, conv1_filters, conv2_filters)
conv2_bias = np.zeros(shape=(1, 1, 1, conv2_filters))
# The dense layer takes a flattened 7 x 7 x conv2_filters feature map.
fc1_weights = 0.01 * np.random.randn(7 * 7 * conv2_filters, fc1_units)
fc1_bias = np.zeros(shape=(1, fc1_units))
output_weights = 0.01 * np.random.randn(fc1_units, num_classes)
output_bias = np.zeros(shape=(1, num_classes))


The training data contains 60,000 examples, one per row. The first column holds the label of each example; every other column is a feature, in this case a pixel value.

In [7]:
# Split the dataset into a validation set and a training set.
# NOTE(review): train_data, m and n are defined outside this cell --
# presumably m, n = train_data.shape; confirm against the loading cell.

# Validation set: the first 1000 rows, transposed so each column is one example.
data_val = train_data[:1000].T
# Row 0 of the transposed slice holds the labels; rows 1..n-1 are the features.
# Converting and dividing out of place always produces a fresh float64 array,
# so train_data is never written through a view and re-running this cell
# cannot normalise the pixels twice. (np.asfarray, used here previously, was
# removed in NumPy 2.0; np.asarray with an explicit dtype is the replacement.)
X_val = np.asarray(data_val[1:n], dtype=np.float64) / 255.0
Y_val = data_val[0]

# Training set: the remaining rows, processed exactly like the validation set.
data_train = train_data[1000:m].T
X_train = np.asarray(data_train[1:n], dtype=np.float64) / 255.0
Y_train = data_train[0]

In [8]:
def ReLU(Z):
    """Rectified linear unit: the element-wise maximum of Z and zero."""
    rectified = np.maximum(Z, 0)
    return rectified

In [9]:
def softmax(Z):
    """Softmax over the first axis of Z.

    For a (classes, batch) score matrix every column is turned into a
    probability distribution; a 1-D vector is normalised as a whole.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps np.exp from overflowing to
    inf (and the ratio from becoming NaN) when scores are large.

    Parameters
    ----------
    Z : array of scores with the class dimension on axis 0.

    Returns
    -------
    Array of the same shape as Z whose entries along axis 0 sum to 1.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

In [12]:
def conv2d(X,W,b):
    """Response of one filter at one position.

    Multiplies the patch X by the kernel W element-wise, adds up every
    product and then adds the bias b.
    """
    products = X * W
    total = np.sum(products)
    return total + b

In [13]:
def max_pooling(X, pool_size):
    """Non-overlapping max pooling of a 2-D array.

    X is tiled with pool_size x pool_size windows starting at the top-left
    corner; rows and columns that do not fill a complete window are ignored.
    Returns a float array holding the maximum of each window.
    """
    rows, cols = X.shape
    out_rows = rows // pool_size
    out_cols = cols // pool_size
    result = np.zeros((out_rows, out_cols))
    for r in range(out_rows):
        top = r * pool_size
        band = X[top:top + pool_size]  # all rows covered by this row of windows
        for c in range(out_cols):
            left = c * pool_size
            result[r, c] = np.max(band[:, left:left + pool_size])
    return result

In [10]:
# CNN architecture constants
num_filters1, filter_size1 = 16, 3  # first convolution: filter count, kernel side
num_filters2, filter_size2 = 32, 3  # second convolution: filter count, kernel side
pool_size = 2                       # side of the max-pooling window
num_fc_units = 128                  # width of the hidden fully connected layer
num_classes = 10                    # number of output classes


In [11]:
#initializing parameters
def init_params():
    """Create the initial CNN parameters.

    Weights are Gaussian draws scaled by 0.01 and biases are zeros. Sizes come
    from the module-level constants num_filters1, filter_size1, num_filters2,
    filter_size2, num_fc_units and num_classes.

    Returns
    -------
    tuple
        (W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2) with
        shapes
          W_conv1: (num_filters1, filter_size1, filter_size1, 1)
          b_conv1: (num_filters1, 1)
          W_conv2: (num_filters2, filter_size2, filter_size2, num_filters1)
          b_conv2: (num_filters2, 1)
          W_fc1:   (num_fc_units, 7 * 7 * num_filters2)
          b_fc1:   (num_fc_units, 1)
          W_fc2:   (num_classes, num_fc_units)
          b_fc2:   (num_classes, 1)

    Typical use:
        W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2 = init_params()
    """
    W_conv1 = np.random.randn(num_filters1, filter_size1, filter_size1, 1) * 0.01
    b_conv1 = np.zeros((num_filters1, 1))

    W_conv2 = np.random.randn(num_filters2, filter_size2, filter_size2, num_filters1) * 0.01
    b_conv2 = np.zeros((num_filters2, 1))

    # The first dense layer consumes a flattened 7 x 7 x num_filters2 feature map.
    W_fc1 = np.random.randn(num_fc_units, 7 * 7 * num_filters2) * 0.01
    b_fc1 = np.zeros((num_fc_units, 1))

    W_fc2 = np.random.randn(num_classes, num_fc_units) * 0.01
    b_fc2 = np.zeros((num_classes, 1))

    # Without this return the arrays above would be discarded as locals and
    # every caller would receive None.
    return W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2


In [None]:
# Training loop: mini-batch gradient descent on the softmax cross-entropy loss.
#
# Network: conv -> ReLU -> max-pool -> conv -> ReLU -> max-pool -> dense -> ReLU
# -> dense -> softmax. Both convolutions are zero-padded so they keep the
# spatial size; with two 2x2 pools a 28x28 image becomes 7x7, which is the
# 7 * 7 * num_filters2 input width that init_params() gives W_fc1.
#
# The cell reads X_train, Y_train, pool_size, ReLU, softmax and the eight
# parameter arrays W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2,
# b_fc2 from the notebook namespace (shapes as built in init_params()); the
# parameter arrays are updated in place.


def _conv_forward(x, W, b):
    """Zero-padded, stride-1 convolution of a batch of images.

    x is (batch, height, width, in_channels), W is (filters, k, k, in_channels)
    and b is (filters, 1). Returns the (batch, height, width, filters) output
    and the padded input, which the backward pass reuses.
    """
    k = W.shape[1]
    pad = k // 2
    height, width = x.shape[1], x.shape[2]
    x_padded = np.pad(x, ((0, 0), (pad, pad), (pad, pad), (0, 0)), mode="constant")
    out = np.zeros(x.shape[:3] + (W.shape[0],))
    # Accumulate one kernel offset at a time: each offset is a plain matrix
    # product between a shifted view of the input and one (in, filters) slice.
    for p in range(k):
        for q in range(k):
            out += np.dot(x_padded[:, p:p + height, q:q + width, :], W[:, p, q, :].T)
    return out + b.reshape(1, 1, 1, -1), x_padded


def _conv_backward(d_out, x_padded, W):
    """Gradients of _conv_forward.

    d_out is the loss gradient w.r.t. the convolution output. Returns
    (dW, db, dx) shaped like W, like the (filters, 1) bias and like the
    unpadded input.
    """
    k = W.shape[1]
    pad = k // 2
    height, width = d_out.shape[1], d_out.shape[2]
    dW = np.zeros_like(W)
    dx_padded = np.zeros_like(x_padded)
    for p in range(k):
        for q in range(k):
            patch = x_padded[:, p:p + height, q:q + width, :]
            # Sum over batch and both spatial axes -> (filters, in_channels).
            dW[:, p, q, :] = np.tensordot(d_out, patch, axes=([0, 1, 2], [0, 1, 2]))
            dx_padded[:, p:p + height, q:q + width, :] += np.dot(d_out, W[:, p, q, :])
    db = d_out.sum(axis=(0, 1, 2)).reshape(-1, 1)
    dx = dx_padded[:, pad:pad + height, pad:pad + width, :]
    return dW, db, dx


def _pool_forward(a, size):
    """Non-overlapping size x size max pooling of a (batch, H, W, C) array.

    H and W must be multiples of size. Returns the pooled array and a boolean
    mask marking which input positions attained each window maximum.
    """
    batch, height, width, channels = a.shape
    windows = a.reshape(batch, height // size, size, width // size, size, channels)
    pooled = windows.max(axis=(2, 4))
    mask = windows == pooled[:, :, None, :, None, :]
    return pooled, mask


def _pool_backward(d_pooled, mask):
    """Route each pooled gradient back to the position(s) that held the maximum."""
    batch, rows, size, cols, _, channels = mask.shape
    d_windows = mask * d_pooled[:, :, None, :, None, :]
    return d_windows.reshape(batch, rows * size, cols * size, channels)


learning_rate = .001
num_epochs = 10
batch_size = 100

# X_train is built from a transposed slice, so it is laid out as
# (pixels, examples): examples are selected along axis 1, not axis 0.
assert X_train.shape[0] == 28 * 28, "X_train must be (784, num_examples)"
num_examples = X_train.shape[1]

for epoch in range(num_epochs):
    for i in range(0, num_examples, batch_size):
        X_batch = X_train[:, i:i + batch_size].T.reshape(-1, 28, 28, 1)
        # Labels are used as array indices below, so they must be integers.
        Y_batch = np.asarray(Y_train[i:i + batch_size]).astype(int)
        # The final batch may be shorter than batch_size.
        n_batch = X_batch.shape[0]

        # Forward pass
        conv1_out, x_padded = _conv_forward(X_batch, W_conv1, b_conv1)      # (B, 28, 28, F1)
        relu1_out = ReLU(conv1_out)
        pool1_out, pool1_mask = _pool_forward(relu1_out, pool_size)         # (B, 14, 14, F1)

        conv2_out, pool1_padded = _conv_forward(pool1_out, W_conv2, b_conv2)  # (B, 14, 14, F2)
        relu2_out = ReLU(conv2_out)
        pooled_out, pool2_mask = _pool_forward(relu2_out, pool_size)        # (B, 7, 7, F2)

        fc_in = pooled_out.reshape(n_batch, -1)                             # (B, 7*7*F2)
        fc1_out = ReLU(np.dot(W_fc1, fc_in.T) + b_fc1)                      # (units, B)
        scores = np.dot(W_fc2, fc1_out) + b_fc2                             # (classes, B)
        probs = softmax(scores)

        # Backpropagation
        # Gradient of the mean cross-entropy w.r.t. the scores is
        # (probs - one_hot) / B; copy so probs itself is left untouched.
        dscores = probs.copy()
        dscores[Y_batch, np.arange(n_batch)] -= 1
        dscores /= n_batch

        dW_fc2 = np.dot(dscores, fc1_out.T)
        db_fc2 = np.sum(dscores, axis=1, keepdims=True)

        dfc1 = np.dot(W_fc2.T, dscores)
        dfc1[fc1_out <= 0] = 0

        dW_fc1 = np.dot(dfc1, fc_in)
        db_fc1 = np.sum(dfc1, axis=1, keepdims=True)

        dpool2 = np.dot(W_fc1.T, dfc1).T.reshape(pooled_out.shape)
        drelu2 = _pool_backward(dpool2, pool2_mask)
        drelu2[relu2_out <= 0] = 0
        dW_conv2, db_conv2, dpool1 = _conv_backward(drelu2, pool1_padded, W_conv2)

        drelu1 = _pool_backward(dpool1, pool1_mask)
        drelu1[relu1_out <= 0] = 0
        # The gradient w.r.t. the input images is not needed.
        dW_conv1, db_conv1 = _conv_backward(drelu1, x_padded, W_conv1)[:2]

        # Update parameters
        W_fc2 -= learning_rate * dW_fc2
        b_fc2 -= learning_rate * db_fc2
        W_fc1 -= learning_rate * dW_fc1
        b_fc1 -= learning_rate * db_fc1
        W_conv2 -= learning_rate * dW_conv2
        b_conv2 -= learning_rate * db_conv2
        W_conv1 -= learning_rate * dW_conv1
        b_conv1 -= learning_rate * db_conv1

    print("Epoch %d completed" % (epoch + 1))

print("Training finished")