<a href="https://colab.research.google.com/github/nitinmalviya326/deep-learning-lab/blob/main/NN_on_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Cell 1: Import libraries
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split



In [7]:
# loading the MNIST dataset for handwritten digit classification
# normalizing image pixel values to make training easier
# converting labels to integers
# splitting data into training and test sets for evaluation
print("Loading data...")
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, parser='auto')
X = X.to_numpy() / 255.0
y = y.to_numpy().astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Train: {len(X_train)}, Test: {len(X_test)}")

Loading data...
Train: 56000, Test: 14000


In [11]:
# random seed is set to keep results consistent
# weights are initialized with small random values
# biases are initialized as zero vectors for each layer
np.random.seed(42)
W1 = np.random.randn(784, 128) * 0.01
b1 = np.zeros((1, 128))
W2 = np.random.randn(128, 10) * 0.01
b2 = np.zeros((1, 10))

In [12]:
# Cell 4: Training dataset for 30 epocs with learning rate .1 nd batch size 128
epochs = 30
lr = 0.1
batch_size = 128

print("Training...")
for epoch in range(epochs):
    indices = np.random.permutation(len(X_train))
    X_train, y_train = X_train[indices], y_train[indices]

    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        # Forward forward propogation
        a1 = np.maximum(0, X_batch @ W1 + b1)
        z2 = a1 @ W2 + b2
        a2 = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        a2 = a2 / np.sum(a2, axis=1, keepdims=True)

        # Backward propogation
        dz2 = a2 - np.eye(10)[y_batch]
        dW2 = a1.T @ dz2 / len(X_batch)
        db2 = np.sum(dz2, axis=0, keepdims=True) / len(X_batch)
        dz1 = (dz2 @ W2.T) * (a1 > 0)
        dW1 = X_batch.T @ dz1 / len(X_batch)
        db1 = np.sum(dz1, axis=0, keepdims=True) / len(X_batch)

         # Update weights nd biases
        W1 -= lr * dW1
        b1 -= lr * db1
        W2 -= lr * dW2
        b2 -= lr * db2

    #
    a1 = np.maximum(0, X_train @ W1 + b1)
    a2 = a1 @ W2 + b2
    acc = np.mean(np.argmax(a2, axis=1) == y_train)
    print(f"Epoch {epoch+1}: {acc:.4f}")



Training...
Epoch 1: 0.8980
Epoch 2: 0.9184
Epoch 3: 0.9317
Epoch 4: 0.9402
Epoch 5: 0.9493
Epoch 6: 0.9540
Epoch 7: 0.9577
Epoch 8: 0.9628
Epoch 9: 0.9669
Epoch 10: 0.9696
Epoch 11: 0.9719
Epoch 12: 0.9736
Epoch 13: 0.9762
Epoch 14: 0.9764
Epoch 15: 0.9791
Epoch 16: 0.9809
Epoch 17: 0.9813
Epoch 18: 0.9826
Epoch 19: 0.9833
Epoch 20: 0.9845
Epoch 21: 0.9861
Epoch 22: 0.9867
Epoch 23: 0.9869
Epoch 24: 0.9873
Epoch 25: 0.9886
Epoch 26: 0.9891
Epoch 27: 0.9896
Epoch 28: 0.9902
Epoch 29: 0.9895
Epoch 30: 0.9921


In [10]:

# Cell 5: Test Accuracy (using softmax probabilities)

z1 = X_test @ W1 + b1
a1 = np.maximum(0, z1)

z2 = a1 @ W2 + b2

# softmax for probability distribution
exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

test_acc = np.mean(np.argmax(probs, axis=1) == y_test)
print(f"Test Accuracy: {test_acc:.4f}")


Test Accuracy: 0.9734
