<a href="https://colab.research.google.com/github/nitinmalviya326/deep-learning-lab/blob/main/NN_on_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Cell 1: Import libraries
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split



In [23]:
# Cell 2: Load MNIST data
# Download the OpenML `mnist_784` dataset, rescale the pixel values by 1/255,
# convert the labels to integers, and hold out 20% of the rows as a test split.
print("Loading data...")
mnist_pixels, mnist_labels = fetch_openml(
    'mnist_784',
    version=1,
    return_X_y=True,
    parser='auto',
)

# Keep the raw frames under their own names so X / y are only ever NumPy arrays.
X = mnist_pixels.to_numpy() / 255.0
y = mnist_labels.to_numpy().astype(int)

# Fixed random_state keeps the train/test partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Train: {len(X_train)}, Test: {len(X_test)}")

Loading data...
Train: 56000, Test: 14000


In [24]:
# Cell 3: Initialize weights
# Seed NumPy's global RNG so the initial weights are the same on every run.
np.random.seed(42)

# Layer widths of the network: 784 inputs -> 128 hidden units -> 10 outputs.
n_in, n_hidden, n_out = 784, 128, 10
init_scale = 0.01  # small standard deviation for the Gaussian weight draws

# Weights are drawn in the order W1, then W2; biases start at zero.
W1 = init_scale * np.random.randn(n_in, n_hidden)
b1 = np.zeros((1, n_hidden))
W2 = init_scale * np.random.randn(n_hidden, n_out)
b2 = np.zeros((1, n_out))

In [25]:

# Cell 4: Training
# Mini-batch gradient descent for the two-layer network: ReLU hidden layer,
# softmax output, and the gradient `softmax - one_hot` at the output layer.
#
# W1, b1, W2 and b2 are updated in place, so re-running this cell continues
# training from the current weights; re-run the weight-initialization cell
# first to start from scratch.
epochs = 30
lr = 0.1
batch_size = 128

n_train = len(X_train)
# Row k is the one-hot target for class k. Built once instead of per batch,
# and sized from W2 so it always matches the width of the output layer.
one_hot = np.eye(W2.shape[1])

print("Training...")
for epoch in range(epochs):
    # Draw a fresh visiting order and index the batches through it.
    # X_train / y_train are deliberately NOT re-bound to shuffled copies:
    # that would copy the whole training set every epoch and leave the
    # notebook-level arrays in a different order after each run of this cell.
    order = np.random.permutation(n_train)

    for start in range(0, n_train, batch_size):
        batch_idx = order[start:start + batch_size]
        X_batch = X_train[batch_idx]
        y_batch = y_train[batch_idx]
        m = len(batch_idx)  # the final batch can be shorter than batch_size

        # Forward
        hidden = np.maximum(0, X_batch @ W1 + b1)
        logits = hidden @ W2 + b2
        # Subtract the row-wise max before exponentiating to avoid overflow.
        probs = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        probs /= np.sum(probs, axis=1, keepdims=True)

        # Backward (dividing by m once averages every gradient over the batch)
        dz2 = (probs - one_hot[y_batch]) / m
        dW2 = hidden.T @ dz2
        db2 = np.sum(dz2, axis=0, keepdims=True)
        dz1 = (dz2 @ W2.T) * (hidden > 0)  # ReLU passes gradient only where active
        dW1 = X_batch.T @ dz1
        db1 = np.sum(dz1, axis=0, keepdims=True)

        # Update
        W1 -= lr * dW1
        b1 -= lr * db1
        W2 -= lr * dW2
        b2 -= lr * db2

    # Accuracy on the full training set. The softmax is skipped here because
    # the arg-max of the raw output scores is the same as that of the softmax.
    hidden = np.maximum(0, X_train @ W1 + b1)
    logits = hidden @ W2 + b2
    acc = np.mean(np.argmax(logits, axis=1) == y_train)
    print(f"Epoch {epoch+1}: {acc:.4f}")

Training...
Epoch 1: 0.8997
Epoch 2: 0.9166
Epoch 3: 0.9329
Epoch 4: 0.9428
Epoch 5: 0.9471
Epoch 6: 0.9554
Epoch 7: 0.9584
Epoch 8: 0.9633
Epoch 9: 0.9663
Epoch 10: 0.9691
Epoch 11: 0.9714
Epoch 12: 0.9745
Epoch 13: 0.9765
Epoch 14: 0.9776
Epoch 15: 0.9803
Epoch 16: 0.9806
Epoch 17: 0.9813
Epoch 18: 0.9825
Epoch 19: 0.9841
Epoch 20: 0.9851
Epoch 21: 0.9861
Epoch 22: 0.9866
Epoch 23: 0.9861
Epoch 24: 0.9882
Epoch 25: 0.9879
Epoch 26: 0.9896
Epoch 27: 0.9904
Epoch 28: 0.9902
Epoch 29: 0.9880
Epoch 30: 0.9914


In [21]:

# Cell 5: Test
# Run the forward pass on the held-out split with the current weights and
# report the fraction of rows whose highest output score matches the label.
a1 = np.maximum(0, X_test.dot(W1) + b1)
a2 = a1.dot(W2) + b2
predicted = a2.argmax(axis=1)
test_acc = (predicted == y_test).mean()
print(f"\nTest Accuracy: {test_acc:.4f}")


Test Accuracy: 0.9789
