<a href="https://colab.research.google.com/github/tripti-bhardwaj/Deep_Learning_Lab/blob/main/Lab2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd

In [None]:
# Network architecture and optimisation settings
input_size = 28 * 28   # each image is 28x28 pixels, flattened to 784 features
hidden_size = 64       # width of the hidden layer
output_size = 10       # one output per digit class (0-9)
learning_rate = 0.01   # step size used by gradient descent
epochs = 5000          # number of training epochs

In [None]:
# Read the MNIST train/test splits from their CSV files
train_csv_path = "/content/mnist_train.csv"
test_csv_path = "/content/mnist_test.csv"
train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

In [None]:
def _features_and_labels(frame):
    """Split a raw MNIST frame into scaled pixel features and labels.

    The 'label' column is returned unchanged. Every other column is treated
    as pixel data: NaN entries are replaced by 0 and the values are divided
    by 255.0 (treated here as the maximum pixel value).

    Returns:
        (features, labels) as NumPy arrays.
    """
    pixels = frame.drop(columns=['label']).fillna(0).values
    return pixels / 255.0, frame['label'].values


# Apply the identical preparation to the training and testing splits
X_train, y_train = _features_and_labels(train_data)
X_test, y_test = _features_and_labels(test_data)

In [None]:
def one_hot(labels, num_classes):
    """Return a one-hot matrix for integer class labels.

    For example, with 10 classes label 3 becomes
    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0].

    Input that is already a 2-D array with `num_classes` columns is returned
    unchanged. Because this cell overwrites `y_train` / `y_test` with their
    encoded form, that guard is what lets the cell be re-run safely instead
    of failing when it tries to index with the already-encoded float matrix.
    """
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[1] == num_classes:
        return labels  # already encoded by an earlier run of this cell
    return np.eye(num_classes)[labels]


# Use the same class count the network's output layer is built with
y_train = one_hot(y_train, output_size)
y_test = one_hot(y_test, output_size)

In [None]:
def sigmoid(z):
    """Element-wise logistic function, 1 / (1 + exp(-z)).

    The input is first limited to [-500, 500], which keeps np.exp finite
    in float64 for extreme values of `z`.
    """
    bounded = np.clip(z, -500, 500)
    return 1 / (np.exp(-bounded) + 1)

def softmax(z, axis=1):
    """Softmax of `z` along `axis`, used for the multi-class output layer.

    Args:
        z: array of scores (logits).
        axis: axis along which the probabilities are normalised. Defaults
            to 1, i.e. one distribution per row of a 2-D batch; pass -1 or
            0 to handle a single 1-D score vector.

    Returns:
        Array with the same shape as `z` whose entries along `axis` sum to 1.
    """
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    shifted = z - np.max(z, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [None]:
class NeuralNetwork():
    """Fully connected classifier with one sigmoid hidden layer and a softmax output.

    Parameters are updated by gradient descent on the mean cross-entropy loss,
    using the whole array passed to `train` for every step.

    Attributes:
        weight1, bias1: hidden-layer parameters with shapes
            (input_size, hidden_size) and (1, hidden_size).
        weight2, bias2: output-layer parameters with shapes
            (hidden_size, output_size) and (1, output_size).
        lr: learning rate applied to every parameter update.
        a1: hidden activations cached by the most recent forward pass
            (only present after `forward_propagation` has been called).
        loss_history: one loss value per epoch from the most recent
            `train` call.
    """

    def __init__(self, input_size, hidden_size, output_size, lr, seed=None):
        """Create the parameters of the network.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output classes.
            lr: learning rate for gradient descent.
            seed: optional seed. When given, the weights are drawn from a
                private `np.random.RandomState(seed)` so initialisation is
                repeatable; when None the global NumPy random state is used,
                exactly as before.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Zero-mean normal weights scaled by sqrt(1 / fan_in); biases start at 0
        self.weight1 = rng.randn(input_size, hidden_size) * np.sqrt(1 / input_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.weight2 = rng.randn(hidden_size, output_size) * np.sqrt(1 / hidden_size)
        self.bias2 = np.zeros((1, output_size))
        self.lr = lr
        self.loss_history = []

    def forward_propagation(self, X):
        """Compute class probabilities for `X` and cache the hidden activations.

        Args:
            X: array of shape (n_samples, input_size).

        Returns:
            Array of shape (n_samples, output_size) with softmax probabilities.
        """
        # Layer 1: linear transformation + sigmoid activation
        z1 = np.dot(X, self.weight1) + self.bias1
        self.a1 = sigmoid(z1)  # kept for the backward pass
        # Layer 2: linear transformation, logits bounded to [-50, 50], softmax
        z2 = np.dot(self.a1, self.weight2) + self.bias2
        z2 = np.clip(z2, -50, 50)
        return softmax(z2)

    def backward_propagation(self, X, y_actual, y_predicted):
        """Apply one gradient-descent update to all parameters.

        Uses `self.a1` from the forward pass that produced `y_predicted`, so
        `forward_propagation(X)` must have been called immediately before.

        Args:
            X: inputs of shape (n_samples, input_size).
            y_actual: target rows of shape (n_samples, output_size).
            y_predicted: probabilities returned by `forward_propagation(X)`.
        """
        m = X.shape[0]  # number of samples, used to average the gradients

        # Output layer: softmax + cross-entropy gradient w.r.t. the logits
        dz2 = y_predicted - y_actual
        dweight2 = np.dot(self.a1.T, dz2) / m
        dbias2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer: back-propagate through weight2 and the sigmoid,
        # whose derivative is a1 * (1 - a1)
        dz1 = np.dot(dz2, self.weight2.T) * self.a1 * (1 - self.a1)
        dweight1 = np.dot(X.T, dz1) / m
        dbias1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Clip the weight gradients element-wise to [-5, 5]
        dweight1 = np.clip(dweight1, -5, 5)
        dweight2 = np.clip(dweight2, -5, 5)

        # Gradient-descent step
        self.weight1 -= self.lr * dweight1
        self.bias1 -= self.lr * dbias1
        self.weight2 -= self.lr * dweight2
        self.bias2 -= self.lr * dbias2

    def train(self, X, y, epochs, print_every=100):
        """Run `epochs` full-batch gradient-descent steps on (X, y).

        Args:
            X: inputs of shape (n_samples, input_size).
            y: target rows of shape (n_samples, output_size); expected to be
                one-hot encoded, as in this notebook.
            epochs: number of update steps to perform.
            print_every: print the loss every this many epochs (default 100);
                pass 0 or None to disable printing.

        Raises:
            ValueError: if `X` has no rows or `X` and `y` disagree on the
                number of rows (both would otherwise silently produce NaNs
                or broadcasting errors deep inside the update).

        The loss of every epoch is stored in `self.loss_history`.
        """
        m = X.shape[0]
        if m == 0:
            raise ValueError("Cannot train on an empty dataset")
        if y.shape[0] != m:
            raise ValueError(
                f"X and y must have the same number of rows, got {m} and {y.shape[0]}"
            )

        self.loss_history = []
        for epoch in range(epochs):
            y_predicted = self.forward_propagation(X)
            # Clip only the copy used inside log() so log(0) cannot occur;
            # the gradient below uses the unclipped probabilities.
            safe_probs = np.clip(y_predicted, 1e-8, 1 - 1e-8)
            loss = -np.sum(y * np.log(safe_probs)) / m
            self.backward_propagation(X, y, y_predicted)
            self.loss_history.append(float(loss))
            if print_every and (epoch + 1) % print_every == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the index of the most probable class for each row of `X`."""
        y_predicted = self.forward_propagation(X)
        return np.argmax(y_predicted, axis=1)

In [None]:
# Seed NumPy's global generator so the random weight initialisation is
# repeatable from one Restart & Run All to the next (42 is an arbitrary value)
np.random.seed(42)

# Build the network from the hyperparameters defined above
nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate)

# Train the neural network using the training data
nn.train(X_train, y_train, epochs)

Epoch 100/5000, Loss: 2.2534
Epoch 200/5000, Loss: 2.1796
Epoch 300/5000, Loss: 2.1100
Epoch 400/5000, Loss: 2.0395
Epoch 500/5000, Loss: 1.9674
Epoch 600/5000, Loss: 1.8935
Epoch 700/5000, Loss: 1.8182
Epoch 800/5000, Loss: 1.7423
Epoch 900/5000, Loss: 1.6669
Epoch 1000/5000, Loss: 1.5931
Epoch 1100/5000, Loss: 1.5218
Epoch 1200/5000, Loss: 1.4537
Epoch 1300/5000, Loss: 1.3894
Epoch 1400/5000, Loss: 1.3290
Epoch 1500/5000, Loss: 1.2727
Epoch 1600/5000, Loss: 1.2204
Epoch 1700/5000, Loss: 1.1718
Epoch 1800/5000, Loss: 1.1268
Epoch 1900/5000, Loss: 1.0851
Epoch 2000/5000, Loss: 1.0465
Epoch 2100/5000, Loss: 1.0107
Epoch 2200/5000, Loss: 0.9775
Epoch 2300/5000, Loss: 0.9466
Epoch 2400/5000, Loss: 0.9178
Epoch 2500/5000, Loss: 0.8910
Epoch 2600/5000, Loss: 0.8660
Epoch 2700/5000, Loss: 0.8426
Epoch 2800/5000, Loss: 0.8206
Epoch 2900/5000, Loss: 0.8000
Epoch 3000/5000, Loss: 0.7806
Epoch 3100/5000, Loss: 0.7624
Epoch 3200/5000, Loss: 0.7451
Epoch 3300/5000, Loss: 0.7289
Epoch 3400/5000, Lo

In [None]:
# Predicted digit for every test sample
y_test_predictions = nn.predict(X_test)

# y_test is one-hot encoded, so argmax recovers the integer labels;
# accuracy is the fraction of predictions that match them
y_test_labels = np.argmax(y_test, axis=1)
accuracy = np.mean(y_test_predictions == y_test_labels)

# Report the accuracy as a percentage
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 83.13%
