# Exercise 4
In this exercise, you will implement the forward and backward passes of a simple neural network. You are expected to write all the functions using vectorized NumPy operations only.

The following cell has code to load the train and test data. You will be working with the MNIST dataset. The images have been flattened and normalised to be between 0 and 1 for you already.

In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load MNIST dataset
def load_mnist():
    """
    Fetch MNIST ('mnist_784', version 1) from OpenML.

    Returns a pair (pixels, labels): the dataset's feature table divided
    by 255.0, and the targets cast to int and converted to a NumPy array.
    """
    dataset = fetch_openml('mnist_784', version=1)
    pixels = dataset.data / 255.0
    labels = dataset.target.astype(int)
    # Only the labels are converted here; the pixel table keeps whatever
    # container type fetch_openml returned.
    return pixels, labels.to_numpy()

# One-hot encode labels
def one_hot_encode(y, num_classes):
    """
    One-hot encode integer class labels.

    y is reshaped to a single column and encoded against the fixed
    category list 0 .. num_classes - 1; the result is a dense array with
    one column per class.
    """
    label_column = y.reshape(-1, 1)
    class_ids = [range(num_classes)]
    one_hot = OneHotEncoder(categories=class_ids, sparse_output=False)
    return one_hot.fit_transform(label_column)

# Split dataset
def prepare_data(test_size=0.2):
    """
    Load MNIST, one-hot encode the labels and split into train/test sets.

    test_size is forwarded to train_test_split; the split uses the fixed
    random_state 42. Returns train_test_split's result, which the caller
    unpacks as X_train, X_test, y_train, y_test.
    """
    features, labels = load_mnist()
    targets = one_hot_encode(labels, num_classes=10)
    splits = train_test_split(
        features,
        targets,
        test_size=test_size,
        random_state=42,
    )
    return splits

# Build the train/test split once and keep it in module-level variables.
X_train, X_test, y_train, y_test = prepare_data()
# Convert the feature splits to plain NumPy arrays for the network code below.
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
print(f"Training Data Shape: {X_train.shape}, Test Data Shape: {X_test.shape}")


You need to implement a two-layer neural network (one hidden layer) using NumPy. Fill in all the required cells below. Only use numpy functions.

- Implement the forward pass. Use ReLU activation for the hidden layer and softmax for the final output. Be sure to use the bias as well. (0.5 point)
- Implement the backward pass. This should return the gradients of the loss w.r.t. the weights and biases of the network. The return signature of the backward pass is provided as a comment in the function. (1.5 points)
- For your loss function, use the cross-entropy loss. (0.5 point)
- The `predict` function should run the forward pass and return the predicted class. (0.5 point)
- The `train` function should run the forward pass, compute the loss and the gradients, and update the parameters using gradient descent with the given learning rate. It should repeat this for the given number of epochs. You are given some code to evaluate the performance of your network during training. You can uncomment it and match your variable names. (1 point)

In [None]:
class TwoLayerNN:
    """
    Two-layer fully connected classifier (one hidden layer) in pure NumPy.

    Architecture: X -> (W1, b1) -> ReLU -> (W2, b2) -> softmax.
    Inputs are expected as arrays of shape (n_samples, input_size) and
    targets as one-hot arrays of shape (n_samples, output_size).
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        Initialize weights and biases.

        Weights are drawn from a standard normal scaled by 0.01; biases
        start at zero and are stored as row vectors so they broadcast over
        the batch dimension.
        """
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
        # (Z1, A1, A2) saved by forward() so backward() can reuse them.
        self._cache = None

    def relu(self, Z):
        """
        ReLU activation function.
        """
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """
        Derivative of ReLU activation (1.0 where Z > 0, else 0.0).
        """
        return (Z > 0).astype(float)

    def softmax(self, Z):
        """
        Row-wise softmax activation function.

        The row maximum is subtracted before exponentiating so that large
        logits do not overflow.
        """
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / np.sum(expZ, axis=1, keepdims=True)

    def _forward_pass(self, X):
        """
        Compute hidden pre-activations, hidden activations and class
        probabilities for X without touching the cached state.
        """
        Z1 = X @ self.W1 + self.b1
        A1 = self.relu(Z1)
        Z2 = A1 @ self.W2 + self.b2
        return Z1, A1, self.softmax(Z2)

    def forward(self, X):
        """
        Forward pass.

        Returns the softmax class probabilities, shape
        (n_samples, output_size), and caches the intermediate activations
        for a subsequent call to backward().
        """
        Z1, A1, A2 = self._forward_pass(X)
        self._cache = (Z1, A1, A2)
        return A2

    def backward(self, X, y, learning_rate=None):
        """
        Backpropagation: gradients of the mean cross-entropy loss.

        Uses the activations cached by the most recent forward() call, so
        call forward(X) with the current parameters first. If nothing is
        cached, or the cached batch size differs from X, the forward pass
        is run here.

        learning_rate is accepted for interface compatibility only; this
        method does not update any parameters.

        Returns dW1, db1, dW2, db2, each shaped like its parameter.
        """
        cache = getattr(self, "_cache", None)
        if cache is None or cache[2].shape[0] != X.shape[0]:
            self.forward(X)
            cache = self._cache
        Z1, A1, A2 = cache

        n_samples = X.shape[0]
        # Softmax combined with cross-entropy gives (probs - targets) as
        # the gradient w.r.t. the logits; divide by n for the batch mean.
        dZ2 = (A2 - y) / n_samples
        dW2 = A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        # Propagate through the hidden layer; ReLU gates the gradient.
        dZ1 = (dZ2 @ self.W2.T) * self.relu_derivative(Z1)
        dW1 = X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)
        return dW1, db1, dW2, db2

    def compute_loss(self, y_true, y_pred):
        """
        Compute the mean cross-entropy loss.

        y_true holds one-hot targets and y_pred the predicted
        probabilities, both of shape (n_samples, n_classes).
        """
        # Clip so that a probability of exactly 0 cannot produce log(0).
        log_probs = np.log(np.clip(y_pred, 1e-12, 1.0))
        return -np.mean(np.sum(y_true * log_probs, axis=1))

    def predict(self, X):
        """
        Predict class labels.

        Returns a 1-D array with the index of the most probable class for
        each row of X. Does not overwrite the activations cached by
        forward().
        """
        _, _, probs = self._forward_pass(X)
        return np.argmax(probs, axis=1)

    def train(self, X, y, epochs, learning_rate, verbose=True):
        """
        Train the model using full-batch gradient descent.

        Each epoch runs the forward pass, computes the loss and the
        gradients, and takes one gradient-descent step with the given
        learning rate. When verbose is true, the loss is printed every
        10 epochs and on the final epoch.

        Returns the list of per-epoch losses, each measured before that
        epoch's update.
        """
        history = []
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = self.compute_loss(y, y_pred)
            dW1, db1, dW2, db2 = self.backward(X, y, learning_rate)

            self.W1 -= learning_rate * dW1
            self.b1 -= learning_rate * db1
            self.W2 -= learning_rate * dW2
            self.b2 -= learning_rate * db2

            history.append(float(loss))
            if verbose and (epoch % 10 == 0 or epoch == epochs - 1):
                print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")
        return history


The following code evaluates the performance of your network on X_test. You can expect an accuracy of around 90%.

In [None]:
# Network dimensions: one input per feature column, one output per digit class.
input_size = X_train.shape[1]
hidden_size = 64  # Free choice; 64 hidden units is the suggested starting point
output_size = 10  # Number of classes

model = TwoLayerNN(input_size, hidden_size, output_size)

# Fit the model on the training split
epochs = 100
learning_rate = 0.5
model.train(X_train, y_train, epochs, learning_rate)

# Score on the held-out split: y_test is one-hot, so argmax recovers the
# class index that the predictions are compared against.
predictions = model.predict(X_test)
accuracy = np.mean(predictions == np.argmax(y_test, axis=1))
print(f"Test Accuracy: {accuracy * 100:.2f}%")
