In [1]:
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score, confusion_matrix

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (X_train.reshape(-1, 28*28) / 255.0).astype(np.float32)
X_test = (X_test.reshape(-1, 28*28) / 255.0).astype(np.float32)

def one_hot_encode(y):
    n_classes = len(np.unique(y))
    encoded = np.zeros((len(y), n_classes))
    for i in range(len(y)):
        encoded[i, y[i]] = 1
    return encoded

y_train_encoded = one_hot_encode(y_train)
y_test_encoded = one_hot_encode(y_test)

class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.weights, self.biases = self.initialize_parameters()

    def initialize_parameters(self):
        W1 = np.random.randn(self.input_size, self.hidden_size)
        b1 = np.zeros((1, self.hidden_size))
        W2 = np.random.randn(self.hidden_size, self.output_size)
        b2 = np.zeros((1, self.output_size))
        return [W1, W2], [b1, b2]

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def softmax(self, z):
        exp_scores = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def forward(self, X):
        z1 = np.dot(X, self.weights[0]) + self.biases[0]
        a1 = self.sigmoid(z1)
        z2 = np.dot(a1, self.weights[1]) + self.biases[1]
        a2 = self.softmax(z2)
        return a2, {'z1': z1, 'a1': a1, 'z2': z2, 'a2': a2}

    def compute_loss(self, y_true, y_pred):
        return -np.mean(y_true * np.log(y_pred + 1e-9))

    def backward(self, X, y_true, cache):
        m = X.shape[0]
        dz2 = cache['a2'] - y_true
        dW2 = np.dot(cache['a1'].T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m
        dz1 = np.dot(dz2, self.weights[1].T) * cache['a1'] * (1 - cache['a1'])
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m
        return [dW1, dW2], [db1, db2]

    def update_parameters(self, gradients, learning_rate):
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * gradients[0][i]
            self.biases[i] -= learning_rate * gradients[1][i]

    def train(self, X, y, epochs, batch_size, learning_rate):
        for epoch in range(epochs):
            for i in range(0, len(X), batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]
                y_pred, cache = self.forward(X_batch)
                loss = self.compute_loss(y_batch, y_pred)
                gradients = self.backward(X_batch, y_batch, cache)
                self.update_parameters(gradients, learning_rate)
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        y_pred, _ = self.forward(X)
        return np.argmax(y_pred, axis=1)

# Initialize and train MLP model
input_size = X_train.shape[1]
hidden_size = 128  # Number of neurons in the hidden layer
output_size = 10   # Number of classes (digits 0-9)
mlp = MLP(input_size, hidden_size, output_size)
mlp.train(X_train, y_train_encoded, epochs=50, batch_size=64, learning_rate=0.1)

# Evaluate model on test set
y_pred = mlp.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")

Epoch 0, Loss: 0.062419277674882576
Epoch 10, Loss: 0.006147419179194401
Epoch 20, Loss: 0.003926343221244846
Epoch 30, Loss: 0.0031423158281755252
Epoch 40, Loss: 0.0027708885249469795
Test Accuracy: 0.9449
