In [5]:
import numpy as np
import tensorflow

In [6]:
class NeuralNetwork:
    """Fully connected softmax classifier trained with full-batch gradient descent.

    Hidden layers use ReLU; the last layer uses softmax and the loss is the
    mean categorical cross-entropy.  Data is laid out row-wise: ``X`` has
    shape ``(m, layer_sizes[0])`` and ``Y`` is a one-hot matrix of shape
    ``(m, layer_sizes[-1])``.

    Parameters live in ``self.parameters`` under the keys ``'W1'``, ``'b1'``,
    ..., ``'WL'``, ``'bL'`` where ``L = len(layer_sizes) - 1``.
    """

    # Lower bound applied to probabilities before taking the log, so that a
    # saturated softmax (probability exactly 0) yields a large finite loss
    # instead of ``inf``.
    _EPSILON = 1e-12

    def __init__(self, layer_sizes, learning_rate=0.01):
        """Create the network and randomly initialise its parameters.

        Args:
            layer_sizes: Sequence of layer widths, input size first and
                number of classes last (e.g. ``[784, 128, 10]``).
            learning_rate: Step size used by ``update_parameters``.

        Raises:
            ValueError: If fewer than two layer sizes are given or any size
                is not positive.
        """
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes must contain at least an input size and an output size"
            )
        if any(size < 1 for size in layer_sizes):
            raise ValueError("every entry of layer_sizes must be a positive integer")
        self.layer_sizes = layer_sizes
        self.learning_rate = learning_rate
        self.parameters = self.initialize_parameters()

    def initialize_parameters(self):
        """Return a fresh parameter dict with He-scaled weights and zero biases.

        Weights are drawn from ``np.random.randn`` (so ``np.random.seed`` still
        controls reproducibility) and scaled by ``sqrt(2 / fan_in)``.  Without
        the scaling, wide layers produce huge pre-activations that saturate
        the softmax from the very first forward pass.
        """
        parameters = {}
        for i in range(1, len(self.layer_sizes)):
            fan_in = self.layer_sizes[i - 1]
            fan_out = self.layer_sizes[i]
            parameters[f'W{i}'] = np.random.randn(fan_in, fan_out) * np.sqrt(2.0 / fan_in)
            parameters[f'b{i}'] = np.zeros((1, fan_out))
        return parameters

    def relu(self, Z):
        """Element-wise ``max(0, Z)``."""
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """Return a boolean mask that is True where ``Z`` is strictly positive."""
        return Z > 0

    def softmax(self, Z):
        """Row-wise softmax of ``Z``.

        The row maximum is subtracted before exponentiating so that large
        logits do not overflow; this does not change the result.
        """
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / np.sum(expZ, axis=1, keepdims=True)

    def forward_propagation(self, X):
        """Run a forward pass and return every intermediate value.

        Args:
            X: Input matrix, one sample per row.

        Returns:
            Dict with ``'A0'`` (the input) plus ``'Z{i}'`` / ``'A{i}'`` for
            every layer ``i``; ``'A{L}'`` holds the softmax probabilities.
        """
        last = len(self.layer_sizes) - 1
        cache = {"A0": X}
        A = X
        for i in range(1, last + 1):
            Z = np.dot(A, self.parameters[f'W{i}']) + self.parameters[f'b{i}']
            # Hidden layers are ReLU; only the final layer is softmax.
            A = self.softmax(Z) if i == last else self.relu(Z)
            cache[f'Z{i}'] = Z
            cache[f'A{i}'] = A
        return cache

    def compute_loss(self, Y, A):
        """Mean cross-entropy between one-hot labels ``Y`` and probabilities ``A``.

        The probability assigned to the true class is floored at a tiny
        epsilon before the log, so the result is always finite.
        """
        m = Y.shape[0]
        true_class_probs = A[np.arange(m), np.argmax(Y, axis=1)]
        log_probs = -np.log(np.maximum(true_class_probs, self._EPSILON))
        return np.sum(log_probs) / m

    def backward_propagation(self, X, Y, cache):
        """Compute gradients of the mean cross-entropy loss.

        Args:
            X: Input matrix used for the forward pass (only its row count is read).
            Y: One-hot label matrix.
            cache: Dict returned by ``forward_propagation`` for ``X``.

        Returns:
            Dict with ``'dW{i}'`` and ``'db{i}'`` for every layer ``i``, each
            shaped like the corresponding parameter.
        """
        grads = {}
        m = X.shape[0]
        last = len(self.layer_sizes) - 1
        # Softmax combined with cross-entropy gives this simple output-layer error.
        dZ = cache[f'A{last}'] - Y
        for i in reversed(range(1, last + 1)):
            grads[f'dW{i}'] = np.dot(cache[f'A{i - 1}'].T, dZ) / m
            grads[f'db{i}'] = np.sum(dZ, axis=0, keepdims=True) / m
            if i > 1:
                # Push the error back through W{i} and the previous ReLU.
                dA = np.dot(dZ, self.parameters[f'W{i}'].T)
                dZ = dA * self.relu_derivative(cache[f'Z{i - 1}'])
        return grads

    def update_parameters(self, grads):
        """Apply one in-place gradient-descent step using ``self.learning_rate``."""
        for i in range(1, len(self.layer_sizes)):
            self.parameters[f'W{i}'] -= self.learning_rate * grads[f'dW{i}']
            self.parameters[f'b{i}'] -= self.learning_rate * grads[f'db{i}']

    def train(self, X, Y, X_test, Y_test, epochs):
        """Train on the full batch ``(X, Y)`` for ``epochs`` iterations.

        Every 100th epoch (starting with epoch 0) the training loss computed
        before that epoch's update and the accuracy on ``(X_test, Y_test)``
        after the update are printed.
        """
        last = len(self.layer_sizes) - 1
        for epoch in range(epochs):
            cache = self.forward_propagation(X)
            loss = self.compute_loss(Y, cache[f'A{last}'])
            grads = self.backward_propagation(X, Y, cache)
            self.update_parameters(grads)

            if epoch % 100 == 0:
                accuracy = self.compute_accuracy(X_test, Y_test)
                print(f'Epoch {epoch}, Loss: {loss}, Accuracy: {accuracy * 100}%')

    def compute_accuracy(self, X, Y):
        """Return the fraction of rows of ``X`` whose predicted class matches ``Y``.

        Both the prediction and the label are taken as the arg-max along axis 1.
        """
        cache = self.forward_propagation(X)
        predictions = np.argmax(cache[f'A{len(self.layer_sizes) - 1}'], axis=1)
        labels = np.argmax(Y, axis=1)
        return np.mean(predictions == labels)

In [None]:
# Fetch the MNIST train/test splits via Keras.
(train_data, train_labels), (test_data, test_labels) = tensorflow.keras.datasets.mnist.load_data()

# Flatten every image to a 28*28 = 784 vector and divide pixel values by 255.
train_data, test_data = (
    images.reshape(-1, 28 * 28) / 255.0 for images in (train_data, test_data)
)

# Convert the class labels to one-hot rows.
train_labels, test_labels = map(
    tensorflow.keras.utils.to_categorical, (train_labels, test_labels)
)

# Network shape and optimisation settings.
layer_sizes = [784, 128, 10]
learning_rate = 0.01
epochs = 1000

# Build the model and train on the whole training set, evaluating on the test set.
nn = NeuralNetwork(layer_sizes=layer_sizes, learning_rate=learning_rate)
nn.train(
    X=train_data,
    Y=train_labels,
    X_test=test_data,
    Y_test=test_labels,
    epochs=epochs,
)

# Report accuracy on the held-out test split.
accuracy = nn.compute_accuracy(X=test_data, Y=test_labels)
print(f'Final Test Accuracy: {accuracy * 100}%')