# Import libraries

In [1]:
import numpy as np
import torchvision.datasets as datasets
from sklearn.preprocessing import OneHotEncoder

# Read data

In [2]:
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=None)
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=None)

# Data preprocessing

In [3]:
def split_data_labels(dataset):
    return dataset.data.numpy(), dataset.targets.numpy().reshape(-1, 1)


In [4]:
def normalization_data(data):
    return data / 255.0

In [5]:
def reshape_data(data):
    return data.reshape(len(data), -1)

In [6]:
def preprocessing_data(dataset, type_data):
    data, labels = split_data_labels(dataset)
    data = normalization_data(data)
    data = reshape_data(data)
    if type_data == 'train':
        encoder = OneHotEncoder(sparse=False)
        labels = encoder.fit_transform(labels)
    return data, labels

In [7]:
train_data, train_labels = preprocessing_data(mnist_trainset, 'train')
test_data, test_labels = preprocessing_data(mnist_trainset, 'test')



# Model

In [8]:
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.weights1 = np.random.randn(input_dim, hidden_dim)
        self.bias1 = np.zeros((1, hidden_dim))

        self.weights2 = np.random.randn(hidden_dim, output_dim)
        self.bias2 = np.zeros((1, output_dim))
        
    def relu(self, x):
        return np.maximum(0, x)
    
    def softmax(self, x):
        exp_vals = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_vals / np.sum(exp_vals, axis=1, keepdims=True)
    
    def mse_loss(self, y_pred, y_true):
        return np.mean((y_pred - y_true) ** 2)
    
    def forward_pass(self, X):
        self.hidden_output = self.relu(np.dot(X, self.weights1) + self.bias1)
        output_before_softmax = np.dot(self.hidden_output, self.weights2) + self.bias2
        self.output = self.softmax(output_before_softmax)
        return self.output
    
    def backward_pass(self, X, y, learning_rate):
        m = X.shape[0]

        grad_softmax = self.output - y   

        grad_weights2 = np.dot(self.hidden_output.T, grad_softmax) / m  
        grad_bias2 = np.sum(grad_softmax, axis=0, keepdims=True) / m
        
        grad_hidden = np.dot(grad_softmax, self.weights2.T) * (self.hidden_output > 0)
        
        grad_weights1 = np.dot(X.T, grad_hidden) / m
        grad_bias1 = np.sum(grad_hidden, axis=0, keepdims=True) / m
        
        self.weights2 -= learning_rate * grad_weights2
        self.bias2 -= learning_rate * grad_bias2
        self.weights1 -= learning_rate * grad_weights1
        self.bias1 -= learning_rate * grad_bias1
    
    def train(self, X, y, epochs, learning_rate):
        for i in range(epochs):
            output = self.forward_pass(X)
            loss = self.mse_loss(output, y)
            self.backward_pass(X, y, learning_rate)
            if i % 10 == 0:
                print(f'Epoch {i}, Loss: {loss}')

    def predict(self, X):
        hidden_output = self.relu(np.dot(X, self.weights1) + self.bias1)
        output = self.softmax(np.dot(hidden_output, self.weights2) + self.bias2)
        return np.argmax(output, axis=1)


# Train

In [9]:
# setting hyperparameters
input_dim = train_data.shape[1]
hidden_dim = 512
output_dim = train_labels.shape[1]
epochs = 100
learning_rate = 0.01

In [10]:
# Network creation and training
nn = NeuralNetwork(input_dim, hidden_dim, output_dim)
nn.train(train_data, train_labels, epochs, learning_rate)

Epoch 0, Loss: 0.18481271328466992
Epoch 10, Loss: 0.16794339092015162
Epoch 20, Loss: 0.1504265760424067
Epoch 30, Loss: 0.13430067647974098
Epoch 40, Loss: 0.12050285133400418
Epoch 50, Loss: 0.1084432929661027
Epoch 60, Loss: 0.09874163498215775
Epoch 70, Loss: 0.09097147373423747
Epoch 80, Loss: 0.08462718676577068
Epoch 90, Loss: 0.07934327032502272


# Evaluation

In [11]:
from sklearn.metrics import accuracy_score

y_pred = nn.predict(test_data)
print(f'Model accuracy: {accuracy_score(test_labels, y_pred)}')


Model accuracy: 0.6179833333333333
