In [631]:
import numpy as np
# from torchvision.datasets import MNIST
# def download_mnist(is_train: bool):
#     dataset = MNIST(root='./data',
#                     transform=lambda x: np.array(x).flatten(),
#                     download=True,
#                     train=is_train)
#     mnist_data = []
#     mnist_labels = []
#     for image, label in dataset:
#         mnist_data.append(image)
#         mnist_labels.append(label)
#     return mnist_data, mnist_labels
# train_X, train_Y = download_mnist(True)
# test_X, test_Y = download_mnist(False)

In [632]:
# We have numbers and we have them labeled
# We normalize the data by applying a Min-Max Normalization

# Fit the min-max scaling on the TRAINING data only and reuse the same
# statistics for the test set, so both splits live on one common scale and no
# information from the test set leaks into preprocessing.
# NOTE: test values outside the training range would map outside [0, 1].
train_min = np.min(train_X)
train_range = np.max(train_X) - train_min

normalized_data_train = (np.asarray(train_X, dtype=np.float64) - train_min) / train_range
normalized_data_test = (np.asarray(test_X, dtype=np.float64) - train_min) / train_range

In [633]:
# One-Hot Encoding
# there are 10 labels "0 -> 9"

numberOfClasses = 10

# Start from all-zero rows and set, for every sample i, the single entry in
# the column given by its label (integer-array indexing touches one cell per row).
one_hot_encoded_train = np.zeros((len(train_Y), numberOfClasses))
one_hot_encoded_train[np.arange(len(train_Y)), train_Y] = 1

one_hot_encoded_test = np.zeros((len(test_Y), numberOfClasses))
one_hot_encoded_test[np.arange(len(test_Y)), test_Y] = 1

# Sanity check on the first training sample (its label is expected to be 5).
# The original printed `one_hot_encoded`, a name that is never defined here.
print(one_hot_encoded_train[0])

[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [634]:
# Split the data into batches of 100 elements for each epoch

def split_into_batches(data, batch_size):
    """Split ``data`` along its first axis into consecutive mini-batches.

    Every batch contains exactly ``batch_size`` rows, except possibly the last
    one, which holds the remaining rows. When the number of rows is a multiple
    of ``batch_size`` the result is the same as an even ``np.array_split``.

    Args:
        data: Array-like whose first axis indexes the samples.
        batch_size: Maximum number of rows per batch; must be >= 1.

    Returns:
        A list of NumPy arrays in the original row order. The list is empty
        when ``data`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    data = np.asarray(data)
    num_rows = data.shape[0]

    # Slice at multiples of batch_size: this never produces a batch larger than
    # requested and still works when there are fewer rows than batch_size.
    return [data[start:start + batch_size] for start in range(0, num_rows, batch_size)]

batchSize = 100

# Shape of one sample and one label before batching.
print(normalized_data_train[0].shape, one_hot_encoded_train[0].shape, sep="\n")

# Replace the flat arrays by lists of mini-batches (later cells iterate over these).
normalized_data_train, one_hot_encoded_train = (
    split_into_batches(normalized_data_train, batchSize),
    split_into_batches(one_hot_encoded_train, batchSize),
)

# Shape of the first batch of samples and of labels after batching.
print(normalized_data_train[0].shape, one_hot_encoded_train[0].shape, sep="\n")

(784,)
(10,)
(100, 784)
(100, 10)


In [635]:
# Create the weight matrix and bias vector with random values drawn uniformly
# from [0, 1). A seeded generator makes the initialisation, and therefore the
# training result, reproducible across kernel restarts.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

weightsMatrix = rng.random((784, 10))  # one column of 784 pixel weights per class
biasMatrix = rng.random(10)            # one bias per class


In [636]:
# Forward propagation and softmax function

def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating; this leaves
    the result unchanged but keeps ``np.exp`` from overflowing on large scores.

    Args:
        z: 2-D array of scores; the softmax is taken along axis 1.

    Returns:
        Array of the same shape as ``z`` whose rows each sum to 1.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    numerators = np.exp(z - row_max)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

def forwardPropagation(data, weights, bias):
    """Compute class probabilities for a batch with a single linear layer.

    Args:
        data: Input samples (assumed one sample per row).
        weights: Weight matrix multiplied on the right of ``data``.
        bias: Bias added to the product via broadcasting.

    Returns:
        ``softmax(data . weights + bias)``.
    """
    logits = np.add(np.dot(data, weights), bias)
    return softmax(logits)

# def cross_entropy_loss(y_pred, y_true):
#     y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
#     loss = -np.sum(y_true * np.log(y_pred), axis=1)
#     return np.mean(loss)

def gradient_descent_update(W, b, x, target, y, learning_rate):
    """Apply one gradient step to the weights and bias, in place.

    The step uses the residual ``target - y`` summed over the rows of the
    batch (it is not averaged), scaled by ``learning_rate``.

    Args:
        W: Weight matrix; modified in place.
        b: Bias vector; modified in place.
        x: Batch of inputs (assumed one sample per row).
        target: Expected outputs for the batch.
        y: Outputs predicted for the batch.
        learning_rate: Step size multiplier.

    Returns:
        The same ``W`` and ``b`` objects after the update.
    """
    residual = target - y

    weight_step = learning_rate * np.dot(x.T, residual)
    W += weight_step

    bias_step = learning_rate * np.sum(residual, axis=0)
    b += bias_step

    return W, b

learning_rate = 0.005

# Number of mini-batches available for training (not the number of epochs).
print(len(normalized_data_train))  # the split generated 600 batches

max_iterations = 450
iteration_count = 0  # stays 0 if there are no batches at all

# One pass over the batches, stopping after max_iterations updates.
for iteration_count, (normalized_data_batch, one_hot_encoded_batch) in enumerate(
    zip(normalized_data_train, one_hot_encoded_train), start=1
):
    predictions = forwardPropagation(normalized_data_batch, weightsMatrix, biasMatrix)
    # loss = cross_entropy_loss(predictions, one_hot_encoded_batch)

    weightsMatrix, biasMatrix = gradient_descent_update(
        weightsMatrix,
        biasMatrix,
        normalized_data_batch,
        one_hot_encoded_batch,
        predictions,
        learning_rate,
    )

    if iteration_count >= max_iterations:
        break

600


In [637]:
# Testing the NN

# Show the shapes of the test inputs and of their one-hot labels.
print(normalized_data_test.shape, one_hot_encoded_test.shape, sep="\n")

def calculate_accuracy(predictions, one_hot_labels):
    """Fraction of rows whose highest-scoring class matches the label.

    Args:
        predictions: 2-D array of per-class scores, one row per sample.
        one_hot_labels: 2-D array of one-hot labels, one row per sample.

    Returns:
        Mean of the element-wise match between the arg-max of each
        prediction row and the arg-max of the corresponding label row.
    """
    is_correct = np.argmax(predictions, axis=1) == np.argmax(one_hot_labels, axis=1)
    return np.mean(is_correct)

# Run the model on the test set and report the share of correct predictions.
test_predictions = forwardPropagation(
    data=normalized_data_test,
    weights=weightsMatrix,
    bias=biasMatrix,
)
accuracy = calculate_accuracy(
    predictions=test_predictions,
    one_hot_labels=one_hot_encoded_test,
)

print(accuracy)

(10000, 784)
(10000, 10)
0.8973
