# Creating the dataset

In [None]:
import torch
from torch import nn
import pandas as pd
from PIL import Image
import numpy as np
import random
from sklearn.model_selection import train_test_split
import random

# Seed every RNG this notebook draws from (random image sampling, torch weight
# initialisation, the train/test split) so Restart Kernel -> Run All gives the
# same figures and metrics each time.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} device")

# Load the training CSV into a dataframe
df = pd.read_csv('data/train.csv')

# Features are every column except 'label' (as float32); targets are the
# 'label' column (as int64).
X = torch.tensor(df.drop(columns=['label']).to_numpy(), dtype=torch.float)
y = torch.tensor(df['label'].to_numpy(), dtype=torch.long)

# Hold out 20% of the rows for testing, then move all four splits to `device`.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = (
    split.to(device) for split in (X_train, X_test, y_train, y_test)
)

# Last expression of the cell: displays the shapes of the four splits.
X_train.shape, X_test.shape, y_train.shape, y_test.shape


# Visualizing the images

In [None]:
# Pixel data only: every column except 'label', one row per image.
imgs = np.array(df.loc[:, ~df.columns.isin(['label'])])

# Size of the grid, counted in images
MATRIX_WIDTH = 70
MATRIX_HEIGHT = 10

# The size of each image, in pixels
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28

# Blank 8-bit grayscale canvas big enough to hold the whole grid
matrix = Image.new('L', (MATRIX_WIDTH * IMAGE_WIDTH, MATRIX_HEIGHT * IMAGE_HEIGHT))

# Fill every grid slot with a randomly chosen image
for row in range(MATRIX_HEIGHT):
    for col in range(MATRIX_WIDTH):
        # randrange excludes its upper bound, so the index is always valid
        # (randint(0, n) can return n itself and index past the last row).
        idx = random.randrange(len(imgs))
        image = Image.fromarray(
            imgs[idx].reshape(IMAGE_HEIGHT, IMAGE_WIDTH).astype('uint8'), mode='L'
        )
        # Named left/top rather than x/y so the global label tensor `y`
        # created in the dataset cell is not overwritten by an int.
        left = col * IMAGE_WIDTH
        top = row * IMAGE_HEIGHT
        matrix.paste(image, (left, top))

display(matrix)

# The model

In [None]:
class MNIST(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10.

    A LeakyReLU follows every linear layer except the last one, so the
    forward pass returns the raw output of the final linear layer.
    """

    def __init__(self) -> None:
        super().__init__()
        widths = [28 * 28, 512, 256, 128, 64, 10]

        stack = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(n_in, n_out))
            stack.append(nn.LeakyReLU())
        stack.pop()  # no activation after the output layer

        self.sequence = nn.Sequential(*stack)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Run ``X`` (last dimension of size 784) through the layer stack."""
        return self.sequence(X)

# Build the network, move its parameters to the selected device, then show
# the layer summary.
model = MNIST()
model = model.to(device)
display(model)

# Defining some hyperparameters

In [None]:
# Calculate the accuracy
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Args:
        y_true: Tensor of ground-truth class indices.
        y_pred: Tensor of predicted class indices, with the same shape as
            (or broadcastable against) ``y_true``.

    Returns:
        float: ``100 * matches / compared elements``, or ``0.0`` when there
        are no elements to compare.
    """
    matches = torch.eq(y_true, y_pred)

    # Divide by the number of compared elements, not len(y_pred): len() only
    # counts the first dimension, so multi-dimensional input could report
    # more than 100%.
    total = matches.numel()
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    return (matches.sum().item() / total) * 100

# Loss: cross-entropy between the model's outputs and the class labels
loss_fn = nn.CrossEntropyLoss()

# Optimizer: Adam over every model parameter, learning rate 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training

In [None]:
epochs = 1000
log_every = 100  # evaluate and report once every `log_every` epochs

# range(epochs + 1) so that the last epoch index (== epochs) is reported too.
for epoch in range(epochs + 1):
    ### Train
    model.train()

    # 1. Forward (the whole training set in a single batch)
    pred = model(X_train)

    # 2. Loss
    loss = loss_fn(pred, y_train)

    # 3. Backward
    # Clear gradients first so nothing left over from an interrupted or
    # earlier run is accumulated into this step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    ### Testing
    # Only evaluate on the epochs that are actually printed; scoring the test
    # set on every epoch was work whose result was discarded.
    if epoch % log_every == 0:
        model.eval()
        with torch.inference_mode():
            # argmax over the logits picks the same class as argmax over
            # softmax(logits), so the softmax is unnecessary here.
            test_preds = model(X_test).argmax(dim=1)
            test_acc = accuracy_fn(y_test, test_preds)

        print('Epoch: {:<5d}    | Loss: {:<.5f}    | Acc: {:.2f}%'.format(epoch, loss.item(), test_acc))


# Predictions

In [None]:
model.eval()
with torch.inference_mode():
    y_logits = model(X_test)

    # Show 10 consecutive test samples (fewer only if the test set is smaller).
    n_shown = min(10, len(X_test))

    # randrange excludes its upper bound; capping the start at
    # len(X_test) - n_shown keeps the whole window [a, b) inside the test set.
    # randint(0, len(X_test)) could start at or near the end and yield a
    # short or empty slice.
    a = random.randrange(len(X_test) - n_shown + 1)
    b = a + n_shown

    # Prediction: argmax over logits (softmax would not change the winner)
    pred = y_logits.argmax(dim=1)[a:b].cpu()

    # Correct labels
    labels = y_test[a:b].cpu()

print('Predictions:  ', pred.tolist())
print('Labels:       ', labels.tolist())

# Report against the number of samples actually shown.
print(f'\nCorrects: {torch.eq(pred, labels).sum().item()} / {n_shown}')