# Task 0: Install data

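Download the MNIST dataset from Kaggle and unpack it into a local `data` folder, which is excluded from version control through `.gitignore`. The commands below are Linux shell commands; adapt them to your platform if needed.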

In [1]:
! mkdir data
! echo data/ > .gitignore
! curl -L -o ./data/mnist-dataset.zip https://www.kaggle.com/api/v1/datasets/download/hojjatk/mnist-dataset
! unzip ./data/mnist-dataset.zip -d ./data/

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 22.0M  100 22.0M    0     0  4344k      0  0:00:05  0:00:05 --:--:-- 5362k
Archive:  ./data/mnist-dataset.zip
  inflating: ./data/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte  
  inflating: ./data/t10k-images.idx3-ubyte  
  inflating: ./data/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte  
  inflating: ./data/t10k-labels.idx1-ubyte  
  inflating: ./data/train-images-idx3-ubyte/train-images-idx3-ubyte  
  inflating: ./data/train-images.idx3-ubyte  
  inflating: ./data/train-labels-idx1-ubyte/train-labels-idx1-ubyte  
  inflating: ./data/train-labels.idx1-ubyte  


# Task 1: Importing Libraries

In [2]:
# Importing Necessary Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
from torch import nn, optim
import cv2
import random
import idx2numpy
import numpy as np
from typing import Tuple

# Task 2: Loading the Dataset

In [3]:

# Load the raw IDX files into NumPy arrays (uint8).
train_images_path = 'data/train-images.idx3-ubyte'
train_images_np = idx2numpy.convert_from_file(train_images_path)
# train_images_np is now an np.ndarray of shape (60000, 28, 28)

train_labels_path = 'data/train-labels.idx1-ubyte'
train_labels_np = idx2numpy.convert_from_file(train_labels_path)
# train_labels_np is now a 1-D np.ndarray of shape (60000,): one digit per image

t10k_images_path = 'data/t10k-images.idx3-ubyte'
t10k_images_np = idx2numpy.convert_from_file(t10k_images_path)
# t10k_images_np is now an np.ndarray of shape (10000, 28, 28)

t10k_labels_path = 'data/t10k-labels.idx1-ubyte'
t10k_labels_np = idx2numpy.convert_from_file(t10k_labels_path)
# t10k_labels_np is now a 1-D np.ndarray of shape (10000,): one digit per image

# Fail fast on a truncated or mismatched download instead of mid-training.
assert len(train_images_np) == len(train_labels_np), "train images/labels count mismatch"
assert len(t10k_images_np) == len(t10k_labels_np), "test images/labels count mismatch"

# Task 3: Exploratory Data Analysis (EDA)

In [4]:
# Browse random training samples in a single OpenCV window.
# Press any key for the next sample, or 'q' to exit.
EDA_WINDOW = "MNIST sample"
cv2.namedWindow(EDA_WINDOW)
try:
    while True:
        # Sample from the actual dataset size rather than a hard-coded bound
        idx = random.randrange(len(train_images_np))

        # Reuse one window and only change its title; using the label as the
        # window name would open a separate window for every distinct digit.
        cv2.setWindowTitle(EDA_WINDOW, f"Value: {train_labels_np[idx]}")
        cv2.imshow(EDA_WINDOW, train_images_np[idx])

        # waitKey(0) blocks until a key press; the mask keeps the low byte
        if cv2.waitKey(0) & 0xFF == ord('q'):
            break
finally:
    # Close the OpenCV window even if the cell is interrupted
    cv2.destroyAllWindows()

Only C and default locale supported with the posix collation implementation
Only C and default locale supported with the posix collation implementation
Case insensitive sorting unsupported in the posix collation implementation
Numeric mode unsupported in the posix collation implementation


- Each image is a 28x28 matrix of uint8 values, ranging from 0 to 255.
- Each label is a uint8 value holding the corresponding digit, ranging from 0 to 9.

# Task 4: Data Preprocessing

In [5]:
# Data Preprocessing with a Custom Dataset Class
class MNISTDataset(Dataset):
    """Dataset of grayscale digit images scaled to [0, 1] with integer labels."""

    def __init__(self, train_images_np: np.ndarray, train_labels_np: np.ndarray):
        """
        Args:
            train_images_np: array-like of images with pixel values in 0-255;
                the first axis indexes the samples.
            train_labels_np: array-like with one label per image.

        Raises:
            ValueError: if the number of images and labels differ.
        """
        images = np.asarray(train_images_np, dtype=np.float32)
        labels = np.asarray(train_labels_np)
        # Catch a mismatch here rather than as an IndexError in the middle of an epoch
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels differ in length: {len(images)} != {len(labels)}"
            )
        self.images = images / 255.0
        self.labels = labels

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.labels)

    def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Returns two tensors: the float32 image scaled to 0-1 (28x28 for MNIST)
        and the corresponding label as torch.long (int64).
        """
        image = self.images[idx]
        label = self.labels[idx]
        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

# Task 5: Splitting Data into Training and Validation Sets

In [6]:
# Train-Validation Split
SPLIT_SEED = 42      # shared seed for the split and for the training shuffle
VAL_FRACTION = 0.2   # share of the training data held out for validation
BATCH_SIZE = 64

train_images_set, val_images_set, train_labels_set, val_labels_set = train_test_split(
    train_images_np,
    train_labels_np,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)

train_dataset = MNISTDataset(train_images_set, train_labels_set)
val_dataset = MNISTDataset(val_images_set, val_labels_set)

# Create DataLoaders
# A dedicated seeded generator makes the shuffle order reproducible across
# kernel restarts without touching the global torch RNG.
shuffle_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, generator=shuffle_generator
)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Task 6: Building the Neural Network Model

In [7]:
# Define the Neural Network Model
SEED = 42  # fixes the weight initialisation so training runs are repeatable

input_layer = 28 * 28   # one input per pixel of a flattened 28x28 image (784)
hidden_layer1 = 128
hidden_layer2 = 64
output_layer = 10       # one logit per digit class

# Seed immediately before construction so the first layer always draws the
# same initial weights on a fresh run.
torch.manual_seed(SEED)
model = nn.Sequential(
    nn.Linear(input_layer, hidden_layer1),
    nn.ReLU(),
    nn.Linear(hidden_layer1, hidden_layer2),
    nn.ReLU(),
    nn.Linear(hidden_layer2, output_layer)  # raw logits; CrossEntropyLoss applies log-softmax
)

# Loss Function and Optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Task 7: Training the Model

In [8]:
# Training the Model
def train_model(model, train_loader, val_loader, epochs=10, criterion=None, opt=None):
    """Trains the neural network and evaluates it on validation data.

    Args:
        model: network taking a (batch, features) tensor and returning class logits.
        train_loader: iterable of (images, labels) batches used for optimisation.
        val_loader: iterable of (images, labels) batches used for evaluation.
        epochs: number of full passes over ``train_loader``.
        criterion: loss callable; defaults to the notebook-level ``loss_function``.
        opt: optimizer for ``model``; defaults to the notebook-level ``optimizer``.

    Prints the mean per-batch train loss, mean per-batch validation loss and
    validation accuracy after every epoch. Returns nothing.
    """
    # Fall back to the notebook globals only when nothing was passed in, so
    # existing three-argument calls keep working.
    criterion = loss_function if criterion is None else criterion
    opt = optimizer if opt is None else opt

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for images, labels in train_loader:
            # Flatten every image to a vector: (batch, H, W) -> (batch, H*W)
            images = images.reshape(images.size(0), -1)
            opt.zero_grad()
            predictions = model(images)
            loss = criterion(predictions, labels)
            loss.backward()
            opt.step()
            total_loss += loss.item()

        # Validation Step
        model.eval()
        val_loss = 0.0
        correct = 0
        seen = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.reshape(images.size(0), -1)
                predictions = model(images)
                val_loss += criterion(predictions, labels).item()
                correct += (predictions.argmax(1) == labels).sum().item()
                # Count the samples actually evaluated instead of relying on a
                # global dataset that may not match the loader passed in
                seen += labels.size(0)

        # max(..., 1) avoids a ZeroDivisionError when a loader is empty
        train_batches = max(len(train_loader), 1)
        val_batches = max(len(val_loader), 1)

        # Print Epoch Metrics
        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {total_loss/train_batches:.4f}")
        print(f"Validation Loss: {val_loss/val_batches:.4f}")
        print(f"Validation Accuracy: {100 * correct / max(seen, 1):.2f}%")
        print("-" * 30)

In [9]:
# Run the training loop with the default number of epochs.
train_model(model=model, train_loader=train_loader, val_loader=val_loader)

Epoch 1/10
Train Loss: 0.3833
Validation Loss: 0.2094
Validation Accuracy: 93.80%
------------------------------
Epoch 2/10
Train Loss: 0.1665
Validation Loss: 0.1379
Validation Accuracy: 95.84%
------------------------------
Epoch 3/10
Train Loss: 0.1139
Validation Loss: 0.1331
Validation Accuracy: 95.97%
------------------------------
Epoch 4/10
Train Loss: 0.0857
Validation Loss: 0.1042
Validation Accuracy: 96.77%
------------------------------
Epoch 5/10
Train Loss: 0.0659
Validation Loss: 0.1028
Validation Accuracy: 96.88%
------------------------------
Epoch 6/10
Train Loss: 0.0543
Validation Loss: 0.0833
Validation Accuracy: 97.53%
------------------------------
Epoch 7/10
Train Loss: 0.0426
Validation Loss: 0.0884
Validation Accuracy: 97.51%
------------------------------
Epoch 8/10
Train Loss: 0.0342
Validation Loss: 0.0838
Validation Accuracy: 97.74%
------------------------------
Epoch 9/10
Train Loss: 0.0290
Validation Loss: 0.0958
Validation Accuracy: 97.37%
--------------

# Task 8: Preprocessing Test Data

In [10]:
# Read the held-out test split so this section can run on its own.
t10k_images_path = 'data/t10k-images.idx3-ubyte'
t10k_images_np = idx2numpy.convert_from_file(t10k_images_path)
# t10k_images_np is now an np.ndarray of shape (10000, 28, 28)

t10k_labels_path = 'data/t10k-labels.idx1-ubyte'
t10k_labels_np = idx2numpy.convert_from_file(t10k_labels_path)
# t10k_labels_np is now a 1-D np.ndarray of shape (10000,): one digit per image

# Fail fast on a truncated or mismatched download.
assert len(t10k_images_np) == len(t10k_labels_np), "test images/labels count mismatch"

In [11]:
# Wrap the test arrays in the same Dataset class used for training, then
# serve them one sample at a time in their stored order.
test_dataset = MNISTDataset(
    t10k_images_np,
    t10k_labels_np,
)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=1)

# Task 9: Making Predictions

In [12]:
# Show random test digits with the true and predicted label.
# Press any key for the next sample, or 'q' to exit.
PREDICTION_WINDOW = "MNIST prediction"

# Inference mode does not change per sample, so set it once up front
model.eval()
cv2.namedWindow(PREDICTION_WINDOW)
try:
    while True:
        # Index the full test dataset directly. This avoids rebuilding a
        # Dataset/DataLoader per image and leaves test_dataset/test_loader intact.
        idx = random.randrange(len(test_dataset))
        image, label = test_dataset[idx]  # float32 image scaled to 0-1, int64 label

        with torch.no_grad():
            # Add a batch dimension and flatten to the (1, features) layout the model expects
            outputs = model(image.reshape(1, -1))
        predicted_label = outputs.argmax(1).item()

        printable_label = label.item()
        printable_predicted_label = predicted_label
        printable_image = image.numpy()  # OpenCV renders float images in the 0-1 range

        # Reuse one window and only update its title for each sample
        cv2.setWindowTitle(
            PREDICTION_WINDOW,
            f"True Value: {printable_label}, Predicted Value: {printable_predicted_label}",
        )
        cv2.imshow(PREDICTION_WINDOW, printable_image)

        # waitKey(0) blocks until a key press; the mask keeps the low byte
        if cv2.waitKey(0) & 0xFF == ord('q'):
            break
finally:
    # Close the OpenCV window even if the cell is interrupted
    cv2.destroyAllWindows()