https://www.youtube.com/watch?v=aircAruvnKk
https://www.youtube.com/watch?v=IHZwWFHWa-w

In [None]:
%matplotlib inline 
import pickle
import torch
import numpy as np
import PIL.Image
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

from IPython.display import Image, display
from matplotlib.pyplot import imshow

In [None]:
# Load the MNIST arrays from a local pickle cache (the original note says the
# data comes from www.openml.org; this cell itself only reads the local file).
# NOTE(security): pickle can execute arbitrary code while loading, so only
# point mnist_path at a file you created yourself or otherwise trust.
mnist_path = r'D:/workspace/machine-learning/number-recognition/local_data/mnist.pickle'
with open(mnist_path, 'rb') as mnist_file:
    # The file holds two consecutive pickled objects: labels first, then pixels.
    y = pickle.load(mnist_file)
    X = pickle.load(mnist_file)
print(f"Dataset shapes: X={X.shape}, y={y.shape}")

In [None]:
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# X contains the image data (as a 2D array of samples x pixels)
# y contains the labels (the actual number for each image)
index = 250

# Optional preview of one sample: unflatten its 784 pixels into a 28x28 grid
# and show it in grayscale with its label as the title.
image_data = np.reshape(X[index], (28, 28))
plt.imshow(image_data, cmap='gray')
plt.title(f"Label: {y[index]}")
plt.show()

In [None]:
class NumberRecognition(torch.nn.Module):
    """Fully connected classifier that outputs per-class log-probabilities.

    The network is a stack of ``Linear`` + ``ReLU`` layers followed by a final
    ``Linear`` projection and ``LogSoftmax`` over dimension 1, so its output is
    meant to be paired with ``torch.nn.NLLLoss``.

    Args:
        in_features: Length of each flattened input sample (784 by default,
            i.e. a 28x28 image).
        hidden_sizes: Width of each hidden layer, in order. The default
            ``(128, 64)`` reproduces the original fixed architecture.
        num_classes: Number of output classes (10 by default).
    """

    def __init__(self, in_features=784, hidden_sizes=(128, 64), num_classes=10):
        super().__init__()
        layers = []
        previous_width = in_features
        for width in hidden_sizes:
            layers.append(torch.nn.Linear(previous_width, width))
            layers.append(torch.nn.ReLU())
            previous_width = width
        layers.append(torch.nn.Linear(previous_width, num_classes))
        # dim=1 normalises across classes, so inputs must carry a batch dimension.
        layers.append(torch.nn.LogSoftmax(dim=1))
        # Kept as a single Sequential named fc_head so state_dict keys
        # (fc_head.0.*, fc_head.2.*, ...) match the original layout.
        self.fc_head = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return log-probabilities of shape (batch, num_classes).

        Args:
            x: Float tensor of shape (batch, in_features).
        """
        return self.fc_head(x)

In [None]:
# Build the classifier, then move its parameters onto the selected device.
recognition_model = NumberRecognition()
recognition_model = recognition_model.to(device)
recognition_model

In [None]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the label
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Class labels must be integers before they are turned into long tensors.
y_train, y_test = y_train.astype(int), y_test.astype(int)

# Create every tensor directly on the target device.
feature_opts = {"dtype": torch.float32, "device": device}
label_opts = {"dtype": torch.long, "device": device}
X_train_t = torch.tensor(X_train, **feature_opts)
X_test_t = torch.tensor(X_test, **feature_opts)
y_train_t = torch.tensor(y_train, **label_opts)
y_test_t = torch.tensor(y_test, **label_opts)

# Pair features with labels, then batch them. Only the training loader shuffles.
train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=64)

In [None]:
# demonstration purposes: push a single training batch through the model.
# The batch is bound to x_batch / y_batch rather than x / y so the dataset-wide
# label array `y` is not overwritten; later cells index `y` by dataset position.
x_batch, y_batch = next(iter(train_loader))
y_pred = recognition_model(x_batch)
print(y_pred.argmax(dim=1))
print(x_batch)

In [None]:
# Training loop: negative log-likelihood on the model's log-probabilities,
# optimised with Adam. The mean loss of every epoch is kept for plotting.
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(recognition_model.parameters(), lr=0.001)
epoch_losses = []
num_samples = len(train_loader.dataset)
for epoch in range(100):  # number of epochs
    recognition_model.train()
    loss_sum = 0.0
    for batch_inputs, batch_labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(recognition_model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()
        # Weight each batch loss by its sample count so that dividing by the
        # dataset size below yields a per-sample average for the epoch.
        loss_sum += batch_inputs.size(0) * batch_loss.item()
    epoch_loss = loss_sum / num_samples
    epoch_losses.append(epoch_loss)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

In [None]:
# Draw the per-epoch training loss as a line chart with a marker per epoch.
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(epoch_losses, marker='o')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training Loss')
ax.grid(True)
plt.show()

In [None]:
# Model accuracy on the held-out test split.
recognition_model.eval()
correct = 0
total = 0
with torch.no_grad():  # inference only, so no gradient tracking is needed
    for inputs, labels in test_loader:
        outputs = recognition_model(inputs)
        # Highest log-probability per row is the predicted class. argmax
        # replaces the legacy `torch.max(outputs.data, 1)` idiom.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Model accuracy on the training split (compare with the test accuracy to
# judge how much the model has overfit).
recognition_model.eval()
correct = 0
total = 0
with torch.no_grad():  # inference only, so no gradient tracking is needed
    for inputs, labels in train_loader:
        outputs = recognition_model(inputs)
        # Highest log-probability per row is the predicted class. argmax
        # replaces the legacy `torch.max(outputs.data, 1)` idiom.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = correct / total
print(f"Train Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Prediction helpers and examples
import numpy as np
import matplotlib.pyplot as plt
import torch

def predict_index(idx):
    """Classify one test image and plot it with its predicted and true label.

    Args:
        idx: Position of the sample within ``X_test`` / ``y_test``.
    """
    recognition_model.eval()
    img = X_test[idx]
    # The model expects a batch dimension, so wrap the sample as a batch of one.
    single_batch = torch.tensor(img, dtype=torch.float32, device=device).unsqueeze(0)
    with torch.no_grad():
        log_probs = recognition_model(single_batch)
        # The network ends in LogSoftmax; exponentiate to recover probabilities.
        best_prob, best_class = torch.exp(log_probs).max(dim=1)
    pred = int(best_class[0].item())
    prob = float(best_prob[0].item())

    plt.imshow(img.reshape(28, 28), cmap='gray')
    plt.title(f'Pred: {pred} ({prob:.2f}), True: {int(y_test[idx])}')
    plt.axis('off')
    plt.show()

def show_random_predictions(n=9):
    """Display n random predictions from the test set in a grid.

    Samples are drawn without replacement from ``X_test``. Each tile shows the
    image, the predicted class with its probability, and the true label.

    Args:
        n: Number of images to show. Values larger than the test set are
            clamped to its size; values <= 0 draw nothing.

    Returns:
        None.
    """
    if n <= 0:
        # Nothing to draw; the grid-size computation below would divide by zero.
        return
    n = min(n, len(X_test))
    if n == 0:
        # Empty test set: same division-by-zero hazard as above.
        return

    inds = np.random.choice(len(X_test), n, replace=False)
    cols = int(np.ceil(np.sqrt(n)))
    rows = int(np.ceil(n / cols))

    # Run all selected samples through the model in one batched forward pass.
    recognition_model.eval()
    batch = torch.tensor(X_test[inds], dtype=torch.float32, device=device)
    with torch.no_grad():
        # The model emits log-probabilities; exponentiate to get probabilities.
        probs = torch.exp(recognition_model(batch))
        top_probs, top_classes = probs.max(dim=1)
    top_probs = top_probs.tolist()
    top_classes = top_classes.tolist()

    plt.figure(figsize=(cols*2, rows*2))
    for i, (idx, pred, prob) in enumerate(zip(inds, top_classes, top_probs)):
        plt.subplot(rows, cols, i+1)
        plt.imshow(X_test[idx].reshape(28, 28), cmap='gray')
        plt.title(f'{pred} ({prob:.2f})\nTrue: {int(y_test[idx])}')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Example usage: show the first test sample, then a grid of ten random ones.
predict_index(idx=0)
show_random_predictions(n=10)

In [None]:
# Browse the whole dataset: every image in X with its index and label,
# one figure per page and 10 images per row.
# WARNING: still a lot of output for full dataset; use smaller page_size if desired
import math
import matplotlib.pyplot as plt

n_cols = 10  # images per row
n_rows = 10  # rows per page (adjust to change page size)
page_size = n_cols * n_rows
total = len(X)
for start in range(0, total, page_size):
    # Slicing clamps at the end of X, so the last page may be shorter.
    imgs = X[start:start + page_size]
    rows = math.ceil(len(imgs) / n_cols)
    plt.figure(figsize=(n_cols * 1.6, rows * 1.6))
    for i, flat_pixels in enumerate(imgs):
        plt.subplot(rows, n_cols, i + 1)
        plt.imshow(flat_pixels.reshape(28, 28), cmap='gray')
        plt.title(f'Idx {start + i}  Label: {y[start + i]}')
        plt.axis('off')
    plt.tight_layout()
    plt.show()