In [6]:
import numpy as np
import pandas as pd
from PIL import Image
import torch
from torch import tensor
from torchvision.datasets import MNIST

### Loading the Training and Test sets

In [7]:
# Build both MNIST splits under ./mnist_data/: the training split first
# (train=True), then the test split (train=False).
train, test = (
    MNIST('./mnist_data/', download=True, train=is_train)
    for is_train in (True, False)
)

# Report the dataset type and how many samples each split holds.
print(type(train))
print('Number of samples in the training set', len(train))
print('Number of samples in the test set', len(test))

<class 'torchvision.datasets.mnist.MNIST'>
Number of samples in the training set 60000
Number of samples in the test set 10000


In [8]:
def load_train_test():
    '''
    Load the MNIST training and test splits as stacked tensors.

    Returns:
        A tuple (X_train, y_train, X_test, y_test) where the X tensors hold
        the images as float32 (one image per entry along dim 0, pixel values
        left unscaled) and the y tensors hold the labels as int8.
    '''
    # Download and load the training and test samples.
    train = MNIST('./mnist_data/', download=True, train=True)
    test = MNIST('./mnist_data/', download=True, train=False)

    # The dataset object already keeps every image in one uint8 tensor
    # (`data`) and every label in `targets`. Converting those in bulk yields
    # the same values as iterating sample by sample, without building a PIL
    # image and a NumPy array for each of the samples.
    # The dtype change in `.to(...)` produces new tensors, so the returned
    # values do not alias the dataset's own storage.
    X_train = train.data.to(torch.float32)
    y_train = train.targets.to(torch.int8)
    X_test = test.data.to(torch.float32)
    y_test = test.targets.to(torch.int8)

    return X_train, y_train, X_test, y_test

# Materialize both splits once; the cells below reuse these four tensors.
X_train, y_train, X_test, y_test = load_train_test()

### Grouping the Training Set

In [15]:
def group_samples_by_label(X: torch.Tensor, y: torch.Tensor, num_labels: int = 10) -> list[torch.Tensor]:
    '''
    Group the samples in X by their integer label in y.

    Args:
        X: Samples stacked along dim 0.
        y: Integer labels, one per sample in X.
        num_labels: Number of distinct labels; every label must lie in
            range(num_labels). Defaults to 10 (the MNIST digits).

    Returns:
        A list of num_labels tensors. Entry `label` holds, in their original
        order, the samples whose label equals `label`. A label with no samples
        yields an empty tensor of shape (0, *X.shape[1:]).

    Raises:
        ValueError: If any label falls outside range(num_labels).
    '''
    labels = y.reshape(-1)

    # Reject out-of-range labels explicitly: a negative label would otherwise
    # be counted in the wrong group or silently dropped.
    if labels.numel() and (labels.min() < 0 or labels.max() >= num_labels):
        raise ValueError(f'labels must lie in range({num_labels})')

    # Boolean-mask indexing selects each group in one vectorized step and
    # preserves sample order within the group.
    return [X[labels == label] for label in range(num_labels)]

In [16]:
# Split the training images into one stacked tensor per digit label.
train_grouped = group_samples_by_label(X_train, y_train)

# Investigate the group for zero.
# NOTE(review): the first two labels below say "Mean tensor", but the object
# being inspected is the stacked group of samples for label 0, not a mean.
print('Mean tensor:', type(train_grouped[0]))
print('Type of mean tensor:', train_grouped[0].dtype)
print('ndim (or Rank):', train_grouped[0].ndim)
print('Shape:', train_grouped[0].shape)
# Number of training samples that ended up in each label's group.
for label in range(10):
    print(f'{label} count: {len(train_grouped[label])}')

Mean tensor: <class 'torch.Tensor'>
Type of mean tensor: torch.float32
ndim (or Rank): 3
Shape: torch.Size([5923, 28, 28])
0 count: 5923
1 count: 6742
2 count: 5958
3 count: 6131
4 count: 5842
5 count: 5421
6 count: 5918
7 count: 6265
8 count: 5851
9 count: 5949


### Calculating a Per Pixel Average for each Label

In [17]:
def calculate_mean_tensors(train_grouped: list) -> torch.Tensor:
    '''
    Compute the per-pixel mean of every group of samples.

    Args:
        train_grouped: One tensor per label, each stacking that label's
            samples along dim 0.

    Returns:
        A tensor stacking the group means along dim 0, so entry `label` is
        the element-wise average of train_grouped[label] over its samples.
        All groups are processed, however many there are.

    Raises:
        RuntimeError: If train_grouped is empty (nothing to stack).
    '''
    # mean(0) averages over the sample dimension, leaving one "average
    # sample" per group.
    return torch.stack([group.mean(0) for group in train_grouped])

# One per-pixel average image for each label, stacked along dim 0.
train_mean = calculate_mean_tensors(train_grouped)
    
# Investigate the mean tensor for zero: its Python type, dtype, rank and shape.
print('Mean tensor:', type(train_mean[0]))
print('Type of mean tensor:', train_mean[0].dtype)
print('ndim (or Rank):', train_mean[0].ndim)
print('Shape:', train_mean[0].shape)

Mean tensor: <class 'torch.Tensor'>
Type of mean tensor: torch.float32
ndim (or Rank): 2
Shape: torch.Size([28, 28])


### Exploring the Images of Averages

In [18]:
# Turn each per-label mean tensor into a PIL image.
# The means are float32; handing a float array to Image.fromarray produces a
# 32-bit floating point image. Rounding, clamping to the 8-bit range and
# casting to uint8 first yields an ordinary 8-bit grayscale image, which is
# the format image viewers, savers and paste operations expect.
train_mean_images = [
    Image.fromarray(torch.round(mean_tensor).clamp(0, 255).to(torch.uint8).numpy())
    for mean_tensor in train_mean
]

# Open the average image for the digit 1 in the default image viewer.
train_mean_images[1].show()

In [19]:
def create_banner_image(samples: list) -> Image.Image:
    '''
    Creates a banner image from a list of images.

    The images are laid out left to right in the order given, each with its
    top edge at y = 0.

    Args:
        samples: PIL images to concatenate horizontally.

    Returns:
        None when samples is empty; the single image itself (unchanged) when
        there is exactly one; otherwise a new RGBA image whose width is the
        sum of the sample widths and whose height is the tallest sample.
        Areas below shorter samples keep the canvas's initial fill.
    '''
    samples = list(samples)
    if not samples:
        return None
    if len(samples) == 1:
        # Nothing to join: hand back the only image as-is.
        return samples[0]

    # Size the canvas once up front instead of allocating a wider banner and
    # re-copying everything pasted so far for every additional sample.
    total_width = sum(sample.size[0] for sample in samples)
    max_height = max(sample.size[1] for sample in samples)
    banner = Image.new('RGBA', (total_width, max_height))

    # Paste each sample immediately to the right of the previous one.
    x_offset = 0
    for sample in samples:
        banner.paste(sample, (x_offset, 0))
        x_offset += sample.size[0]

    return banner

In [20]:
# Join the ten per-label average images side by side into one banner.
banner_image = create_banner_image(train_mean_images)

# Open the banner in the default image viewer.
banner_image.show()

### Calculating Deltas on the Training Set

In [21]:
def mnist_abs_mean(samples: torch.Tensor, mean: torch.Tensor, batch_size: int = 1024) -> torch.Tensor:
    '''
    Classify each sample by its nearest per-label mean image.

    The distance between a sample and a mean image is the mean absolute
    per-pixel difference; the predicted label is the index of the mean image
    with the smallest distance.

    Args:
        samples: Samples stacked along dim 0, each a 2-D image.
        mean: Mean images stacked along dim 0, one per label, each with the
            same 2-D shape as a sample.
        batch_size: How many samples to compare at once. It only bounds the
            size of the temporary difference tensor; it does not affect the
            result.

    Returns:
        A 1-D int64 tensor with one predicted label per sample (empty when
        samples is empty).

    Raises:
        ValueError: If batch_size is smaller than 1.
    '''
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')
    if len(samples) == 0:
        return torch.empty(0, dtype=torch.int64, device=samples.device)

    predictions = []
    for batch in torch.split(samples, batch_size):
        # (B, 1, H, W) - (K, H, W) broadcasts to (B, K, H, W): every sample
        # in the batch against every label's mean image.
        deltas = (batch.unsqueeze(1) - mean).abs().mean((-1, -2))
        # For each sample, pick the label whose mean image is closest.
        predictions.append(deltas.argmin(dim=1))
    return torch.cat(predictions)

# Predict a label for every test image from the per-label training means.
predictions = mnist_abs_mean(X_test, train_mean)
# Element-wise match between true and predicted labels.
# NOTE(review): `comparisons` is not used again in the cells visible here —
# confirm whether a later cell relies on it.
comparisons = torch.eq(y_test, predictions)

# Spot-check the first test sample.
print('Prediction of first sample:', predictions[0])
print('Label for first sample:', y_test[0])

# NOTE(review): the labels below say "Mean tensor", but the object being
# inspected is the predictions tensor.
print('Mean tensor:', type(predictions))
print('Type of mean tensor:', predictions.dtype)
print('ndim (or Rank):', predictions.ndim)
print('Shape:', predictions.shape)

Prediction of first sample: tensor(7)
Label for first sample: tensor(7, dtype=torch.int8)
Mean tensor: <class 'torch.Tensor'>
Type of mean tensor: torch.int64
ndim (or Rank): 1
Shape: torch.Size([10000])


In [22]:
# Test-set accuracy: the fraction of predictions that match the true label.
# torch.eq already returns a boolean tensor, so it can be summed directly
# without comparing it against True.
torch.eq(y_test, predictions).sum() / len(y_test)

tensor(0.6685)