In [None]:
import sys

import matplotlib
import numpy as np
import pandas as pd
import PIL.Image
import torch
from PIL.Image import Image
from torch import tensor
from torchvision import transforms
from torchvision.datasets import MNIST

In [None]:
# Draw images with the 'Greys' colormap unless a plot says otherwise.
matplotlib.rcParams['image.cmap'] = 'Greys'

# Display settings: two-decimal floats with thousands separators, and limits high
# enough that pandas / NumPy / torch print wide data without truncating it.
pd.set_option('display.float_format', '{:,.2f}'.format)
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 1000)
np.set_printoptions(linewidth=200)
torch.set_printoptions(linewidth=200)


### Loading the Images

In [None]:
# Normalisation pipeline: convert to a tensor, then normalise with mean 0.5 and std 0.5.
# NOTE: `transform` is only defined here; it is not passed to the datasets created
# below, so indexing `train` / `test` yields untransformed samples.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Fetch the MNIST training and test splits (downloaded into ./mnist_data/ if missing).
train = MNIST(root='./mnist_data/', train=True, download=True)
test = MNIST(root='./mnist_data/', train=False, download=True)

print(
    type(train),
    f'Number of samples in the training set {len(train)}',
    f'Number of samples in the test set {len(test)}',
    sep='\n',
)

### Understanding the Samples

In [None]:

# Take the first training sample and report how it is structured.
sample = train[0]

print(
    f'Sample type: {type(sample)}',
    f'Sample length: {len(sample)}',
    f'Image type: {type(sample[0])}',
    f'Label type: {type(sample[1])}',
    sep='\n',
)

In [None]:
# Print the label, then let the notebook render the image as the cell output.
print(f'The image below is a: {sample[1]}')
sample[0]

In [None]:
# sys.getsizeof() is shallow: it measures only the dataset object itself, not the
# tensors that object references. That is why these two numbers are tiny and say
# nothing about how much image data each dataset holds.
print(sys.getsizeof(train))
print(sys.getsizeof(test))

# The pixels and labels are stored in each dataset's `data` and `targets` tensors;
# summing their storage gives a meaningful estimate of the in-memory footprint.
print(
    'Training set tensor bytes:',
    sum(t.element_size() * t.nelement() for t in (train.data, train.targets)),
)
print(
    'Test set tensor bytes:',
    sum(t.element_size() * t.nelement() for t in (test.data, test.targets)),
)

### Exploring the Training Set

In [None]:
def filter_samples_by_label(samples: MNIST, label: int) -> list:
    '''
    Collect every sample whose label equals the requested one.

    samples is normally the training set or the test set; each item is indexed as
    (image, label). label is the digit to keep, an integer between 0 and 9.
    Returns the matching samples as a list, in their original order.
    '''
    return [entry for entry in samples if entry[1] == label]

In [None]:
# Gather every training sample labelled 5.
fives = filter_samples_by_label(train, label=5)

print(f'Type: {type(fives)}')
print(f'Number of fives: {len(fives)}')

In [None]:
def create_banner_image(samples: list) -> Image:
    '''
    Creates a banner image from a list of MNIST samples.

    Each sample is indexed as (image, label); the images are laid out left to right
    in the order given, all aligned to the top edge.

    Returns None when samples is empty and the sample's own image (unchanged) when
    there is exactly one sample. Otherwise returns a new 'RGBA' image whose width is
    the sum of the image widths and whose height is the tallest image's height.
    '''
    images = [sample[0] for sample in samples]

    if not images:
        return None
    if len(images) == 1:
        # Nothing to stitch together: hand back the image itself.
        return images[0]

    total_width = sum(image.size[0] for image in images)
    # Heights should all be equal, but take the max so nothing is cropped if not.
    max_height = max(image.size[1] for image in images)

    # `new` is a module-level function of PIL.Image, not an attribute of the Image
    # class imported at the top of the notebook, so call it through the module.
    # Allocating the full-width canvas once also avoids re-copying the banner for
    # every additional sample.
    banner = PIL.Image.new('RGBA', (total_width, max_height))

    x_offset = 0
    for image in images:
        banner.paste(image, (x_offset, 0))
        x_offset += image.size[0]

    return banner

In [None]:
# Stitch the first ten fives into one banner and render it as the cell output.
banner = create_banner_image(fives[:10])
banner

### Converting from an Image to a NumPy Array

In [None]:
# View the first sample's image as a NumPy array of pixel values.
image_array = np.array(sample[0])

# Print the array type, a small 6x6 window, and then the complete array.
print(
    type(image_array),
    image_array[4:10, 4:10],
    image_array,
    sep='\n',
)

In [None]:
# Convert every image to a tensor (via a NumPy array), giving one Python list of
# tensors per split. The labels are not kept in these lists.
train_tensors = [tensor(np.array(image)) for image, _label in train]
test_tensors = [tensor(np.array(image)) for image, _label in test]

print(type(train_tensors))
print(len(train_tensors), len(test_tensors), sep='\n')

In [None]:
# Put one image tensor into a pandas DataFrame so that the DataFrame's Styler can
# render the pixel values as a small, grey-shaded table.
# NOTE(review): this wildcard import sits mid-notebook and may shadow names imported
# at the top (e.g. `tensor`, `Image`) — TODO confirm it is still needed and replace
# it with explicit imports in the first cell.
from fastai.vision.all import *

train_tensor = train_tensors[0]
image_df = pd.DataFrame(train_tensor)
(
    image_df.style
    .set_properties(**{'font-size': '6pt'})
    .background_gradient('Greys')
)


In [None]:
# Render the same DataFrame through its Styler with no extra properties set in this cell.
image_df.style

In [None]:
# Wrap the datasets in DataLoaders that yield batches of 64 samples, in order.
#
# `train` / `test` were created without a transform, so their samples hold PIL
# images, which the DataLoader's default collate function cannot stack into a batch
# (iterating such a loader raises a TypeError). The loaders are therefore built from
# views of the same files that apply `transform`, so every sample is a normalised
# tensor.
train_transformed = MNIST('./mnist_data/', download=True, train=True, transform=transform)
test_transformed = MNIST('./mnist_data/', download=True, train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(train_transformed, batch_size=64, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_transformed, batch_size=64, shuffle=False)