In [None]:
# import libraries
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,TensorDataset
import torchvision
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Import and process the data

In [None]:
# Download (if not already present in the current directory) and load MNIST
MNIST = torchvision.datasets.MNIST(".", download=True)
data = MNIST.data       # image tensor, one image per sample
labels = MNIST.targets  # one class label per sample

# Randomly drop samples to shrink the size to 20,000
np.random.seed(42) # Set random seed for reproducibility
indices = np.random.choice(len(data), size=20000, replace=False)
data = data[indices]
labels = labels[indices]

# Reshape data to 2D array: one row per sample, all pixels of that sample in one row
data = data.reshape(data.shape[0], -1)
# Keep labels as a 1D vector (one class index per sample).
# A column vector of shape (N,1) would broadcast against the (N,) vector returned by
# torch.argmax(...,axis=1) into an N-by-N comparison in later accuracy computations,
# and nn.CrossEntropyLoss expects 1D class-index targets.
labels = labels.reshape(-1)

# Create train/test groups using DataLoader

In [None]:
# Step 1: convert to float features and int64 labels
# (torch.as_tensor avoids the copy-construct warning that torch.tensor() emits
#  when its argument is already a tensor)
dataT = torch.as_tensor(data).float()
labelsT = torch.as_tensor(labels).long() # long = int64

# Step 2: use scikit-learn to split the data
# (fixed random_state so that re-running this cell reproduces the same split)
train_X, test_X, train_labels, test_labels = train_test_split(dataT, labelsT, test_size=.1, random_state=42)

# Normalized copies: scale BOTH sets by the maximum of the TRAINING data, so the
# test set is transformed with a statistic computed from the training set only
train_max = torch.max(train_X)
train_X_norm = train_X / train_max
test_X_norm = test_X / train_max

# Step 3: convert into PyTorch Datasets (raw and normalized versions)
train_data = TensorDataset(train_X, train_labels)
test_data = TensorDataset(test_X, test_labels)
train_data_norm = TensorDataset(train_X_norm, train_labels)
test_data_norm = TensorDataset(test_X_norm, test_labels)

# Step 4: translate into dataloader objects
# training loaders: shuffled mini-batches, incomplete final batch dropped
# test loaders: the whole test set in a single batch
batchsize = 32
train_loader = DataLoader(train_data, batch_size=batchsize, shuffle=True, drop_last=True)
test_loader  = DataLoader(test_data, batch_size=len(test_data))
train_loader_norm = DataLoader(train_data_norm, batch_size=batchsize, shuffle=True, drop_last=True)
test_loader_norm  = DataLoader(test_data_norm, batch_size=len(test_data_norm))

In [None]:
# Report the min/max feature value of each dataset (raw and normalized versions)
for description, dataset in (
    ("Training data", train_data),
    ("Test data", test_data),
    ("Training data NORM", train_data_norm),
    ("Test data NORM", test_data_norm),
):
  features = dataset.tensors[0]  # first tensor of the TensorDataset holds the features
  print(f"{description} range from {torch.min(features)} to {torch.max(features)}")

In [None]:
# check all variables in workspace
# (%whos is an IPython line magic that lists the names in the interactive namespace;
#  it is only available when running inside IPython/Jupyter)
%whos

# Create the DL model

In [None]:
# create a class for the model
def createTheMNISTNet():
  """Build a new model together with its loss function and optimizer.

  Returns:
    net:       a freshly initialized mnistNet (fully connected, 784 -> 64 -> 32 -> 32 -> 10)
    lossfun:   an nn.CrossEntropyLoss instance
    optimizer: torch.optim.SGD over the model's parameters with learning rate .01
  """

  class mnistNet(nn.Module):
    """Feedforward network: input layer, two hidden layers, 10-unit output layer."""

    def __init__(self):
      super().__init__()

      # layers, listed in the order the data flows through them
      self.input  = nn.Linear(784,64)  # input layer
      self.fc1    = nn.Linear(64,32)   # hidden layer
      self.fc2    = nn.Linear(32,32)   # hidden layer
      self.output = nn.Linear(32,10)   # output layer

    def forward(self,x):
      """Return the output-layer activations for x (no activation applied to the output)."""
      # ReLU after every layer except the output layer
      for layer in (self.input, self.fc1, self.fc2):
        x = F.relu( layer(x) )
      return self.output(x)

  # assemble the three pieces the training function needs
  model = mnistNet()
  criterion = nn.CrossEntropyLoss()
  sgd = torch.optim.SGD(model.parameters(),lr=.01)

  return model,criterion,sgd

# Create a function that trains the model

In [None]:
# a function that trains the model

def function2trainTheModel(train_loader, test_loader, numepochs=60, createModel=None):
  """Train a newly created model and record loss and accuracy for every epoch.

  Args:
    train_loader: iterable of (X, y) mini-batches used for training.
    test_loader:  iterable of (X, y) batches; only its FIRST batch is evaluated each epoch.
    numepochs:    number of passes over train_loader (default 60).
    createModel:  optional zero-argument callable returning (net, lossfun, optimizer);
                  when None, createTheMNISTNet is used.

  Labels may be shaped (N,) or (N,1); they are flattened before being used.

  Returns:
    trainAcc: list with the mean mini-batch training accuracy (in %) of each epoch
    testAcc:  list with the test accuracy (in %) after each epoch
    losses:   1D tensor with the mean mini-batch training loss of each epoch
    net:      the trained model
  """

  # create a new model
  if createModel is None:
    createModel = createTheMNISTNet
  net, lossfun, optimizer = createModel()

  # initialize losses
  losses    = torch.zeros(numepochs)
  trainAcc  = []
  testAcc   = []

  # NOTE: .train(), .eval() and no_grad() are deliberately not used here
  # (adding them is one of the "Additional explorations" exercises of this notebook).

  # loop over epochs
  for epochi in range(numepochs):

    # loop over training data batches
    batchAcc  = []
    batchLoss = []
    for X, y in train_loader:
      y = torch.flatten(y)

      # forward pass and loss
      yHat = net(X)
      loss = lossfun(yHat,y)

      # backprop
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # loss from this batch
      batchLoss.append(loss.item())

      # compute accuracy
      matches = torch.argmax(yHat,dim=1) == y             # booleans (false/true)
      matchesNumeric = matches.float()                    # convert to numbers (0/1)
      accuracyPct = 100*torch.mean(matchesNumeric).item() # average and x100, as a Python float
      batchAcc.append( accuracyPct )                      # add to list of accuracies
    # end of batch loop...

    # now that we've trained through the batches, get their average training accuracy
    trainAcc.append( np.mean(batchAcc) )

    # and get average losses across the batches
    losses[epochi] = np.mean(batchLoss)

    # test accuracy
    X,y = next(iter(test_loader)) # extract X,y from test dataloader
    # flatten the labels exactly as in the training loop: comparing the (N,) predictions
    # with (N,1) labels would broadcast to an N-by-N matrix and give a wrong accuracy
    y = torch.flatten(y)
    yHat = net(X)

    testMatches = torch.argmax(yHat,dim=1) == y
    testAcc.append( 100*torch.mean(testMatches.float()).item() )

  # end epochs

  # function output
  return trainAcc,testAcc,losses,net


# Run the model and show the results!

In [None]:
# Experiment 0: train on normalized data, test on normalized data
(train_accuracy_experiment0,
 test_accuracy_experiment0,
 losses_experiment0,
 net_experiment0) = function2trainTheModel(train_loader_norm, test_loader_norm)

# Experiment 1: train on normalized data, test on un-normalized data
(train_accuracy_experiment1,
 test_accuracy_experiment1,
 losses_experiment1,
 net_experiment1) = function2trainTheModel(train_loader_norm, test_loader)

# Experiment 2: train on un-normalized data, test on normalized data
(train_accuracy_experiment2,
 test_accuracy_experiment2,
 losses_experiment2,
 net_experiment2) = function2trainTheModel(train_loader, test_loader_norm)


In [None]:
def plotExperimentResults(losses, trainAcc, testAcc, description):
  """Draw one figure with the loss curve (left) and train/test accuracy curves (right).

  Args:
    losses:      per-epoch training losses
    trainAcc:    per-epoch training accuracies (in %)
    testAcc:     per-epoch test accuracies (in %); the last entry is shown in the title
    description: text placed above both panels to identify the experiment
  """
  fig,ax = plt.subplots(1,2,figsize=(16,5))
  fig.suptitle(description)

  ax[0].plot(losses)
  ax[0].set_xlabel('Epochs')
  ax[0].set_ylabel('Loss')
  ax[0].set_ylim([0,3])
  ax[0].set_title('Model loss')

  ax[1].plot(trainAcc,label='Train')
  ax[1].plot(testAcc,label='Test')
  ax[1].set_xlabel('Epochs')
  ax[1].set_ylabel('Accuracy (%)')
  ax[1].set_ylim([10,100])
  ax[1].set_title(f'Final model test accuracy: {testAcc[-1]:.2f}%')
  ax[1].legend()

  plt.show()


# one figure per experiment, in the order the experiments were run
experimentResults = [
  ('Experiment 0: normalized training data, normalized test data',
   losses_experiment0, train_accuracy_experiment0, test_accuracy_experiment0),
  ('Experiment 1: normalized training data, un-normalized test data',
   losses_experiment1, train_accuracy_experiment1, test_accuracy_experiment1),
  ('Experiment 2: un-normalized training data, normalized test data',
   losses_experiment2, train_accuracy_experiment2, test_accuracy_experiment2),
]

for expDescription, expLosses, expTrainAcc, expTestAcc in experimentResults:
  plotExperimentResults(expLosses, expTrainAcc, expTestAcc, expDescription)

# Inspect the results in more detail

In [None]:
# run the model through for the test data
# `net` is not defined at notebook level (the trained models are returned as
# net_experiment0/1/2), so pick one explicitly. Experiment 0 was trained and tested on
# normalized data, so its model is paired with the normalized test loader here.
net = net_experiment0
X,y = next(iter(test_loader_norm))
y = torch.flatten(y) # 1D labels, so comparisons with the (N,) predicted classes are element-wise
predictions = net(X).detach()

predictions

In [None]:
# Show the model output for every digit class for one test sample
sample2show = 120

digits = range(10)
sampleOutput = predictions[sample2show] # try adding exp!
trueDigit = y[sample2show].item()

plt.bar(digits,sampleOutput)
plt.xticks(digits)
plt.xlabel('Number')
plt.ylabel('Evidence for that number')
plt.title('True number was %s' %trueDigit)
plt.show()

In [None]:
# find the errors
# (flatten the labels first: comparing the (N,) predicted classes with (N,1) labels
#  would broadcast to an N-by-N matrix and report wrong indices)
trueLabels = torch.flatten(y)
predictedLabels = torch.argmax(predictions,axis=1)
errors = np.where( predictedLabels != trueLabels )[0]
print(errors)

# Evidence for all numbers from one misclassified sample
# (sample2show indexes into `errors`, not into the test set)
sample2show = 10

if len(errors) > sample2show:
  errorIdx = errors[sample2show]

  fig,ax = plt.subplots(1,2,figsize=(14,5))

  ax[0].bar(range(10),np.exp(predictions[errorIdx]))
  ax[0].set_xticks(range(10))
  ax[0].set_xlabel('Number')
  ax[0].set_ylabel('Evidence for that number')
  ax[0].set_title('True number: %s, model guessed %s'
                  %( trueLabels[errorIdx].item(), predictedLabels[errorIdx].item() ))

  # the misclassified image itself
  ax[1].imshow( np.reshape(X[errorIdx,:],(28,28)) ,cmap='gray')

  plt.show()
else:
  # fewer misclassified samples than requested: nothing to index
  print(f'Only {len(errors)} misclassified samples; sample2show must be smaller than that.')

# Additional explorations

In [None]:
# 1) Average together the correct 7's and the error 7's, and make images of them (that is, one image
#    of all correct 7's and one image of all incorrectly labeled 7's). How do they look?
# 
# 2) Repeat #1 for all numbers to produce a 2x10 matrix of images with corrects on top
#    and errors on the bottom.
# 
# 3) Identify "almost errors," which we can define as correct categorizations that had a probability of
#    e.g., >.1 for any other number. Make images of some of these numbers. Can you understand why the model
#    was confused?
# 
# 4) I didn't use .train(), .eval(), or no_grad() here. Is that a problem? Can you add those in without checking
#    other notebooks?
# 