In [2]:
import torch
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import numpy as np
from sklearn.model_selection import KFold
import torch.utils.data as data
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, Subset
import matplotlib.pyplot as plt

Discussion on data splits:
1. Important for train and val splits to be separate from test split (we want the test data to be completely unseen to the model before evaluation) --> avoids overfitting, CV can be used to train hyperparameters
2. Created batches for training and testing of data

In [None]:
class Net(nn.Module):
  """
  Build the best MNIST classifier.
  """
  def __init__(self, dropout=0.4):
    super(Net, self).__init__()
    self.layer1 = nn.Sequential(
      nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
      nn.ReLU(),
      nn.BatchNorm2d(16),
      nn.MaxPool2d(kernel_size=2, stride=2))
    self.layer2 = nn.Sequential(
        nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.fc = nn.Linear((7**2)*32, 10)
    self.softmax = nn.Softmax(dim=1)
    self.dropout = nn.Dropout(dropout)
    self.criterion = nn.CrossEntropyLoss()

  def forward(self, x):
    out = self.layer1(x)
    out = self.layer2(out)
    out = out.reshape(out.size(0), -1)
    out = self.dropout(out)
    out = self.fc(out)
    return out

  def train(self, device, train_loader, optimizer, epoch, log_interval):
    """
    This is your training function. When you call this function, the model is trained for 1 epoch.
    """
    self.to(device)
    loss_history = []
    for i, data in enumerate(train_loader):
      if i%log_interval == 0:
        print('.', end="")
      images, labels = data
      images = images.float().to(device)
      labels = labels.to(device)
      optimizer.zero_grad()
      output = self(images)
      loss = self.criterion(output, labels)
      loss_history.append(loss)
      loss.backward()
      optimizer.step()
    return loss_history

  def test(self, device, test_loader):
    test_total = 0
    test_correct = 0
    val_loss = 0
    val_acc = 0
    self.to(device)
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            # forward pass
            output = self(images)
            # find accuracy
            _, predicted = torch.max(output.data, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss
            loss = self.criterion(output, labels)
            val_loss += loss.item()
        val_loss /= len(test_loader)
        val_acc = test_correct / test_total
    return val_loss, val_acc

  def predict(self, x):
    with torch.no_grad():
      output = model(x.unsqueeze(0).unsqueeze(0).float())
      prediction = torch.max(output.data, 1)
      return int(prediction[1])

In [10]:

data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.ColorJitter(),
    transforms.RandomAffine(0),
    transforms.ElasticTransform(),
    transforms.Normalize((0.1307,), (0.3081,))
])
mnist = datasets.MNIST('data/', train=True, download=True)

18.2%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz


100.0%
91.4%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%
100.0%


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [None]:
from os import access
# Feel free to change these pre-sets and experiment with different values.
# Set random seed.
seed = 33
# Set batch size.
batch_size = 64
# Set learning rate
lr = 1.0
# Set total number of epochs
epochs = 10
# Set other hyperparameters of your choice.

torch.manual_seed(seed)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pytorch has default MNIST dataloader which loads data at each iteration
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.ColorJitter(),
    transforms.RandomAffine(0),
    transforms.ElasticTransform(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_data = datasets.MNIST('data/', train=True, download=True, transform=data_transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Splitting the dataset for k-fold CV.
n_folds = 5
folds = kf = KFold(n_splits=n_folds)

# Setting up the model with an optimizer.
model = Net()
optimizer = optim.RMSprop(model.parameters())
log_interval = 150

# Training loop
train_loss_history = []
train_acc_history = []
for epoch in range(1, epochs+1):
    print(f'\nEpoch {epoch}: ', end="")
    train_loss = []
    train_acc = []
    for i, (train_idx, val_idx) in enumerate(folds.split(train_data)):
      print(f'\n\tfold {i+1}:', end="")
      train = Subset(train_data, train_idx)
      train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
      val = Subset(train_data, val_idx)
      val_loader = DataLoader(val, batch_size=batch_size, shuffle=True)

      loss_history = model.train(device, train_loader, optimizer, epoch, log_interval)
      loss, acc = model.test(device, val_loader)
      train_loss.append(loss)
      train_acc.append(acc)
    avg_loss = np.sum(train_loss)/len(train_loss)
    avg_acc = np.sum(train_acc)/len(train_acc)
    print(f'\nLoss: {avg_loss}, Accuracy: {avg_acc}\n')
    train_loss_history.append(avg_loss)
    train_acc_history.append(avg_acc)

    # You may optionally save your model at each epoch here
    torch.save(model.state_dict, f'MNIST_Epoch {epoch}')

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 99339606.00it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 29538086.77it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 25525263.23it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 5104643.29it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw


Epoch 1: 
	fold 1:.....
	fold 2:.....
	fold 3:.....
	fold 4:.....
	fold 5:.....
Loss: 0.12557810144573253, Accuracy: 0.9619166666666666


Epoch 2: 
	fold 1:.....
	fold 2:.....
	fold 3:.....
	fold 4:.....
	fold 5:.....
Loss: 0.09568356866118537, Accuracy: 0.9716833333333333


Epoch 3: 
	fold 1:.....
	fold 2:..

In [None]:
plt.figure()
plt.plot(range(epochs), train_loss_history)
plt.title('Training Loss over Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.savefig('loss_by_epoch')
plt.figure()
plt.plot(range(epochs), train_acc_history)
plt.title('Training Accuracy over Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (Number Correct/1)')
plt.savefig('accuracy_by_epoch')

In [None]:
test_dataset = datasets.MNIST('data/', train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ]))
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
loss, acc = model.test(device, test_loader)
print(f'Model loss: {loss}\nModel accuracy: {acc}')

In [None]:
predictions = []
outputs = []
for image in test_dataset.data:
  with torch.no_grad():
      image = image.to(device)
      output = model(image.unsqueeze(0).unsqueeze(0).float())
      prediction = torch.max(output.data, 1)
      y = int(prediction[1].clone().detach())
      predictions.append(y)
      outputs.append(output.data.squeeze(0).cpu().clone().detach().numpy())
predictions = np.array(predictions)
outputs = np.array(outputs)

In [None]:
from sklearn.metrics import precision_recall_curve

def plot_precision_recall(recalls, precisions, label, c):
    plt.plot(recalls, precisions, linewidth=2, color = c, label=label)
    plt.xlabel("Recall", fontsize=14)
    plt.ylabel("Precision", fontsize=14)
    plt.axis([0,1,0,1])

colors = ['#0000ff', '#3399ff', '#66ccff', '#99ccff', '#ccccff', '#808080', '#ffcccc', '#ff9999', '#ff6666', '#ff0000']
plt.figure(figsize=(7,5))
plt.title("Precision-Recall All Classes", fontsize=17)
for num in range(0, 10):
  y_true = np.array([True if x==num else False for x in test_dataset.targets])
  y_scores = outputs[:, num]
  precisions, recalls, thresholds = precision_recall_curve(y_true.ravel(), y_scores.ravel())
  plot_precision_recall(recalls, precisions, f"Class {num}", colors[num])
plt.legend(fontsize=12)
plt.savefig('Precision_Recall')
plt.show()

In [None]:
import random
misclassified = dict()
for i in range(len(test_dataset.targets)):
  if(test_dataset.targets[i] != predictions[i]):
    if test_dataset.targets[i].item() not in misclassified.keys():
      misclassified[test_dataset.targets[i].item()] = []
    misclassified[test_dataset.targets[i].item()].append((test_dataset.data[i], predictions[i]))

for k in range(10):
  print(f'\n# Misclassified points for key {k}: {len(misclassified[k])}')
  fig, ax = plt.subplots(1, 3)
  for i, v in enumerate(random.sample(misclassified[k], 3)):
    ax[i].imshow(v[0], cmap = 'gray')
    ax[i].title(f'Classified as {v[1]}')
  plt.title(f'Target: {k}')
  plt.savefig(f'Misclassified_{k}')
  plt.show()

In [None]:
from sklearn import metrics
confusion_matrix = metrics.confusion_matrix(test_dataset.targets, torch.tensor(predictions))
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix)
cm_display.plot()
plt.savefig('Confusion_Matrix')
plt.show()

In [None]:
def last_layer(model, x):
  x = x.to(device)
  model = model.to(device)
  out = model.layer1(x)
  out = model.layer2(out)
  out = out.reshape(out.size(0), -1)
  out = model.dropout(out)
  features = out  # store the features before the final layer
  out = model.fc(out)
  return out, features

import numpy as np

features_all = np.empty((0, 7*7*32))
labels_all = np.empty(0)

with torch.no_grad():
    for images, labels in test_loader:
        outputs, features = last_layer(model, images)
        features_all = np.vstack((features_all, features.cpu().numpy()))
        labels_all = np.append(labels_all, labels.numpy())


from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

tsne = TSNE(n_components=2, random_state=42)
features_tsne = tsne.fit_transform(features_all)

In [None]:
plt.figure(figsize=(10, 10))
for i in range(10):
    plt.scatter(features_tsne[labels_all==i, 0], features_tsne[labels_all==i, 1], label=str(i), color=colors[i])
plt.legend()
plt.savefig('tSNE')
plt.show()