In [1]:
%pylab inline

!pip install pycm livelossplot

from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

from livelossplot import PlotLosses
from pycm import *

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import MNIST


def set_seed(seed):
    """
    Seed every random number generator used in this notebook and make cuDNN
    behaviour reproducible.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (CPU and all CUDA devices).

    Args:
        seed (int): value handed to each generator.

    Returns:
        bool: always ``True``.
    """
    # Imported locally on purpose: after ``%pylab inline`` the bare name
    # ``random`` resolves to ``numpy.random``, so the standard-library
    # generator would otherwise never be seeded.
    import random as py_random

    py_random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # benchmark=True would let the cuDNN auto-tuner pick the fastest convolution
    # algorithm, and that choice can differ between runs -- keep it off.
    torch.backends.cudnn.benchmark = False
    # Request deterministic algorithms in case cuDNN is re-enabled later.
    torch.backends.cudnn.deterministic = True
    # Bypass cuDNN kernels entirely.
    torch.backends.cudnn.enabled = False

    return True

# Use the GPU when PyTorch reports at least one usable CUDA device, else the CPU.
gpu_ready = torch.cuda.device_count() > 0 and torch.cuda.is_available()
device = 'cuda' if gpu_ready else 'cpu'
print("Cuda installed! Running on GPU!" if gpu_ready else "No GPU available!")

Populating the interactive namespace from numpy and matplotlib


ModuleNotFoundError: No module named 'torch'

PyTorch is a more-or-less drop-in replacement for NumPy functionality, with added automatic differentiation. Here are some basic `torch.Tensor` operations.

In [None]:
set_seed(42)

# A scalar (0-dimensional) tensor holding the value 1.
# Note: torch.Tensor(1) would instead allocate an *uninitialised* tensor of
# size 1, so the printed value would be whatever happened to be in memory.
a = torch.tensor(1.)
print(a, a.item())

# A tensor of zeros with the same shape and dtype as `a`.
b = torch.zeros_like(a)
print(b, b.item())

# Build a tensor from a NumPy array holding 0..41.
c = torch.from_numpy(np.arange(42))
print(c)

# ...and convert it back to a NumPy array.
print(c.numpy())

In [None]:
# A 1 x 1 x 28 x 28 tensor of standard-normal noise.
m = torch.randn((1, 1, 28, 28))
print(m.shape)

In [None]:
# Show the 28 x 28 slice as an image and attach a colour scale to it.
noise_image = plt.imshow(m[0, 0])
plt.colorbar(noise_image)

In [None]:
# Pick the compute device: CUDA when a usable GPU is visible, otherwise the CPU.
has_cuda = torch.cuda.device_count() > 0 and torch.cuda.is_available()
if has_cuda:
    device = 'cuda'
    print("Cuda installed! Running on GPU!")
else:
    device = 'cpu'
    print("No GPU available!")

# .to(device) moves (or leaves) the tensor on the selected device.
ones = torch.ones(1)
tensor_on_device = ones.to(device)
print(tensor_on_device.device)


Load the MNIST dataset

In [None]:
# Download MNIST into the current directory (if needed) and open both official splits.
mnist_train = MNIST(root="./", train=True, download=True)
mnist_test = MNIST(root="./", train=False, download=True)

Create splits of data

In [None]:
# Hold out 10% of the training images for validation, stratified by label.
# `.data` / `.targets` are used instead of the deprecated `.train_data` /
# `.train_labels` aliases, which only emit a warning and forward to them.
shuffler = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42).split(
    mnist_train.data, mnist_train.targets
)
# n_splits=1, so the generator yields exactly one (train_idx, validation_idx) pair.
indices = next(shuffler)

In [None]:
def apply_normalization(X, mean=0.1307, std=0.3081):
    """
    Scale raw pixel values to [0, 1] and standardise them.

    Computes ``((X / 255) - mean) / std`` and returns the result as a NEW
    tensor; the input is left untouched. (The previous in-place version
    silently overwrote the caller's tensor whenever it was already floating
    point, and raised on integer tensors.)

    Args:
        X (torch.Tensor): pixel intensities on the 0..255 scale.
        mean (float): value subtracted after scaling; defaults to the constant
            this notebook uses for MNIST.
        std (float): value divided by after centring; defaults to the constant
            this notebook uses for MNIST.

    Returns:
        torch.Tensor: normalised floating-point tensor with the shape of ``X``.
    """
    return (X / 255. - mean) / std

Normalise the data

In [None]:
# `indices` holds the (train_idx, validation_idx) pair produced by the split above.
train_idx, validation_idx = indices

# `.data` / `.targets` replace the deprecated `train_data` / `train_labels` /
# `test_data` / `test_labels` aliases. `.float()` yields a fresh float copy of
# the uint8 images, so the datasets themselves are not modified.
X_train, y_train = apply_normalization(mnist_train.data[train_idx].float()), mnist_train.targets[train_idx]
X_val, y_val = apply_normalization(mnist_train.data[validation_idx].float()), mnist_train.targets[validation_idx]
X_test, y_test = apply_normalization(mnist_test.data.float()), mnist_test.targets

Create train, test and validation sets

In [None]:
# Pair each normalised image tensor with its labels, cast to int64.
# NOTE: this rebinds `mnist_train` / `mnist_test` from the torchvision datasets
# to TensorDatasets, so the cells above cannot be re-run after this one.
mnist_train = TensorDataset(X_train, y_train.to(torch.long))
mnist_validate = TensorDataset(X_val, y_val.to(torch.long))
mnist_test = TensorDataset(X_test, y_test.to(torch.long))

Let's verify our data

In [None]:
# Show the first normalised training image.
first_image = X_train[0]
plt.imshow(first_image)

# Print the overall mean and standard deviation of the normalised training set.
train_mean, train_std = X_train.mean(), X_train.std()
print(train_mean, train_std)

### Create a SimpleNet as a nn.Module
Create a simple feed-forward neural network with the following architecture:
- Input Layer: 28*28 neurons
- Hidden Layer: 25 neurons
- Output Layer: 10 neurons
- Activation: ReLU

In [None]:
class SimpleNet(nn.Module):
    """Feed-forward classifier: 28*28 inputs -> 25 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_outputs = 28 * 28, 25, 10
        # Layers are created in the same order as before so that seeded
        # weight initialisation is unchanged.
        self.linear_1 = nn.Linear(n_inputs, n_hidden)
        self.linear_2 = nn.Linear(n_hidden, n_outputs)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) output scores for a batch of flattened inputs."""
        hidden = self.activation(self.linear_1(x))
        return self.linear_2(hidden)

Sanity checks

In [None]:
# Smoke test: push a single training image through an untrained network.
model = SimpleNet().to(device)
criterion = nn.CrossEntropyLoss()

# One flattened image (1 x 784) and a dummy target of class 0.
X_ = X_train[0].reshape(1, 28*28).to(device)
y_ = torch.zeros(1, dtype=torch.long, device=device)

a2 = model(X_)
loss = criterion(a2, y_)

# Log-probabilities over the 10 outputs; the prediction is the index of the largest.
log_probs = F.log_softmax(a2, dim=1)
y_pred = torch.max(log_probs, dim=1).indices

print(log_probs)
print(y_pred)

### Create simple train function
The function should perform the following tasks:
- Set the model into training mode
- Iterate over all the mini-batches
- Send the batches to the GPU / CPU
- Zero all the gradients
- Perform the forward-pass
- Compute the loss
- Perform the backward-pass
- Keep a running measure of training loss and accuracy
- Perform a step of gradient-descent
- Once done with all batches, return average training loss and accuracy

In [None]:
def train(model, optimizer, criterion, data_loader):
    """
    Run one training epoch and return the average loss and accuracy.

    Changes relative to the previous version:
      * the running loss is accumulated as a Python float via ``loss.item()``
        instead of summing graph-attached loss tensors across the whole epoch;
      * batches are moved to the device the model's parameters live on rather
        than to a notebook-level ``device`` global;
      * accuracy is counted with tensor operations, and both results always
        expose ``.item()`` regardless of what scikit-learn returns;
      * averages are taken over the samples actually seen.

    Args:
        model (nn.Module): network taking inputs flattened to ``(-1, 28*28)``.
        optimizer (torch.optim.Optimizer): optimiser over ``model``'s parameters.
        criterion: loss returning the batch-mean loss (it is re-weighted by
            batch size here).
        data_loader: iterable of ``(X, y)`` mini-batches.

    Returns:
        tuple: ``(loss, accuracy)`` -- a detached 0-dim ``torch.Tensor`` and a
        ``numpy.float64``; both support ``.item()``.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no samples.
    """
    model.train()

    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_sum, n_correct, n_seen = 0., 0, 0
    for X, y in data_loader:
        X, y = X.to(target_device), y.to(target_device)
        optimizer.zero_grad()
        a2 = model(X.view(-1, 28*28))
        loss = criterion(a2, y)
        loss.backward()
        optimizer.step()

        n_batch = X.size(0)
        # .item() detaches the value so no autograd history is carried along.
        loss_sum += loss.item() * n_batch
        # softmax is monotonic, so the arg-max of the raw scores is the prediction.
        n_correct += (a2.argmax(dim=1) == y).sum().item()
        n_seen += n_batch

    return torch.tensor(loss_sum / n_seen), np.float64(n_correct / n_seen)

### Create simple validate function
The function should perform the following tasks:
- Set the model into evaluation mode
- Iterate over all the mini-batches
- Send the batches to the GPU / CPU
- Perform the forward-pass
- Compute the loss
- Keep a running measure of validation loss and accuracy
- Once done with all batches, return average validation loss and accuracy

In [None]:
def validate(model, criterion, data_loader):
    """
    Evaluate ``model`` on ``data_loader`` and return the average loss and accuracy.

    Runs in evaluation mode with gradient tracking disabled. Batches are moved
    to the device the model's parameters live on (instead of a notebook-level
    ``device`` global), metrics are accumulated as plain Python numbers, and
    both results always expose ``.item()`` regardless of what scikit-learn
    returns.

    Args:
        model (nn.Module): network taking inputs flattened to ``(-1, 28*28)``.
        criterion: loss returning the batch-mean loss (it is re-weighted by
            batch size here).
        data_loader: iterable of ``(X, y)`` mini-batches.

    Returns:
        tuple: ``(loss, accuracy)`` -- a 0-dim ``torch.Tensor`` and a
        ``numpy.float64``; both support ``.item()``.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no samples.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_sum, n_correct, n_seen = 0., 0, 0
    with torch.no_grad():
        for X, y in data_loader:
            X, y = X.to(target_device), y.to(target_device)
            a2 = model(X.view(-1, 28*28))
            n_batch = X.size(0)
            loss_sum += criterion(a2, y).item() * n_batch
            # softmax is monotonic, so the arg-max of the raw scores is the prediction.
            n_correct += (a2.argmax(dim=1) == y).sum().item()
            n_seen += n_batch

    return torch.tensor(loss_sum / n_seen), np.float64(n_correct / n_seen)

Hyperparameters

In [None]:
# Seed passed to set_seed for reproducible runs.
seed = 42
# SGD learning rate and momentum.
lr, momentum = 0.01, 0.9
# Mini-batch sizes: training batches, and validation/test batches.
batch_size, test_batch_size = 64, 1_000
# Number of training epochs.
n_epochs = 30

In [None]:
# Seed first so the weight initialisation below is reproducible.
set_seed(seed)

model = SimpleNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), momentum=momentum, lr=lr)

In [None]:
# Only the training loader shuffles; validation and test share the same settings.
eval_loader_options = dict(batch_size=test_batch_size, shuffle=False, num_workers=0)

train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
validation_loader = DataLoader(mnist_validate, **eval_loader_options)
test_loader = DataLoader(mnist_test, **eval_loader_options)

In [None]:
# Train for `n_epochs` epochs (previously a hard-coded 30 that ignored the
# hyperparameter), plotting train/validation metrics live after each epoch.
liveloss = PlotLosses()
for epoch in range(n_epochs):
    train_loss, train_accuracy = train(model, optimizer, criterion, train_loader)
    validation_loss, validation_accuracy = validate(model, criterion, validation_loader)

    # float() accepts 0-dim tensors, NumPy scalars and plain Python floats,
    # whereas `.item()` fails if the accuracy comes back as a built-in float.
    logs = {
        'log loss': float(train_loss),
        'accuracy': float(train_accuracy),
        'val_log loss': float(validation_loss),
        'val_accuracy': float(validation_accuracy),
    }

    liveloss.update(logs)
    liveloss.draw()

This model may not be the best. Let's do some hyperparameter optimisation. 

In [None]:
def train_model(momentum, epochs=None):
    """
    Train a fresh SimpleNet with the given SGD momentum, plotting metrics live.

    Everything except the momentum (seed, learning rate, batch sizes) comes
    from the notebook-level hyperparameters, so runs differ only in momentum.
    The trained model is local to this function and is not returned.

    Args:
        momentum (float): momentum passed to ``torch.optim.SGD``.
        epochs (int, optional): number of epochs; defaults to the notebook-level
            ``n_epochs`` (previously a hard-coded 30).

    Returns:
        bool: always ``True``.
    """
    if epochs is None:
        epochs = n_epochs

    set_seed(seed)
    model = SimpleNet().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    # The unused test loader that was built here has been dropped.
    train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
    validation_loader = DataLoader(mnist_validate, batch_size=test_batch_size, shuffle=False, num_workers=0)

    liveloss = PlotLosses()
    for epoch in range(epochs):
        train_loss, train_accuracy = train(model, optimizer, criterion, train_loader)
        validation_loss, validation_accuracy = validate(model, criterion, validation_loader)

        # float() accepts 0-dim tensors, NumPy scalars and plain Python floats,
        # whereas `.item()` fails if the accuracy comes back as a built-in float.
        logs = {
            'log loss': float(train_loss),
            'accuracy': float(train_accuracy),
            'val_log loss': float(validation_loss),
            'val_accuracy': float(validation_accuracy),
        }

        liveloss.update(logs)
        liveloss.draw()

    return True

In [None]:
# Sweep point 1 of 3: low momentum.
train_model(momentum=0.1)

In [None]:
# Sweep point 2 of 3: medium momentum.
train_model(momentum=0.5)

In [None]:
# Sweep point 3 of 3: high momentum (same value as the baseline hyperparameter).
train_model(momentum=0.9)

In [None]:
def evaluate(model, data_loader):
    """
    Collect the model's predictions and the ground-truth labels over a loader.

    Runs in evaluation mode with gradient tracking disabled. Batches are moved
    to the device the model's parameters live on (instead of a notebook-level
    ``device`` global). An empty loader yields two empty arrays instead of
    making ``np.concatenate`` raise.

    Args:
        model (nn.Module): network taking inputs flattened to ``(-1, 28*28)``.
        data_loader: iterable of ``(X, y)`` mini-batches.

    Returns:
        tuple: ``(y_preds, ys)`` -- 1-D NumPy arrays of predicted and true
        labels, in loader order. Note the order: predictions first.
    """
    model.eval()

    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    ys, y_preds = [], []
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(target_device)
            a2 = model(X.view(-1, 28*28))
            # softmax is monotonic, so the arg-max of the raw scores is the prediction.
            y_preds.append(a2.argmax(dim=1).cpu().numpy())
            ys.append(y.cpu().numpy())

    if not ys:
        empty = np.empty(0, dtype=np.int64)
        return empty, empty.copy()

    return np.concatenate(y_preds, 0), np.concatenate(ys, 0)

# Evaluates the notebook-level `model`; the networks built inside train_model()
# are local to that function and are not returned.
y_pred, y_gt = evaluate(model=model, data_loader=validation_loader)

In [None]:
# Build a pycm confusion matrix from the validation predictions and print its report.
cm = ConfusionMatrix(predict_vector=y_pred, actual_vector=y_gt)
print(cm)

Retrain on full dataset

In [None]:
# Reload the full official training split (train + validation together).
mnist_train = MNIST("./", download=True, train=True)

# `.data` / `.targets` replace the deprecated `train_data` / `train_labels` aliases.
X_train, y_train = apply_normalization(mnist_train.data.float()), mnist_train.targets
mnist_train = TensorDataset(X_train, y_train.long())
# num_workers=0 to match the other loaders in this notebook.
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)

# `best_momentum` was used below without ever being assigned, so this cell
# raised NameError. It falls back to the baseline `momentum` hyperparameter;
# set it to the winner of the momentum sweep above.
best_momentum = momentum

set_seed(seed)
model = SimpleNet().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=best_momentum)
criterion = nn.CrossEntropyLoss()

liveloss = PlotLosses()
for epoch in range(n_epochs):
    train_loss, train_accuracy = train(model, optimizer, criterion, train_loader)

    # float() accepts 0-dim tensors, NumPy scalars and plain Python floats,
    # whereas `.item()` fails if the accuracy comes back as a built-in float.
    logs = {
        'log loss': float(train_loss),
        'accuracy': float(train_accuracy),
    }
    liveloss.update(logs)
    liveloss.draw()

# Final evaluation on the held-out test split.
test_loss, test_accuracy = validate(model, criterion, test_loader)
print("Avg. Test Loss: %1.3f" % float(test_loss), " Avg. Test Accuracy: %1.3f" % float(test_accuracy))
print("")

Exercise: 
- Tune the learning rate
- Add one more layer