In [0]:
# PyTorch imports

import torch

import torchvision
from torchvision import transforms, utils, datasets

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler


# Numpy and matplotlib


import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt


# For file operations
import os

# For logging metrics to csv file
import pandas as pd

In [2]:
# Mount Google Drive at /content/gdrive (Colab prompts for authorization).
# DRIVE_DIRECTORY, defined in the next cell, lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Project folder inside the mounted Google Drive.
DRIVE_DIRECTORY = '/content/gdrive/My Drive/Kaggle-IFT6135'
# printf-style template: `DRIVE_PATH % name` yields a path inside DRIVE_DIRECTORY.
DRIVE_PATH = '/'.join((DRIVE_DIRECTORY, '%s'))

In [0]:
# Default mini-batch size; used as the default `batch_size` argument of
# get_train_valid_loaders and run_experiment.
BATCH_SIZE = 128

In [0]:
def test_model(model):
  x = torch.randn(128,1,28,28)
  y = model(x)
  print(y.size())

In [0]:
def params(model):
  """Print the total number of scalar parameters held by ``model``."""
  total = 0
  for tensor in model.parameters():
    total += tensor.numel()
  print(total)

In [0]:
class MLP(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 512 -> 10.

    Every input is flattened to 28*28 features, passed through two
    ReLU-activated hidden layers, and projected to 10 raw output scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 raw scores for each flattened 28*28 input in ``x``."""
        hidden = x.view(-1, 28 * 28)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)

In [0]:
class CNN(nn.Module):
  """Convolutional classifier producing 10 raw scores per single-channel image.

  ``convnet`` stacks seven 3x3 convolutions with 2x2 max-pooling and ends in a
  208-channel 1x1 feature map; ``fc`` maps those 208 features to 10 outputs.

  For a 28x28 input the spatial size is already 1x1 after the fourth pooling
  layer. A plain ``MaxPool2d(2, 2)`` cannot pool a 1x1 map (its computed
  output size is 0, which raises a RuntimeError), so the last two pooling
  layers use ``ceil_mode=True`` and pass a 1x1 map through unchanged.
  The number and order of modules in ``convnet`` is kept as before so that
  ``state_dict`` keys do not change.
  """

  def __init__(self):
    super(CNN, self).__init__()

    self.convnet = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),                  # 28 -> 14
        nn.Conv2d(32, 64, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),                  # 14 -> 7
        nn.Conv2d(64, 64, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),                  # 7 -> 3
        nn.Conv2d(64, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),                  # 3 -> 1
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        # ceil_mode keeps a 1x1 map at 1x1 instead of failing with size 0.
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),  # 1 -> 1
        nn.Conv2d(128, 208, kernel_size=3, padding=1),
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),  # 1 -> 1
        # Replaces the former no-op AvgPool2d(1, 1): guarantees a 1x1 map so
        # the flattened feature vector always has exactly 208 entries.
        nn.AdaptiveAvgPool2d(1),
    )

    self.fc = nn.Linear(208, 10)

  def forward(self, x):
    """Return the 10 raw scores for each image in ``x`` (N, 1, H, W)."""
    out = self.convnet(x)
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out

In [23]:
# Print the total parameter count of a freshly constructed MLP.
params(MLP())

669706


In [60]:
# Print the total parameter count of a freshly constructed CNN.
params(CNN())

667162


In [64]:
# Smoke test: push one random batch through a freshly constructed CNN and
# print the size of its output.
test_model(CNN())

torch.Size([128, 208, 1, 1])
torch.Size([128, 10])


In [0]:
def get_train_valid_loaders(batch_size=BATCH_SIZE, split_ratio = 5/6, transform = transforms.ToTensor(), shuffle = True):
    """Build training and validation loaders from the MNIST training set.

    The MNIST training split (downloaded into the current directory when
    missing) is partitioned by index: the first ``split_ratio`` share goes to
    training and the remainder to validation. Both loaders draw from the same
    dataset object through ``SubsetRandomSampler``.

    Args:
        batch_size: mini-batch size of both loaders.
        split_ratio: fraction of the samples assigned to training; must lie
            in [0, 1].
        transform: transform applied to every image.
        shuffle: when True, the indices are permuted with a fixed seed before
            splitting, so the partition is identical on every call.

    Returns:
        Tuple ``(train_loader, valid_loader)``.

    Raises:
        ValueError: if ``split_ratio`` is outside [0, 1].
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError("split_ratio must be in [0, 1], got {}".format(split_ratio))

    train_dataset = datasets.MNIST(root=".", train=True, transform=transform, download=True)

    num_samples = len(train_dataset)
    indices = list(range(num_samples))
    split_index = int(np.floor(split_ratio * num_samples))

    if shuffle:
        # A private generator seeded with 5 yields the same permutation as
        # seeding the global NumPy RNG did, without resetting the global
        # random state for the rest of the notebook.
        np.random.RandomState(5).shuffle(indices)

    train_index, valid_index = indices[:split_index], indices[split_index:]
    train_sampler = SubsetRandomSampler(train_index)
    valid_sampler = SubsetRandomSampler(valid_index)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=1, pin_memory=True,
    )

    valid_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=1, pin_memory=True,
    )

    print(" Training set  : {} batches = {} samples".format(len(train_loader),len(train_index)))
    print("Validation set : {} batches = {} samples".format(len(valid_loader),len(valid_index)))

    return train_loader, valid_loader

In [0]:
# Prefer the first CUDA device when one is available; otherwise use the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [0]:
def evaluate_metrics(model,data_loader,criterion):
  """Compute the sample-weighted mean loss and the accuracy of ``model``.

  Switches the model to eval mode and iterates ``data_loader`` once with
  autograd disabled. The model is left in eval mode afterwards.

  Args:
    model: network to evaluate; inputs are moved to the global ``device``.
    data_loader: iterable yielding ``(images, labels)`` batches.
    criterion: loss callable taking ``(outputs, labels)``. Each batch loss is
      weighted by the batch size, which assumes the criterion returns a
      per-batch mean -- TODO confirm for non-default reductions.

  Returns:
    Tuple ``(loss, accuracy)`` of Python floats; ``accuracy`` is the fraction
    of correctly classified samples.
  """
  correct = 0.
  total = 0.
  loss = 0.

  model.eval()
  with torch.no_grad():
    for images, labels in data_loader:

      # Move each batch to the device once and reuse it below.
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)

      mini_batch_size = labels.size(0)

      # .item() keeps the running sum a plain float. Accumulating the tensor
      # itself made this function return a 0-dim device tensor, which is
      # then written to the metrics CSV as "tensor(...)".
      loss += mini_batch_size * criterion(outputs, labels).item()

      _, predicted = torch.max(outputs, 1)
      total += mini_batch_size
      correct += (predicted == labels).sum().item()

  loss = loss/total
  accuracy = correct/total

  return loss, accuracy

In [0]:
def train(experiment_name, train_loader, valid_loader, model, optimizer, criterion, num_epochs,start_epoch, checkpoint_every=5):
    """Train ``model`` and periodically save weights and metrics to Drive.

    After every epoch the loss and accuracy are measured on both loaders and
    printed. On checkpoint epochs the model weights are written to
    ``<DRIVE_DIRECTORY>/models/<experiment_name>/<experiment_name>-e<epoch>.pth``
    and the metric rows buffered since the previous checkpoint are appended
    to ``<DRIVE_DIRECTORY>/results/<experiment_name>.csv``.

    Args:
        experiment_name: name used for the checkpoint folder and the CSV file.
        train_loader: loader used for optimisation and for training metrics.
        valid_loader: loader used for validation metrics.
        model: network to train; moved to the global ``device``.
        optimizer: optimiser already bound to ``model``'s parameters.
        criterion: loss module; moved to the global ``device``.
        num_epochs: number of epochs to run.
        start_epoch: index of the first epoch; epochs are numbered
            ``start_epoch .. start_epoch + num_epochs - 1``.
        checkpoint_every: a checkpoint is written whenever the epoch index is
            a multiple of this value, and always after the final epoch.

    Returns:
        The trained model.

    Raises:
        ValueError: if ``checkpoint_every`` is smaller than 1.
        FileNotFoundError: if ``DRIVE_DIRECTORY`` does not exist.
    """
    if checkpoint_every < 1:
        raise ValueError("checkpoint_every must be >= 1, got {}".format(checkpoint_every))

    # Refuse to create the project root implicitly: with an unmounted Drive
    # that would silently write checkpoints to the local disk instead.
    if not os.path.isdir(DRIVE_DIRECTORY):
        raise FileNotFoundError("{} does not exist (is Google Drive mounted?)".format(DRIVE_DIRECTORY))

    model_directory_name = "{}/models/{}".format(DRIVE_DIRECTORY,experiment_name)
    state_file_name = '{}/results/{}.csv'.format(DRIVE_DIRECTORY,experiment_name)
    os.makedirs(model_directory_name, exist_ok=True)
    os.makedirs(os.path.dirname(state_file_name), exist_ok=True)

    # NOTE(review): this seeds the CUDA generator only; the CPU generator is
    # not seeded here -- confirm whether full reproducibility is expected.
    torch.cuda.manual_seed(10)

    model = model.to(device)
    criterion = criterion.to(device)

    train_accuracy = []
    valid_accuracy = []
    train_loss = []
    valid_loss = []

    # Metric rows collected since the last CSV write.
    state_buffer = []

    print("Training for epochs #", num_epochs)

    # The final epoch index must account for start_epoch; comparing against
    # num_epochs - 1 alone skipped the last checkpoint of resumed runs.
    last_epoch = start_epoch + num_epochs - 1

    for epoch in range(start_epoch,start_epoch + num_epochs):

        # Optimisation pass over the training set.
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()

            outputs = model(inputs.to(device))

            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

        # Metrics on the training set. float() turns a possible 0-dim tensor
        # into a plain number so the CSV holds numeric values.
        loss_metric, accuracy_metric = evaluate_metrics(model,train_loader,criterion)
        train_accuracy.append(accuracy_metric)
        train_loss.append(float(loss_metric))

        # Metrics on the validation set.
        loss_metric, accuracy_metric = evaluate_metrics(model,valid_loader,criterion)
        valid_accuracy.append(accuracy_metric)
        valid_loss.append(float(loss_metric))

        print("======================================================================")
        print('Epoch:', epoch)
        print(' Training  :: Accuracy = %.4f, Loss = %.6f' % (train_accuracy[-1]*100, train_loss[-1]))
        print('Validation :: Accuracy = %.4f, Loss = %.6f' % (valid_accuracy[-1]*100, valid_loss[-1]))

        state = {
           "epoch" : epoch,
           "train_accuracy" : train_accuracy[-1]*100,
           "valid_accuracy" : valid_accuracy[-1]*100,
           "train_loss" : train_loss[-1],
           "valid_loss" : valid_loss[-1],
        }

        state_buffer.append(state)

        if epoch % checkpoint_every == 0 or epoch == last_epoch:
            path = os.path.join(model_directory_name, "{}-e{}.pth".format(experiment_name,epoch))
            torch.save(model.state_dict(), path)

            df = pd.DataFrame(state_buffer)

            # Write the header only when the file is created; later
            # checkpoints append rows to the same file.
            if not os.path.isfile(state_file_name):
                df.to_csv(state_file_name, index = False)
            else:
                df.to_csv(state_file_name, mode='a', header=False, index = False)

            state_buffer.clear()

    return model

In [0]:
def run_experiment(name, learning_rate, num_epochs, model, batch_size = BATCH_SIZE,start_epoch=0):
  """Train ``model`` on MNIST with plain SGD and cross-entropy loss.

  Derives an experiment name from the arguments, makes sure the output
  folders under ``DRIVE_DIRECTORY`` exist, builds the data loaders and
  delegates to ``train``.

  Args:
    name: prefix of the experiment name.
    learning_rate: SGD learning rate. It appears in the experiment name with
      three decimals, so rates that round to the same value share a name.
    num_epochs: number of epochs to train.
    model: network to train.
    batch_size: mini-batch size for both loaders.
    start_epoch: index of the first epoch, forwarded to ``train``.

  Returns:
    The model returned by ``train``.

  Raises:
    FileNotFoundError: if ``DRIVE_DIRECTORY`` does not exist.
  """
  experiment_name = "%s[lr=%.3f][e=%d][b=%d]" % (name,learning_rate,num_epochs,batch_size)

  print("Running experiment : ",experiment_name)

  # Refuse to create the project root implicitly: with an unmounted Drive
  # that would silently write everything to the local disk instead.
  if not os.path.isdir(DRIVE_DIRECTORY):
    raise FileNotFoundError("{} does not exist (is Google Drive mounted?)".format(DRIVE_DIRECTORY))

  model_directory_name = "{}/models/{}".format(DRIVE_DIRECTORY,experiment_name)

  # makedirs also creates a missing "models" parent and tolerates an
  # existing folder; the "results" folder is where train() writes its CSV.
  os.makedirs(model_directory_name, exist_ok=True)
  os.makedirs("{}/results".format(DRIVE_DIRECTORY), exist_ok=True)

  train_loader, valid_loader = get_train_valid_loaders(batch_size = batch_size)

  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=learning_rate)

  model = train(experiment_name, train_loader, valid_loader, model, optimizer, criterion, num_epochs,start_epoch)

  return model

In [72]:
# Train a freshly constructed CNN for 50 epochs with SGD at lr=0.1 and the
# default batch size; the trained network is kept in `model`.
model = run_experiment("CNN",0.1,50,CNN())

Running experiment :  CNN[lr=0.100][e=50][b=128]
 Training set  : 391 batches = 50000 samples
Validation set : 79 batches = 10000 samples
Training for epochs # 50
Epoch: 0
 Training  :: Accuracy = 11.1980, Loss = 2.301305
Validation :: Accuracy = 11.4300, Loss = 2.301816
Epoch: 1
 Training  :: Accuracy = 11.1980, Loss = 2.301283
Validation :: Accuracy = 11.4300, Loss = 2.301444
Epoch: 2
 Training  :: Accuracy = 11.1980, Loss = 2.301166
Validation :: Accuracy = 11.4300, Loss = 2.301195
Epoch: 3
 Training  :: Accuracy = 11.1980, Loss = 2.301083
Validation :: Accuracy = 11.4300, Loss = 2.301383
Epoch: 4
 Training  :: Accuracy = 11.1980, Loss = 2.300903
Validation :: Accuracy = 11.4300, Loss = 2.301342
Epoch: 5
 Training  :: Accuracy = 11.1980, Loss = 2.300483
Validation :: Accuracy = 11.4300, Loss = 2.301093
Epoch: 6
 Training  :: Accuracy = 24.1860, Loss = 2.273721
Validation :: Accuracy = 24.2100, Loss = 2.273932
Epoch: 7
 Training  :: Accuracy = 71.3780, Loss = 0.862056
Validation :: A