### First cell can be skipped if you are not working with: https://colab.research.google.com/

In [1]:
!pip3 install http://download.pytorch.org/whl/cu90/torch-0.3.1-cp36-cp36m-linux_x86_64.whl 
!pip3 install torchvision



In [1]:
import torch
import math
import os

from torch import optim
from torch import Tensor
from torch.autograd import Variable
from torch import nn
from torchvision import datasets
import torchvision.transforms as transforms
import seaborn as sns
from sklearn.model_selection import KFold
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
def create_mnist_model():
  """Build a fresh fully connected classifier: 784 inputs -> 100 hidden units (ReLU) -> 10 outputs."""
  layers = [
      nn.Linear(784, 100),
      nn.ReLU(),
      nn.Linear(100, 10),
  ]
  return nn.Sequential(*layers)

def _run_minibatch_training(model, optimizer, train_input, train_target, nb_epochs, mini_batch_size):
  """Shared training loop: sequential mini-batches, cross-entropy loss, one optimizer step per batch."""
  criterion = nn.CrossEntropyLoss()
  nb_samples = train_input.size(0)

  for e in range(0, nb_epochs):
      for b in range(0, nb_samples, mini_batch_size):
          # The last mini-batch may be shorter when nb_samples is not a
          # multiple of mini_batch_size; narrow() raises if asked to read past the end.
          batch_len = min(mini_batch_size, nb_samples - b)
          output = model(train_input.narrow(0, b, batch_len))
          loss = criterion(output, train_target.narrow(0, b, batch_len))
          model.zero_grad()
          loss.backward()
          optimizer.step()

def train_model_sgd(model, train_input, train_target, nb_epochs = 150, mini_batch_size = 100, lr = 1e-1):
  """Train `model` in place with plain SGD.

  Args:
    model: module whose parameters are optimized.
    train_input: inputs, sliced along dimension 0 into mini-batches.
    train_target: class targets, sliced the same way as `train_input`.
    nb_epochs: number of passes over the data.
    mini_batch_size: number of samples per optimizer step (the last batch may be smaller).
    lr: SGD learning rate.

  Returns:
    None; the model is updated in place.
  """
  optimizer = optim.SGD(model.parameters(), lr)
  _run_minibatch_training(model, optimizer, train_input, train_target, nb_epochs, mini_batch_size)

def train_model_adam(model, train_input, train_target, nb_epochs = 150, mini_batch_size = 100, lr = 1e-3, beta1 = 0.9, beta2 = 0.999):
  """Train `model` in place with Adam.

  Same arguments as `train_model_sgd`, plus `beta1` and `beta2`, which are
  passed to the optimizer as `betas = (beta1, beta2)`.

  Returns:
    None; the model is updated in place.
  """
  optimizer = optim.Adam(model.parameters(), lr, betas = (beta1,beta2))
  _run_minibatch_training(model, optimizer, train_input, train_target, nb_epochs, mini_batch_size)

def train_model_amsgrad(model, train_input, train_target, nb_epochs = 150, mini_batch_size = 100, lr = 1e-3, beta1 = 0.9, beta2 = 0.999):
  """Train `model` in place with Adam configured with `amsgrad = True`.

  Same arguments as `train_model_adam`.

  Returns:
    None; the model is updated in place.
  """
  optimizer = optim.Adam(model.parameters(), lr, betas = (beta1,beta2), amsgrad = True)
  _run_minibatch_training(model, optimizer, train_input, train_target, nb_epochs, mini_batch_size)
          
def compute_prc_errors(model, data_input, data_target, mini_batch_size = 100, test_prc = False):
  """Return the classification error rate of `model` on the given data, in percent.

  Args:
    model: module mapping a batch of inputs to per-class scores.
    data_input: inputs, sliced along dimension 0 into mini-batches.
    data_target: class targets aligned with `data_input`.
    mini_batch_size: number of samples evaluated per forward pass (the last batch may be smaller).
    test_prc: kept for backward compatibility and ignored; the rate is always
      computed against the size of `data_input` itself rather than a global tensor.

  Returns:
    Percentage (0-100) of misclassified samples; 0.0 when `data_input` is empty.
  """
  nb_samples = data_input.size(0)
  if nb_samples == 0:
    return 0.0

  nb_data_errors = 0
  for b in range(0, nb_samples, mini_batch_size):
      # Clamp the final batch so narrow() never reads past the end of the data.
      batch_len = min(mini_batch_size, nb_samples - b)
      output = model(data_input.narrow(0, b, batch_len))
      _, predicted_classes = torch.max(output.data, 1)
      batch_target = data_target.data.narrow(0, b, batch_len)
      # Count mismatches for the whole batch at once instead of element by element.
      nb_data_errors += int((predicted_classes != batch_target).sum())

  return nb_data_errors / nb_samples * 100

def print_errors(mini_batch_size = 100):
  """Print the train and test error rates of the notebook-level `model`.

  Reads the globals `model`, `train_input`, `train_target`, `test_input`
  and `test_target`; `mini_batch_size` is forwarded to `compute_prc_errors`.
  """
  def report(template, inputs, targets, is_test):
    rate = compute_prc_errors(model, inputs, targets, mini_batch_size, test_prc = is_test)
    print(template.format(rate))

  report('train error = {:0.2f}%', train_input, train_target, False)
  report('test error = {:0.2f}%', test_input, test_target, True)

In [4]:
def get_dataset():
  """Download MNIST into ./data (if needed) and return the (train, test) dataset objects."""
  data_dir = './data'
  if not os.path.exists(data_dir):
    os.mkdir(data_dir)

  # Built in order: training split first, then the test split.
  train_set, test_set = [
      datasets.MNIST(root = data_dir, train = is_train, download = True)
      for is_train in (True, False)
  ]
  return train_set, test_set


In [5]:
def get_data():
  """Download MNIST into ./data (if needed) and return flattened float tensors.

  Returns:
    (train_input, train_target, test_input, test_target), where each input
    tensor is reshaped to (number of samples, -1) and cast to float, and the
    targets are the datasets' label tensors.
  """
  data_dir = './data'
  if not os.path.exists(data_dir):
    os.mkdir(data_dir)

  # No transform is passed to the datasets; the stored tensors are read directly.
  train_set = datasets.MNIST(root = data_dir, train = True, download = True)
  test_set = datasets.MNIST(root = data_dir, train = False, download = True)

  def flatten(images):
    # One row per sample, all remaining dimensions collapsed.
    return images.view(images.size(0), -1).float()

  train_input = flatten(train_set.train_data)
  train_target = train_set.train_labels
  test_input = flatten(test_set.test_data)
  test_target = test_set.test_labels

  return train_input, train_target, test_input, test_target

Load the MNIST train and test data, then normalize both with the mean and standard deviation of the training set.

In [6]:
train_input, train_target, test_input, test_target = get_data()
# Normalize in place; the test set reuses the mean/std computed on the training set.
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

# Convert the Tensors into Variables before using them in the model.
train_input = Variable(train_input)
train_target = Variable(train_target)
test_input = Variable(test_input)
test_target = Variable(test_target)

In [20]:
kf = KFold(n_splits = 5, shuffle=True)

In [21]:
from sklearn.model_selection import KFold
from torch.utils.data.sampler import SubsetRandomSampler

KFold(n_splits=5, random_state=None, shuffle=True)

In [22]:
def train_model_kfold_sgd(model, train_dataset, kfold=5, shuffle=True, nb_epochs = 150, mini_batch_size = 100, lr = 1e-1):
    """Run k-fold cross-validation with SGD, training a fresh model on every fold.

    Args:
        model: kept for backward compatibility; it is not trained, because each
            fold builds its own network with `create_mnist_model()`.
        train_dataset: dataset exposing `train_data` (used to derive the fold
            indices) and usable with `torch.utils.data.DataLoader`.
        kfold: number of folds.
        shuffle: whether `KFold` shuffles the indices before splitting.
        nb_epochs: number of epochs per fold.
        mini_batch_size: batch size used by both data loaders.
        lr: SGD learning rate.

    Returns:
        (train_loss_kfold, val_loss_kfold, train_acc_kfold, val_acc_kfold):
        four lists with one entry per fold, each entry being the list of
        per-epoch values returned by `train_model` / `validate_model`.
    """
    criterion = nn.CrossEntropyLoss()
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        criterion = criterion.cuda()

    kf = KFold(n_splits = kfold, shuffle=shuffle)

    # Per-fold histories (one list of per-epoch values per fold).
    train_loss_kfold = []
    val_loss_kfold = []
    train_acc_kfold = []
    val_acc_kfold = []

    for train_index, val_index in kf.split(train_dataset.train_data):

        train_sampler = SubsetRandomSampler(train_index)
        val_sampler = SubsetRandomSampler(val_index)

        # NOTE(review): this relies on train_dataset[i] returning tensors that the
        # default collate function can batch -- confirm for a dataset built without a transform.
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=mini_batch_size, sampler=train_sampler, drop_last=False)
        val_loader = torch.utils.data.DataLoader(train_dataset, batch_size=mini_batch_size, sampler=val_sampler, drop_last=False)

        # Every fold starts from a newly initialized network ...
        fold_model = create_mnist_model()
        if use_cuda:
            fold_model = fold_model.cuda()

        # ... and the optimizer must be bound to that network's parameters,
        # otherwise optimizer.step() would update a model that is never evaluated.
        optimizer = optim.SGD(fold_model.parameters(), lr)

        # Per-epoch history for this fold
        train_e_loss = []
        val_e_loss = []
        train_e_acc = []
        val_e_acc = []

        for epoch in range(0, nb_epochs):
            train_loss, train_acc = train_model(train_loader, fold_model, criterion, optimizer)
            train_e_loss.append(train_loss)
            train_e_acc.append(train_acc)

            val_loss, val_acc = validate_model(val_loader, fold_model, criterion)
            val_e_loss.append(val_loss)
            val_e_acc.append(val_acc)

        # Store this fold's curves inside the fold loop so that no fold is dropped.
        train_loss_kfold.append(train_e_loss)
        val_loss_kfold.append(val_e_loss)
        train_acc_kfold.append(train_e_acc)
        val_acc_kfold.append(val_e_acc)

    return train_loss_kfold, val_loss_kfold, train_acc_kfold, val_acc_kfold

def train_model(train_loader, model, criterion, optimizer):
    """Train `model` for one epoch over `train_loader`.

    Args:
        train_loader: iterable yielding (inputs, labels) batches.
        model: module to train; switched to training mode.
        criterion: loss function called as `criterion(outputs, targets)`.
        optimizer: optimizer stepped once per batch.

    Returns:
        (loss_epoch, acc_epoch) as Python floats: the per-sample average loss
        and the fraction of correctly classified samples over the epoch.
        Both are 0.0 when the loader yields no samples.
    """
    model.train()
    use_cuda = torch.cuda.is_available()

    nb_correct = 0
    nb_elem = 0
    loss_sum = 0.0

    for train_data, train_labels in train_loader:
        inputs = Variable(train_data)
        targets = Variable(train_labels)
        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()

        # Clear gradients left over from the previous batch
        model.zero_grad()

        outputs = model(inputs)

        # Predicted label = index of the highest score
        pred_label = outputs.data.max(1)[1]

        loss = criterion(outputs, targets)
        batch_size = targets.size(0)
        # Weight each batch loss by its size so that dividing by nb_elem below
        # gives a per-sample average whatever the batch size is.
        # NOTE(review): assumes `criterion` returns the batch mean -- confirm for custom criteria.
        loss_sum += loss.item() * batch_size

        # Convert to plain ints so the accuracy is a true float division
        nb_correct += int((pred_label == targets.data).sum())
        nb_elem += batch_size

        loss.backward()
        optimizer.step()

    if nb_elem == 0:
        return 0.0, 0.0

    loss_epoch = loss_sum / nb_elem
    acc_epoch = nb_correct / nb_elem
    return loss_epoch, acc_epoch


def validate_model(val_loader, model, criterion):
    """Evaluate `model` over `val_loader` without updating its parameters.

    Args:
        val_loader: iterable yielding (inputs, labels) batches.
        model: module to evaluate; switched to evaluation mode.
        criterion: loss function called as `criterion(outputs, targets)`.

    Returns:
        (loss_epoch, acc_epoch) as Python floats: the per-sample average loss
        and the fraction of correctly classified samples.
        Both are 0.0 when the loader yields no samples.
    """
    model.eval()
    use_cuda = torch.cuda.is_available()

    nb_correct = 0
    nb_elem = 0
    loss_sum = 0.0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for val_data, val_labels in val_loader:
            inputs = Variable(val_data)
            targets = Variable(val_labels)
            if use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            outputs = model(inputs)

            # Predicted label = index of the highest score
            pred_label = outputs.data.max(1)[1]

            loss = criterion(outputs, targets)
            batch_size = targets.size(0)
            # Weight each batch loss by its size to obtain a per-sample average.
            # NOTE(review): assumes `criterion` returns the batch mean -- confirm for custom criteria.
            loss_sum += loss.item() * batch_size

            nb_correct += int((pred_label == targets.data).sum())
            nb_elem += batch_size

    if nb_elem == 0:
        return 0.0, 0.0

    loss_epoch = loss_sum / nb_elem
    acc_epoch = nb_correct / nb_elem
    return loss_epoch, acc_epoch

Function for plotting

In [8]:
import seaborn as sns
def plot_acc_loss(tr_loss, te_loss, tr_acc, te_acc, title="MNIST"):
    """Plot train/validation loss and accuracy curves in two separate figures.

    Args:
        tr_loss, te_loss: training and validation loss values, converted with `np.array`.
        tr_acc, te_acc: training and validation accuracy values, converted with `np.array`.
        title: prefix of the figure titles ("<title> loss" and "<title> accuracy").
    """
    # Imported here because the notebook never imports them at the top level.
    import numpy as np
    import matplotlib.pyplot as plt

    panels = (
        ("loss", "Loss", tr_loss, te_loss),
        ("accuracy", "Accuracy", tr_acc, te_acc),
    )
    for suffix, ylabel, train_values, val_values in panels:
        plt.figure()
        # NOTE(review): sns.tsplot is deprecated in newer seaborn releases in
        # favour of lineplot -- confirm against the installed version.
        sns.tsplot(np.array(train_values)).set_title("{} {}".format(title, suffix))
        sns.tsplot(np.array(val_values), color = 'r')
        plt.legend(['Train', 'Validation'])
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)

Train the model with a specific set of parameters (lr = 0.1, 150 epochs, mini-batch size 100). This takes a while: about 2 minutes on a GPU.

In [9]:
# Build a fresh network; when CUDA is available, move the model and all four data tensors to the GPU.
model = create_mnist_model()
if torch.cuda.is_available():
  model = model.cuda()
  train_input, train_target,test_input, test_target= train_input.cuda(), train_target.cuda(),test_input.cuda(), test_target.cuda()
# Hyper-parameters; nb_epochs and mini_batch are reused by the later training cells.
nb_epochs = 150
mini_batch = 100
lr = 1e-1

train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)

Print error rate on train and test set

In [10]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-1
print_errors(mini_batch)

train error = 0.00%
test error = 1.90%


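Train with new learning rates. Note that the cells below pass the same `model` object to `train_model_sgd` again, so each run continues from the weights left by the previous run rather than from a fresh initialization.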

In [11]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-2
lr = 1e-2
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 0.00%
test error = 1.88%


In [12]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-3
lr = 1e-3
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 0.00%
test error = 1.88%


In [13]:
# nb_epochs = 150, mini_batch = 100, lr = 1e-6
lr = 1e-6
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 0.00%
test error = 1.88%


In [14]:
# nb_epochs = 150, mini_batch = 100, lr = 1
lr = 1
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 22.49%
test error = 23.18%


In [15]:
# nb_epochs = 150, mini_batch = 100, lr = 0.5
lr = 0.5
train_model_sgd(model, train_input, train_target, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = lr)
print_errors(mini_batch)

train error = 9.62%
test error = 11.31%


In [16]:
train_dataset, test_dataset = get_dataset()
#print(train_dataset.train_data)
print(train_dataset.train_labels)

tensor([ 5,  0,  4,  ...,  5,  6,  8])


In [17]:
print(test_dataset.test_labels)

tensor([ 7,  2,  1,  ...,  4,  5,  6])


Normalizing data

In [18]:
# Reload the datasets, then flatten each image to one row and cast to float.
train_dataset, test_dataset = get_dataset()
train_dataset.train_data = train_dataset.train_data.view(train_dataset.train_data.size(0),-1).float()
test_dataset.test_data = test_dataset.test_data.view(test_dataset.test_data.size(0),-1).float()

# Normalize both splits in place with the training-set statistics.
# The last expression is the normalized test tensor, which is what the cell displays.
mean, std = train_dataset.train_data.mean(), train_dataset.train_data.std()
train_dataset.train_data.sub_(mean).div_(std)
test_dataset.test_data.sub_(mean).div_(std)

tensor([[-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
        [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
        [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
        ...,
        [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
        [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241],
        [-0.4241, -0.4241, -0.4241,  ..., -0.4241, -0.4241, -0.4241]])

In [29]:
train_dataset.train_label

AttributeError: 'MNIST' object has no attribute 'train_label'

In [23]:
# Hyper-parameters for the 5-fold cross-validation run.
model = create_mnist_model()
nb_epochs = 150
lr = 1e-1
mini_batch = 100

# NOTE(review): the call passes the literal 1e-1 rather than the `lr` variable defined above.
train_loss_kfold, val_loss_kfold, train_acc_kfold, val_acc_kfold = train_model_kfold_sgd(model, train_dataset, kfold=5, shuffle=True, nb_epochs = nb_epochs, mini_batch_size = mini_batch, lr = 1e-1)


AttributeError: 'MNIST' object has no attribute 'cuda'

Plotting functions

In [None]:
plot_acc_loss(train_loss_kfold, val_loss_kfold, train_acc_kfold, val_acc_kfold)