# Hands-On Session No. 3
## (Training CNNs on MNIST/CIFAR10)


In [0]:
# Connect to Google Drive: mount it under /content/gdrive (this cell relies on
# the google.colab package and on IPython's "!" shell escape).
from google.colab import drive
drive.mount('/content/gdrive')
# this should print all folders in your google drive main folder
!ls "/content/gdrive/My Drive/"

# Change "colab_data" to the name of your own data folder inside "My Drive".
# data_dir is later passed to the dataset loaders as the dataset root directory.
data_dir = "/content/gdrive/My Drive/colab_data"

In [0]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision.utils import save_image, make_grid
import matplotlib.pyplot as plt
import numpy as np
from IPython import display
import os

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): `device` is not referenced by the other cells visible here.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Uncomment the next line to force CPU execution:
# device = 'cpu'

def mnist_loaders(batch_size, data_dir):
  """Build the MNIST train/test data loaders.

  Both splits are requested with ``download=True`` and converted to tensors
  with ``ToTensor``. The training loader shuffles; the test loader keeps the
  dataset order.

  Args:
    batch_size: number of samples per batch for both loaders.
    data_dir: root directory handed to ``datasets.MNIST``.

  Returns:
    A ``(train_loader, test_loader)`` tuple.
  """
  to_tensor = transforms.ToTensor()
  train_set = datasets.MNIST(data_dir, train=True, download=True, transform=to_tensor)
  test_set = datasets.MNIST(data_dir, train=False, download=True, transform=to_tensor)

  # options shared by both loaders
  shared = dict(batch_size=batch_size, pin_memory=True)
  loader_for_train = torch.utils.data.DataLoader(train_set, shuffle=True, **shared)
  loader_for_test = torch.utils.data.DataLoader(test_set, shuffle=False, **shared)
  return loader_for_train, loader_for_test


def cifar_loaders(batch_size, data_dir):
  """Build the CIFAR10 train/test data loaders.

  Training images get a random horizontal flip and a random 32-pixel crop
  (second ``RandomCrop`` argument: 4) before being converted to tensors and
  normalized. Test images are only converted and normalized. The training
  loader shuffles; the test loader keeps the dataset order.

  Only the training split is requested with ``download=True``; the test split
  is opened without it, so it relies on the files already being present under
  ``data_dir``.

  Args:
    batch_size: number of samples per batch for both loaders.
    data_dir: root directory handed to ``datasets.CIFAR10``.

  Returns:
    A ``(train_loader, test_loader)`` tuple.
  """
  # NOTE(review): these per-channel constants look like ImageNet-style
  # statistics rather than CIFAR10 ones -- confirm this is intended.
  normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                   std=[0.225, 0.225, 0.225])

  augmented = transforms.Compose([
      transforms.RandomHorizontalFlip(),
      transforms.RandomCrop(32, 4),
      transforms.ToTensor(),
      normalize,
  ])
  plain = transforms.Compose([transforms.ToTensor(), normalize])

  train_set = datasets.CIFAR10(data_dir, train=True, download=True, transform=augmented)
  test_set = datasets.CIFAR10(data_dir, train=False, transform=plain)

  # options shared by both loaders
  shared = dict(batch_size=batch_size, pin_memory=True)
  loader_for_train = torch.utils.data.DataLoader(train_set, shuffle=True, **shared)
  loader_for_test = torch.utils.data.DataLoader(test_set, shuffle=False, **shared)
  return loader_for_train, loader_for_test

class AverageValueMeter(object):
    """Keep the most recent value together with a running weighted average.

    Attributes (all reset by ``reset``):
        val: the value passed to the latest ``update`` call.
        sum: accumulated ``val * n`` over all updates.
        count: accumulated ``n`` over all updates.
        avg: ``sum / count`` as of the latest update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val, self.avg, self.sum = 0, 0, 0
        self.count = 0.0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


# Instructions for the exercise

### Q1

1a. Implement a linear model.

1b. Think: what will happen if the weights (and biases) are initialized to zero? Now run it and check.

1c. Did the model manage to learn? Why?

1d. Train your model end to end on MNIST
(you might want to check out the momentum method)

**GPU**:

Try moving the computation to the GPU. For that you'll need to move your model and all the data to the GPU. You can do this by calling ".cuda()" on your model and on all input tensors (e.g., samples and targets, and any other tensor that you created yourself).

### Q2

2a. Implement a two-layer network.

2b. Think: what will happen if the weights (and biases) are initialized to zero (using the two-layer net)? Now run it and check. Why did this happen?

2c. Did the model manage to learn? Why?

2d. Train your model end to end on MNIST.

### Q3

3a. Add a convolution layer to the beginning of the network. (Check Conv2d)
(don't forget the non-linearity..)

3b. Try to achieve ~1% error on MNIST

**Trying other hyperparameters**

While experimenting, try different optimizers, different initializations, and L2 regularization (try these once you feel that you can't improve the current model any further).

### Q4

Try training a model on CIFAR10. (Try to achieve above 65% accuracy; you can try to go for 80%, but you'll need to go deeper.)




In [0]:
# Batch size used by both the train and the test loader.
# The 0 is an exercise placeholder: replace it with a positive integer before
# running this cell.
batch_size = 0 # <<<<------------ Fill this
# Build the MNIST loaders; data is stored under data_dir.
train_loader, test_loader = mnist_loaders(batch_size, data_dir)

In [0]:
# Implement the "Flatten" module
class Flatten(nn.Module):
    """Exercise stub for the first layer of the ``nn.Sequential`` model.

    As written, ``forward`` ignores ``x`` and returns the constant 0; the
    assignment to ``out`` is the line the exercise asks you to fill in.
    Presumably the intended result keeps the batch dimension and flattens the
    rest so an ``nn.Linear`` layer can follow -- confirm against the exercise.
    """
    def forward(self, x):
        out = 0    #  <---------------------- Fill this correctly
        return out


# Model skeleton: Flatten followed by a Linear layer that is still commented
# out and must be completed with the right input/output dimensions.
model = nn.Sequential(
    Flatten(),
    #nn.Linear(?????, ???????),   # <----------------- Put correct dimensions for linear layer
)


# next lines check if your model runs correctly
# Take the first test batch, run it through the model and compute the fraction
# of samples whose arg-max prediction differs from the label.
X, y = next(iter(test_loader))
pred = model(X)
err = pred.view(pred.shape[0], -1).max(dim=1)[1].ne(y).float().mean()

In [0]:
def weights_init(m):
    """Zero the parameters of a convolutional or linear layer, in place.

    Meant to be passed to ``model.apply``. Modules that are neither
    ``nn.Conv2d`` nor ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) re-initialize.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return

    m.weight.data.zero_()    ## <--------- Note: the weights are initialized to zero
    bias = m.bias
    # layers created with bias=False have no bias tensor to reset
    if bias is not None:
        bias.data.zero_()

            
# Hyperparameter placeholders. With n_epochs = 0 the epoch loop below does not
# run at all, so both values must be chosen before training.
lr = 0 # <----------------- Choose learning rate
n_epochs = 0 # <------------- choose number of epochs

# put the model in training mode
model.train()
# weights initialization (weights_init is applied to every sub-module)
model.apply(weights_init)
# loss function
lossf = torch.nn.CrossEntropyLoss()
# optimizer: plain SGD over all model parameters
optimizer = torch.optim.SGD(model.parameters(), lr=lr)



######### this is for statistics (no need to touch)  #########
# per-epoch history: training loss, training error, validation error
trn_losss = []
trn_errs = []
val_errs = []
# interactive plotting; axes[0] shows the training loss, axes[1] the
# training and validation errors
plt.ion()
fig, axes = plt.subplots(1, 2, figsize=(10,5))
##############################################################
##############################################################


# computing the validation error
def compute_error(loader, model):
  """Return the classification error rate of `model` over `loader`.

  The data is read from the `loader` argument, so passing the test loader
  really measures the validation error (reading the global `train_loader`
  here would report the training error instead).

  Args:
    loader: iterable yielding (samples, targets) batches, where targets holds
      one class index per sample.
    model: callable mapping a batch of samples to per-class scores with the
      classes along dimension 1.

  Returns:
    The fraction of misclassified samples, or 0 if `loader` yields no batches.
  """
  n_wrong = 0
  n_seen = 0
  # Evaluation only: no gradients are needed, so skip building the autograd graph.
  # NOTE: the model is evaluated in whatever train/eval mode the caller left it in.
  with torch.no_grad():
    for samples, targets in loader:
      outputs = model(samples)
      # Count per sample so a smaller final batch is not over-weighted.
      n_wrong += outputs.max(1)[1].ne(targets).sum().item()
      n_seen += targets.numel()

      #break                ##   <-------------- you might consider uncommenting this for debugging
  return n_wrong / n_seen if n_seen else 0


# iterate on epochs
for t in range(n_epochs):
  # fresh running averages for this epoch
  trn_err = AverageValueMeter()
  trn_loss = AverageValueMeter()

  # iterate on batches
  for idx, (samples, targets) in enumerate(train_loader):

    ##################  Change these lines  ####################################
    # run the model on the samples and put into the loss function
    # (the code below expects this section to define `outputs` and `loss`)



    ##################  Finish changing here  ##################################

    # standard update step: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

#################### compute stats (no need to change below here) #############
    err = outputs.max(1)[1].ne(targets).float().mean().item()
    trn_err.update(err)
    trn_loss.update(loss.item())
    if idx % 100 == 0:
        print('epoch:', t, 'batch:', idx, '/', len(train_loader), 'train-error:',  trn_err.avg)
        # An active `break` here fires at idx == 0 and ends every epoch after a
        # single batch, so it is kept only as an opt-in debugging aid.
        #break            ##   <-------------- you might consider uncommenting this for debugging

  # computing stats
  val_err = compute_error(test_loader, model)
  val_errs.append(val_err)
  trn_losss.append(trn_loss.avg)
  trn_errs.append(trn_err.avg)
  # redraw the curves: training loss (black) on the left axis,
  # training error (blue) and validation error (red) on the right axis
  display.clear_output(wait=True)
  axes[0].plot(range(len(trn_losss)), trn_losss, 'k')[0]
  axes[1].plot(range(len(trn_errs)), trn_errs, 'b')[0]
  axes[1].plot(range(len(val_errs)), val_errs, 'r')[0]
  display.display(plt.gcf())
  