<center><h2> **Transfer Learning** </h2></center>
<h3> Note </h3>
<ul> 
    <li>Resnet18 pretrained with Imagenet </li>
    <li> Images resized to 224, with resnet's normalization. Resize doesn't work with multiprocessing, so data loading couldn't be parallelized. </li>
  </ul>
<h3> Configurations</h3>
1. Only last custom linear layer trained : 77.75 %
2. All layers trained with Adam : ~ 95% 

In [2]:
%matplotlib inline
from copy import deepcopy
from collections import OrderedDict
import gc
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD,Adam,lr_scheduler
from torch.utils.data import random_split
import torchvision
from torchvision import transforms,models

In [3]:
!tar -zxvf ../input/cifar10-python/cifar-10-python.tar.gz

cifar-10-batches-py/
cifar-10-batches-py/data_batch_4
cifar-10-batches-py/readme.html
cifar-10-batches-py/test_batch
cifar-10-batches-py/data_batch_3
cifar-10-batches-py/batches.meta
cifar-10-batches-py/data_batch_2
cifar-10-batches-py/data_batch_5
cifar-10-batches-py/data_batch_1


In [4]:
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(p=.40),
    transforms.RandomRotation(30),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

traindata = torchvision.datasets.CIFAR10(root='.', train=True,download=False, transform=train_transform)
trainset,valset = random_split(traindata,[42000,8000])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=64,shuffle=False)

testset = torchvision.datasets.CIFAR10(root='.', train=False,download=False, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,shuffle=False)

classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [5]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        base = models.resnet18(pretrained=True)
        self.base = nn.Sequential(*list(base.children())[:-1])
        in_features = base.fc.in_features
        self.drop = nn.Dropout()
        self.final = nn.Linear(in_features,10)
    
    def forward(self,x):
        x = self.base(x)
        x = self.drop(x.view(-1,self.final.in_features))
        return self.final(x)
    
model = Model().cuda()
[x for x,y in model.named_children()]

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.torch/models/resnet18-5c106cde.pth
100%|██████████| 46827520/46827520 [00:03<00:00, 12455840.01it/s]


['base', 'drop', 'final']

In [6]:
criterion = nn.CrossEntropyLoss()
param_groups = [
    {'params':model.base.parameters(),'lr':.0001},
    {'params':model.final.parameters(),'lr':.001}
]
optimizer = Adam(param_groups)
lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
states = {}

<h2> Training the model</h2>

In [7]:
%%time
best_val_acc = -1000
best_val_model = None
for epoch in range(10):  
    model.train(True)
    running_loss = 0.0
    running_acc = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.cuda(),labels.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item() * inputs.size(0)
        out = torch.argmax(outputs.detach(),dim=1)
        assert out.shape==labels.shape
        running_acc += (labels==out).sum().item()
    print(f"Train loss {epoch+1}: {running_loss/len(trainset)},Train Acc:{running_acc*100/len(trainset)}%")
    
    correct = 0
    model.train(False)
    with torch.no_grad():
        for inputs,labels in valloader:
            out = model(inputs.cuda()).cpu()
            out = torch.argmax(out,dim=1)
            acc = (out==labels).sum().item()
            correct += acc
    print(f"Val accuracy:{correct*100/len(valset)}%")
    if correct>best_val_acc:
        best_val_acc = correct
        best_val_model = deepcopy(model.state_dict())
    lr_scheduler.step()
    
print('Finished Training')  

Train loss 1: 0.4894555303482782,Train Acc:83.19285714285714%
Val accuracy:89.95%
Train loss 2: 0.2553661489373162,Train Acc:91.29285714285714%
Val accuracy:91.85%
Train loss 3: 0.155427003854797,Train Acc:94.71190476190476%
Val accuracy:94.475%
Train loss 4: 0.13297460330384117,Train Acc:95.57142857142857%
Val accuracy:94.3375%
Train loss 5: 0.12965479410829997,Train Acc:95.6595238095238%
Val accuracy:94.5875%
Train loss 6: 0.13036923788275037,Train Acc:95.64285714285714%
Val accuracy:94.4875%
Train loss 7: 0.13007481270177024,Train Acc:95.5047619047619%
Val accuracy:94.3125%
Train loss 8: 0.1283206191573824,Train Acc:95.61904761904762%
Val accuracy:94.3375%
Train loss 9: 0.13014961347125825,Train Acc:95.44285714285714%
Val accuracy:94.65%
Train loss 10: 0.12543876162880943,Train Acc:95.79285714285714%
Val accuracy:94.6125%
Finished Training
CPU times: user 31min 4s, sys: 2min 50s, total: 33min 54s
Wall time: 33min 59s


<h2> Testing the model</h2>

In [8]:
%%time
correct = 0
model.load_state_dict(best_val_model)
model.train(False)
with torch.no_grad():
    for inputs,labels in testloader:
        out = model(inputs.cuda()).cpu()
        out = torch.argmax(out,dim=1)
        acc = (out==labels).sum().item()
        
        correct += acc
print(f"Test accuracy: {correct*100/len(testset)}%")

Test accuracy: 94.93%
CPU times: user 25.4 s, sys: 3.35 s, total: 28.8 s
Wall time: 28.8 s
