In [1]:
%matplotlib inline
from copy import deepcopy
from collections import OrderedDict
import gc
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD,Adam,lr_scheduler
from torch.utils.data import random_split
import torchvision
from torchvision import transforms,models
import tensorflow as tf


In [2]:
cuda_device = torch.device("cuda:0")
cpu_device = torch.device("cpu:0")

In [5]:
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomHorizontalFlip(p=.40),
    transforms.RandomRotation(30),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

traindata = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=train_transform)
trainset,valset = random_split(traindata,[42000,8000])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=64,shuffle=False)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,shuffle=False)

classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')
     

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        base = models.resnet18(pretrained=True)
        self.base = nn.Sequential(*list(base.children())[:-1])
        in_features = base.fc.in_features
        self.drop = nn.Dropout()
        self.final = nn.Linear(in_features,10)
    
    def forward(self,x):
        x = self.base(x)
        x = self.drop(x.view(-1,self.final.in_features))
        return self.final(x)
    
model = Model().cuda()
[x for x,y in model.named_children()]

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

['base', 'drop', 'final']

In [7]:
criterion = nn.CrossEntropyLoss()
param_groups = [
    {'params':model.base.parameters(),'lr':.0001},
    {'params':model.final.parameters(),'lr':.001}
]
optimizer = Adam(param_groups)
lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
states = {}

In [8]:
%%time
best_val_acc = -1000
best_val_model = None
for epoch in range(10):  
    model.train(True)
    running_loss = 0.0
    running_acc = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.cuda(),labels.cuda()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item() * inputs.size(0)
        out = torch.argmax(outputs.detach(),dim=1)
        assert out.shape==labels.shape
        running_acc += (labels==out).sum().item()
    print(f"Train loss {epoch+1}: {running_loss/len(trainset)},Train Acc:{running_acc*100/len(trainset)}%")
    
    correct = 0
    model.train(False)
    with torch.no_grad():
        for inputs,labels in valloader:
            out = model(inputs.cuda()).cpu()
            out = torch.argmax(out,dim=1)
            acc = (out==labels).sum().item()
            correct += acc
    print(f"Val accuracy:{correct*100/len(valset)}%")
    if correct>best_val_acc:
        best_val_acc = correct
        best_val_model = deepcopy(model.state_dict())
    lr_scheduler.step()
    
print('Finished Training') 

Train loss 1: 0.49266964024021515,Train Acc:83.04761904761905%
Val accuracy:90.2875%
Train loss 2: 0.21775638348715645,Train Acc:92.6%
Val accuracy:93.45%
Train loss 3: 0.18752054964076906,Train Acc:93.70714285714286%
Val accuracy:93.6%
Train loss 4: 0.18136897898288,Train Acc:93.96666666666667%
Val accuracy:93.275%
Train loss 5: 0.18235770025707426,Train Acc:93.83571428571429%
Val accuracy:93.75%
Train loss 6: 0.18668787482238952,Train Acc:93.75952380952381%
Val accuracy:93.5625%
Train loss 7: 0.18674489272776104,Train Acc:93.6547619047619%
Val accuracy:93.4625%
Train loss 8: 0.18551859997567677,Train Acc:93.70476190476191%
Val accuracy:93.475%
Train loss 9: 0.1854121912320455,Train Acc:93.76666666666667%
Val accuracy:93.2375%
Train loss 10: 0.1835045025973093,Train Acc:93.81904761904762%
Val accuracy:93.25%
Finished Training
CPU times: user 36min 11s, sys: 1min 4s, total: 37min 16s
Wall time: 37min 15s


In [9]:
model

Model(
  (base): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    

In [10]:
starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
repetitions = len(testset)
timings=np.zeros((repetitions,1))

In [11]:
%%time
correct = 0
model.load_state_dict(best_val_model)
model.train(False)
with torch.no_grad():
    starter.record()
    for inputs,labels in testloader:
        out = model(inputs.cuda()).cpu()
        out = torch.argmax(out,dim=1)
        acc = (out==labels).sum().item()
        correct += acc
    ender.record()
    torch.cuda.synchronize()
    curr_time = starter.elapsed_time(ender)
    timings[epoch] = curr_time
    
mean_syn = np.sum(timings) / repetitions
std_syn = np.std(timings)
print(f'mean prediction latency: {mean_syn}')
print(f"Test accuracy: {correct*100/len(testset)}%")

mean prediction latency: 2.46263203125
Test accuracy: 94.3%
CPU times: user 23.8 s, sys: 973 ms, total: 24.8 s
Wall time: 24.6 s


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
torch.save(model.state_dict(), '/content/drive/My Drive/checkpoint.pth')

# # download checkpoint file
# files.download('checkpoint.pth')

In [None]:
import os
size=os.path.getsize("/content/drive/My Drive/checkpoint.pth")
size

44804621

In [None]:
state_dict = torch.load('/content/drive/My Drive/checkpoint.pth')
print(state_dict.keys())

odict_keys(['base.0.weight', 'base.1.weight', 'base.1.bias', 'base.1.running_mean', 'base.1.running_var', 'base.1.num_batches_tracked', 'base.4.0.conv1.weight', 'base.4.0.bn1.weight', 'base.4.0.bn1.bias', 'base.4.0.bn1.running_mean', 'base.4.0.bn1.running_var', 'base.4.0.bn1.num_batches_tracked', 'base.4.0.conv2.weight', 'base.4.0.bn2.weight', 'base.4.0.bn2.bias', 'base.4.0.bn2.running_mean', 'base.4.0.bn2.running_var', 'base.4.0.bn2.num_batches_tracked', 'base.4.1.conv1.weight', 'base.4.1.bn1.weight', 'base.4.1.bn1.bias', 'base.4.1.bn1.running_mean', 'base.4.1.bn1.running_var', 'base.4.1.bn1.num_batches_tracked', 'base.4.1.conv2.weight', 'base.4.1.bn2.weight', 'base.4.1.bn2.bias', 'base.4.1.bn2.running_mean', 'base.4.1.bn2.running_var', 'base.4.1.bn2.num_batches_tracked', 'base.5.0.conv1.weight', 'base.5.0.bn1.weight', 'base.5.0.bn1.bias', 'base.5.0.bn1.running_mean', 'base.5.0.bn1.running_var', 'base.5.0.bn1.num_batches_tracked', 'base.5.0.conv2.weight', 'base.5.0.bn2.weight', 'base.

In [None]:
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
print(state_dict)

OrderedDict([('base.0.weight', tensor([[[[-1.1061e-02, -7.1060e-03, -2.8317e-03,  ...,  5.5444e-02,
            1.6304e-02, -1.3294e-02],
          [ 1.0479e-02,  8.6353e-03, -1.1104e-01,  ..., -2.7256e-01,
           -1.2988e-01,  3.1216e-03],
          [-7.6097e-03,  5.8043e-02,  2.9430e-01,  ...,  5.1831e-01,
            2.5540e-01,  6.2741e-02],
          ...,
          [-2.7813e-02,  1.5418e-02,  7.1990e-02,  ..., -3.3382e-01,
           -4.2101e-01, -2.5829e-01],
          [ 3.0607e-02,  4.0574e-02,  6.2452e-02,  ...,  4.1323e-01,
            3.9343e-01,  1.6610e-01],
          [-1.3724e-02, -3.9397e-03, -2.4312e-02,  ..., -1.5100e-01,
           -8.1990e-02, -5.3305e-03]],

         [[-1.1528e-02, -2.7204e-02, -3.5279e-02,  ...,  3.1370e-02,
            2.0785e-04, -2.5914e-02],
          [ 4.5473e-02,  3.3022e-02, -1.0543e-01,  ..., -3.1403e-01,
           -1.6111e-01, -1.6181e-03],
          [-1.1579e-03,  9.7577e-02,  4.0088e-01,  ...,  7.0620e-01,
            3.6804e-01,  1.