In [1]:
import numpy as np
import torch

import matplotlib.pyplot as plt

In [2]:
import torchvision
import torchvision.transforms as transforms

In [3]:
import torch.nn as nn
import torch.optim as optim

from torchvision import models

import os

import copy

In [4]:
# Use a project-local directory as the torch cache root for downloaded weights.
os.environ["TORCH_HOME"] = "./torch_home"

In [5]:
# Compute device: prefer the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


#  Model (Inception - Pretrained weights transferred)

In [6]:
# Load Inception v3 with the default pretrained weights
# (the enum member is equivalent to passing weights='DEFAULT').
inception = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)

In [7]:
# Dump the module tree to inspect the layer names of the network.
print(inception)

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [8]:
# Freeze every parameter currently in the network so that no gradients are
# computed for the pretrained weights.
for frozen_param in inception.parameters():
    frozen_param.requires_grad_(False)

#### We also want to train the parameters in the last layer of the auxiliary output, as they help with gradient flow.


In [9]:
# Auxiliary classifier: replace its final linear layer with a fresh one that
# has one output per target class.
num_classes = 10
aux_in_features = inception.AuxLogits.fc.in_features
inception.AuxLogits.fc = nn.Linear(
    in_features=aux_in_features,
    out_features=num_classes,
)

In [10]:
# Main classifier: replace the final fully connected layer with a fresh one
# that has one output per target class.
in_features = inception.fc.in_features
inception.fc = nn.Linear(
    in_features=in_features,
    out_features=num_classes,
)

In [11]:
# Sanity check: print the shape of every parameter that will still receive
# gradients.
for param in inception.parameters():
    if not param.requires_grad:
        continue
    print(param.shape)

torch.Size([10, 768])
torch.Size([10])
torch.Size([10, 2048])
torch.Size([10])


## Dataset

In [12]:
# Inception v3 takes 299x299 inputs, so the dataset images are resized to that.
#
# torchvision's pretrained Inception v3 was trained on images normalised with
# the ImageNet channel statistics, so the same statistics are used here
# instead of a plain 0.5 / 0.5 normalisation.
imagenet_mean = (0.485, 0.456, 0.406)
imagenet_std = (0.229, 0.224, 0.225)

# Training: a random crop that is rescaled to 299x299 acts as augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
    ])

# Testing: a deterministic resize, so that repeated evaluations of the same
# model give the same accuracy and every image is scored in full rather than
# on a random crop.
transform_test = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
    ])

In [13]:
# CIFAR-10 train / test splits read from ./data. download=False means the
# files must already be present there; nothing is fetched.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform_train,
    download=False,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform_test,
    download=False,
)

# The data is not downloaded again here.
# Transforms modify each sample as it is read, without affecting the
# underlying data stored on disk - that is the power of combining datasets
# with transforms.

## Utils

In [15]:
# Accuracy evaluation for inception

def evaluation_inception(dataloader, model):
    """Return the top-1 accuracy (in percent) of ``model`` over ``dataloader``.

    The model is switched to eval mode for the duration of the call, so only
    the mainline output is produced by the forward pass. The train/eval mode
    the model had on entry is restored before returning, so calling this in
    the middle of training does not leave the model in eval mode.

    Args:
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        model: an ``nn.Module`` whose forward pass returns class scores of
            shape ``(batch, num_classes)`` when in eval mode.

    Returns:
        Accuracy as a number in ``[0, 100]``; ``0.0`` if the dataloader
        yields no samples.
    """
    was_training = model.training
    model.eval()

    # Run on whatever device the model lives on; a parameter-free model
    # leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    total, correct = 0, 0
    try:
        with torch.no_grad():
            for inputs, labels in dataloader:
                # move data to the model's device (in batches)
                if target_device is not None:
                    inputs = inputs.to(target_device)
                    labels = labels.to(target_device)
                # ONLY MAINLINE OUTPUT IS RETURNED WHEN IN EVAL MODE.
                outputs = model(inputs)
                _, pred = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (pred == labels).sum().item()
    finally:
        # Restore the mode the caller had set, even if a batch failed.
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

# the evaluation device is taken from the model's own parameters

**Only the mainline output is used for evaluation**  
We don't want the auxiliary output for testing.

Inception class - the forward function returns the output along the standard path and also along the auxiliary path (as a tuple), because we compute the loss at both places and back-propagate through both - **when the model is in train mode**  

When in **eval mode** - it returns only the mainline output  


OR:  
assign the returned tuple to two separate variables and use the mainline output for evaluation.

## Train

In [None]:
# build dataloader
batch_size = 16
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

# build model
inception = inception.to(device)

# loss function
loss_fn = nn.CrossEntropyLoss()

# build optimizer - hand it only the parameters that are still trainable
opt = optim.SGD([p for p in inception.parameters() if p.requires_grad], lr=0.01)

#########################################################################################

loss_epoch_arr = []  # mean training loss of each epoch
max_epochs = 1

min_loss = float('inf')  # lowest single-batch loss seen so far

# number of batches per epoch, taken from the loader rather than hard-coded
n_iters = len(trainloader)

for epoch in range(max_epochs):

    # Re-enter train mode at the start of every epoch: the forward pass below
    # unpacks (mainline, auxiliary) outputs, which the model only returns in
    # train mode, and the end-of-epoch evaluation may have left it in eval mode.
    inception.train()

    running_loss = 0.0

    for i, (inputs, labels) in enumerate(trainloader):

        # data
        inputs, labels = inputs.to(device), labels.to(device)

        opt.zero_grad()  # reset grad

        # forward pass
        outputs, aux_outputs = inception(inputs)

        ## loss - weighted combination of the mainline and auxiliary losses,
        # both computed against the same true labels. Expressing the total as
        # one scalar lets a single backward() propagate each term to exactly
        # the parameters that contributed to it.
        loss = loss_fn(outputs, labels) + 0.3 * loss_fn(aux_outputs, labels)

        # compute gradients
        loss.backward()

        # update parameters
        opt.step()

        batch_loss = loss.item()
        running_loss += batch_loss

        # checkpoint - keep a copy of the weights at the lowest batch loss
        if min_loss > batch_loss:
            min_loss = batch_loss
            best_model = copy.deepcopy(inception.state_dict())
            print('Min loss %0.2f' % min_loss)

        if i % 100 == 0:
            print('Iteration: %d/%d, Loss: %0.2f' % (i, n_iters, batch_loss))

        del inputs, labels, outputs, aux_outputs
        torch.cuda.empty_cache()

    # record the mean loss over the epoch instead of the last batch's loss
    loss_epoch_arr.append(running_loss / max(n_iters, 1))

    print('Epoch: %d/%d, Test acc: %0.2f, Train acc: %0.2f' % (
        epoch, max_epochs,
        evaluation_inception(testloader, inception),
        evaluation_inception(trainloader, inception)))


plt.plot(loss_epoch_arr)
plt.xlabel('Epoch')
plt.ylabel('Mean training loss')
plt.show()

In [None]:
# Restore the checkpointed weights saved during training.
inception.load_state_dict(best_model)

# Report accuracy on the test split only.
test_accuracy = evaluation_inception(testloader, inception)
print(test_accuracy)

### Differences in the implementation for the Inception model are due to the auxiliary loss.

modifying pretrained model - last layer of main one and also aux one.

Calling model(X) in train mode returns a tuple of (mainline output, auxiliary output)  
Use the mainline output only for evaluation  

combination of losses - for BP