In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import numpy as np
import os

In [2]:
# Data augmentation and normalization for training
# Just normalization for validation
#class torchvision.transforms.RandomResizedCrop(size, scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=2)
#transforms.RandomResizedCrop(224) --> A crop of random size (default: of 0.08 to 1.0) of the original size and a 
#random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. 
#This crop is finally resized to given size (224 in this case). 
#transforms.CenterCrop(224)--> Crops the image at the center. 224 is the Desired output size of the crop.
#class torchvision.transforms.Normalize(mean, std)
#Normalize a tensor image with mean and standard deviation. Given mean: (M1,...,Mn) and std: (S1,..,Sn) for n channels, 
#this transform will normalize each channel of the input torch.Tensor i.e. 
#input[channel] = (input[channel] - mean[channel]) / std[channel]
#Parameters:     mean (sequence) – Sequence of means for each channel.
#                std (sequence) – Sequence of standard deviations for each channel.

# Per-channel (R, G, B) mean and standard deviation used by Normalize:
#     input[channel] = (input[channel] - mean[channel]) / std[channel]
# Defined once so the training and validation pipelines cannot drift apart.
# NOTE(review): these look like the statistics usually paired with pretrained
# torchvision models -- confirm before changing them.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Image pre-processing pipelines, keyed by dataset split.
data_transforms = {
    # Training: random crop and random horizontal flip act as augmentation,
    # then the image is converted to a tensor and normalized.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),   # random-size/aspect crop, resized to 224
        transforms.RandomHorizontalFlip(),   # random left-right flip
        transforms.ToTensor(),               # PIL Image / ndarray -> tensor
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]),
    # Validation: no augmentation, only a fixed resize and center crop,
    # followed by the same tensor conversion and normalization.
    'val': transforms.Compose([
        transforms.Resize(256),              # resize the input image to the given size
        transforms.CenterCrop(224),          # 224 crop taken from the center
        transforms.ToTensor(),               # PIL Image / ndarray -> tensor
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]),
}


    

In [3]:
# Root folder that holds one sub-directory per split ('train' and 'val').
data_dir = 'data/hymenoptera_data'

# One ImageFolder per split, each paired with the transform pipeline of the same name.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ['train', 'val']
}

# One DataLoader per split: batches of 4 images, shuffled, with num_workers=4.
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=4,
        shuffle=True,
        num_workers=4,
    )
    for split, dataset in image_datasets.items()
}

# Number of images available in each split.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

# Class labels as reported by the training ImageFolder.
class_names = image_datasets['train'].classes

# Summary of what was loaded.
print(f"Class Names: {class_names}")
print(f"Dataset Sizes: {dataset_sizes}")
print(f"Batches in training set: {len(dataloaders['train'])}")
print(f"Batches in validation set: {len(dataloaders['val'])}")

Class Names: ['ants', 'bees']
Dataset Sizes: {'train': 244, 'val': 153}
Batches in training set: 61
Batches in validation set: 39


In [4]:
# Build a ResNet-18 and load its pretrained weights (pretrained=True).
model_conv = models.resnet18(pretrained=True)
# Print the module tree to inspect the layers that make up the network.
print(model_conv)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  



In [5]:
# Freeze the network: switch off gradient tracking on every parameter the model
# currently has, so none of them receive gradients during backpropagation.
for param in model_conv.parameters():
    param.requires_grad_(False)

In [6]:
# Number of inputs to the last layer (i.e. the number of neurons in the layer
# preceding it).
num_ftrs = model_conv.fc.in_features
# Replace the last layer with a new one that has one output neuron per class.
# The width comes from class_names rather than a literal 2, so the head always
# matches the classes found in the training folder.
model_conv.fc = nn.Linear(num_ftrs, len(class_names))

In [7]:
# Use the GPU copy of the model when CUDA is available; otherwise keep the model as it is.
model_conv = model_conv.cuda() if torch.cuda.is_available() else model_conv

In [10]:
# Sanity check on a single training batch: run it through the model and print
# the inputs' shape, the labels, the raw outputs and the predicted classes.
iteration = 0
correct = 0
# Send the batch to whichever device the model lives on. The model is only
# moved to the GPU when CUDA is available, so calling .cuda() unconditionally
# here would crash on a CPU-only machine.
device = next(model_conv.parameters()).device
for inputs, labels in dataloaders['train']:
    # Only the first batch is inspected.
    if iteration == 1:
        break
    # Plain tensors are passed straight to the model; no Variable wrapper is needed.
    inputs = inputs.to(device)
    labels = labels.to(device)
    # print(f"Inputs: {inputs}")
    print(f"Inputs Shape: {inputs.shape}")
    print(f"Labels: {labels}")
    print(f"Labels Shape: {labels.shape}")
    # Forward pass: one row of raw scores per image, one column per class.
    output = model_conv(inputs)
    print(f"Output Tensor: {output}")
    print(f"Output Shape: {output.shape}")
    # torch.max along dim 1 returns (values, indices); the index of the largest
    # score in each row is the predicted class.
    _, predicted = torch.max(output, 1)
    print(f"Predicted: {predicted}")
    print(f"Predicted Shape: {predicted.shape}")
    # Count how many predictions agree with the labels.
    correct += (predicted == labels).sum()
    print(f"Correct Predictions: {correct}")

    iteration += 1

Inputs Shape: torch.Size([4, 3, 224, 224])
Labels: tensor([0, 1, 0, 0], device='cuda:0')
Labels Shape: torch.Size([4])
Output Tensor: tensor([[ 0.3508, -0.0597],
        [ 0.1103,  0.3776],
        [-0.4980,  0.7545],
        [-0.7765,  0.2110]], device='cuda:0', grad_fn=<AddmmBackward0>)
Output Shape: torch.Size([4, 2])
Predicted: tensor([0, 1, 1, 1], device='cuda:0')
Predicted Shape: torch.Size([4])
Correct Predictions: 2


In [11]:
# Classification loss computed from the model's raw output scores.
criterion = nn.CrossEntropyLoss()
# Only the parameters of the replaced fc layer are handed to the optimizer.
optimizer = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
# try experimenting with different values of lr and momentum
# decay lr by a factor of 0.1 every 7 epochs
# StepLR is reached through `optim.lr_scheduler` instead of the bare
# `lr_scheduler` name: this notebook also defines a function called
# `lr_scheduler`, which rebinds that name and would make `lr_scheduler.StepLR`
# raise AttributeError when this cell is re-run afterwards.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [12]:
# Hand-written step decay, to help understand what scheduler.step() does for the StepLR scheduler created above
def lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    """Apply a step decay to the optimizer's learning rate.

    The rate for ``epoch`` is ``init_lr * 0.1 ** (epoch // lr_decay_epoch)``.
    It is printed on every call, but it is only written into the optimizer's
    parameter groups on epochs that are exact multiples of ``lr_decay_epoch``
    (epoch 0 included).

    NOTE: this function has the same name as the ``lr_scheduler`` module
    imported from ``torch.optim`` at the top of the notebook. Once this
    definition has run, the bare name refers to this function, not the module.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            with an ``'lr'`` entry.
        epoch: current epoch number.
        init_lr: learning rate before any decay is applied.
        lr_decay_epoch: number of epochs between successive decays.

    Returns:
        The same ``optimizer`` object that was passed in.
    """
    decay_steps = epoch // lr_decay_epoch
    lr = init_lr * (0.1**decay_steps)
    print(f"Learning Rate: {lr}")

    # Between decay boundaries the optimizer is left untouched.
    at_boundary = epoch % lr_decay_epoch == 0
    if not at_boundary:
        return optimizer

    print(f"Learning Rate Changed to: {lr}")
    for group in optimizer.param_groups:
        group['lr'] = lr
    return optimizer