In [1]:
import copy
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
from torchvision import datasets,transforms 

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Load data
* Download train and test set
* Apply transforms
* Define dataloaders

In [3]:
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

In [4]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean,
                         std=std)
])

In [5]:
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean,
                         std=std)
])

In [6]:
train_dir = 'cats_or_dogs/train'
test_dir = 'cats_or_dogs/test'

In [7]:
train_data = datasets.ImageFolder(root=train_dir,
                                  transform=train_transform)

In [8]:
train_data

Dataset ImageFolder
    Number of datapoints: 2002
    Root location: cats_or_dogs/train
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [9]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=4,
                                           shuffle=True, num_workers=4)

In [10]:
test_data = datasets.ImageFolder(root=test_dir,
                                transform=test_transform)

In [11]:
test_data

Dataset ImageFolder
    Number of datapoints: 1012
    Root location: cats_or_dogs/test
    StandardTransform
Transform: Compose(
               Resize(size=256, interpolation=PIL.Image.BILINEAR)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [12]:
test_loader = torch.utils.data.DataLoader(test_data, batch_size=8,
                                          shuffle=True, num_workers=4)

In [13]:
dataloaders = {
    'train': train_loader,
    'test': test_loader
}

### Transfer learning
* Load the pretrained model, AlexNet
* Define parameters
* The criterion to minimize in the loss function. Given this is a classification model, we will look to minimize the cross-entropy loss
* A simple SGD optimizer with momentum which accelerate gradients vectors in the right directions and hence leads to faster converging
* Scheduler to decay Learning Rate by a factor of 0.1 every 7 epochs


In [14]:
from torchvision import models

model = models.alexnet(pretrained=True)

In [15]:
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [16]:
num_ftrs = model.classifier[6].in_features
num_ftrs

4096

### Finetuning the network

In [17]:
model.classifier[6] = nn.Linear(num_ftrs, 2)

In [18]:
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

### Training the model from scratch

In [19]:
criterion   = nn.CrossEntropyLoss()

optimizer   = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [20]:
def train_model(model, criterion, optimizer, num_epochs=25):

    model = model.to(device)
    total_step = len(dataloaders['train'])


    for epoch in range(num_epochs):
        print('epoch=',epoch)        

        for i, (images,labels)  in enumerate((dataloaders['train'])):

                images = images.to(device)
                labels = labels.to(device)
    
                outputs = model(images)
                outputs = outputs.to(device)
                loss = criterion(outputs,labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if (i+1) % 100 == 0:
                    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                           .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

        print('Epoch - %d, loss - %0.5f '\
            %(epoch, loss.item()))

    return model

In [21]:
model = train_model(model, criterion, optimizer, num_epochs=10)

epoch= 0
Epoch [1/10], Step [100/501], Loss: 0.6808
Epoch [1/10], Step [200/501], Loss: 0.4314
Epoch [1/10], Step [300/501], Loss: 0.7871
Epoch [1/10], Step [400/501], Loss: 0.4017
Epoch [1/10], Step [500/501], Loss: 0.4428
Epoch - 0, loss - 0.65200 
epoch= 1
Epoch [2/10], Step [100/501], Loss: 0.5634
Epoch [2/10], Step [200/501], Loss: 0.2472
Epoch [2/10], Step [300/501], Loss: 0.6122
Epoch [2/10], Step [400/501], Loss: 0.5157
Epoch [2/10], Step [500/501], Loss: 0.8063
Epoch - 1, loss - 0.65611 
epoch= 2
Epoch [3/10], Step [100/501], Loss: 0.3014
Epoch [3/10], Step [200/501], Loss: 1.5598
Epoch [3/10], Step [300/501], Loss: 0.2335
Epoch [3/10], Step [400/501], Loss: 0.6177
Epoch [3/10], Step [500/501], Loss: 0.5949
Epoch - 2, loss - 0.72163 
epoch= 3
Epoch [4/10], Step [100/501], Loss: 0.1945
Epoch [4/10], Step [200/501], Loss: 0.4695
Epoch [4/10], Step [300/501], Loss: 0.0730
Epoch [4/10], Step [400/501], Loss: 0.8941
Epoch [4/10], Step [500/501], Loss: 0.3730
Epoch - 3, loss - 0.599

### Model Evaluation

In [22]:
model.eval() 

with torch.no_grad():
    
    correct = 0
    total = 0
    
    for images, labels in dataloaders['test']:
        
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {}%'\
          .format(100 * correct / total))


Accuracy of the model on the test images: 92.29249011857708%
