In [1]:
from __future__ import print_function, division

import torch
import numpy as np
import torchvision
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import torchvision.models as models
import time
import os
import copy
from torch import nn
import math
import torch.utils.model_zoo as model_zoo

from util import *

plt.ion()

In [2]:

model_urls = {
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
}

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
#         self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
#         self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
#         self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
#         out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
#         out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
#         out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
#         self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
#                 nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
#         x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
    
def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        state_dict = model_zoo.load_url(model_urls['resnet101'])
        model_params = set(list(map(lambda x: x[0], model.named_parameters())))
        new_state_dict = {
            k:v for k, v in state_dict.items() if k in model_params
        }
        model.load_state_dict(new_state_dict, strict=False)
    return model

In [3]:
model = resnet101(pretrained=True)

In [17]:
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
normalize = transforms.Normalize(mean=image_mean,
                                     std=image_std)
data_transforms = {
    TRAIN : transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        normalize
    ]),
    VAL : transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        normalize
    ]),
    TEST : transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    normalize
    ]),
}


In [18]:
train_dataset = ImageFolder(root=train_dir,
                           transform = data_transforms[TRAIN])
train_loader = DataLoader(train_dataset, 
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers = num_workers)

val_dataset = ImageFolder(root=val_dir,
                           transform = data_transforms[VAL])
val_loader = DataLoader(val_dataset, 
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers = num_workers)

test_dataset = ImageFolder(root=test_dir,
                           transform = data_transforms[TEST])
test_loader = DataLoader(test_dataset, 
                         batch_size=batch_size,
                         shuffle=True,
                         num_workers = num_workers)

class_names = train_dataset.classes

In [6]:
def imshow(inp, title=None):
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array(image_mean)
    std = np.array(image_std)
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
    plt.imsave("final_data_augmentation.png", inp)


# Get a batch of training data
batch_to_view = next(iter(train_loader))
inputs, classes = batch_to_view[0][:4], batch_to_view[1][:4]

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

# imshow(out, title=[class_names[x] for x in classes])

In [7]:
def get_model(model_name=RESNET50, use_cuda=True):
    model = resnet101(pretrained=True)
    model.fc = torch.nn.Linear(in_features=2048, out_features=2)
    if(use_cuda):
        return model.cuda()
    else:
        return model.cuda()

def get_loss_fn(use_cuda=True):
    if(use_cuda):
        return torch.nn.CrossEntropyLoss().cuda()
    else:
        return torch.nn.CrossEntropyLoss()

def get_optimizer(model, learning_rate):
    return torch.optim.Adam(
        model.fc.parameters(), learning_rate
    )

def get_lr_scheduler(optimizer, step_size = 10, gamma=1):
    return torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=step_size, gamma=gamma)

def persist_model(model_state, save_path):
    torch.save(model_state, save_path)


In [8]:
class ModelMetric():
    '''class to track the avergae accuracy for the model with different datasets
    '''
    
    def __init__(self):
        self.correct_count = 0.0
        self.total_count = 0.0
        self.total_loss = 0.0
    
    def update(self, correct_count, count, loss):
        self.correct_count+=correct_count
        self.total_count+=count
        if(use_cuda):
            loss = loss.cpu().data.numpy()[0]
        else:
            loss = loss.data.numpy()[0]
        self.total_loss+=loss*count
    
    @property
    def accuracy(self):
        return self.correct_count/self.total_count
    
    @property
    def average_loss(self):
        return self.total_loss/self.total_count

In [9]:
def compute_accuracy(y_pred, labels):
    """Computes the number of correct matches in y_pred"""
    _, predictions = y_pred.topk(k = 1, dim=1)
    batch_size = labels.size(0)
    correct_count = torch.sum(predictions.eq(labels.view(-1, 1).expand_as(predictions)))
    if(use_cuda):
        correct_count = correct_count.cpu().data.numpy()[0]
    else:
        correct_count = correct_count.data.numpy()[0]
    total_count = labels.shape[0]
    return (correct_count, total_count)

In [10]:
def val(model, val_loader, loss_fn, metric):
    
    model.eval()
    start_time = time.time()
    
    for (images, labels) in val_loader:
        images = torch.autograd.Variable(images, volatile=True)
        labels = torch.autograd.Variable(labels, volatile=True)
        if(use_cuda):
            images = images.cuda()
            labels = labels.cuda()
        
        predictions = model(images)
        predictions_loss = loss_fn(predictions, labels)
        num_correct_predictions, num_total_predictions = compute_accuracy(predictions, labels)
        metric.update(num_correct_predictions, num_total_predictions, predictions_loss)
        
    print("Validation Accuracy = {}, Validation Loss = {}, Time Taken = {} seconds".format(
        metric.accuracy,
        metric.average_loss,
        time.time() - start_time
    ))
    return metric

In [11]:
def train(model, train_loader, loss_fn, optimizer, metric = None):
    running_loss = 0
    
    model.train()
    
    start_time = time.time()
    
    for (images, labels) in train_loader:
        images = torch.autograd.Variable(images)
        labels = torch.autograd.Variable(labels)
        if(use_cuda):
            images = images.cuda()
            labels = labels.cuda()
            
        predictions = model(images)
        predictions_loss = loss_fn(predictions, labels)
        num_correct_predictions, num_total_predictions = compute_accuracy(predictions, labels)
        metric.update(num_correct_predictions, num_total_predictions, predictions_loss)
        optimizer.zero_grad()
        predictions_loss.backward()
        optimizer.step()
        running_loss += predictions_loss.data[0]
        
    print("Training Accuracy = {}, Training Loss = {}, Time Taken = {} seconds".format(
        train_metric.accuracy,
        train_metric.average_loss,
        time.time() - start_time
    ))
    return metric

In [12]:
model = get_model(model_name=model_name, 
                 use_cuda=use_cuda)
loss_fn = get_loss_fn(use_cuda=use_cuda)
optimizer = get_optimizer(model=model, 
                          learning_rate=learning_rate)
lr_scheduler = get_lr_scheduler(optimizer=optimizer)

In [13]:
best_val_loss = 1e6
train_metric = ModelMetric()
val_metric = ModelMetric()
running_loss = 0
early_stopping_counter = 0
for epoch in range(num_epochs):
    train_metric = train(model = model,
         train_loader = train_loader,
         loss_fn = loss_fn, 
         optimizer = optimizer,
         metric = train_metric)
    lr_scheduler.step()
    
    val_metric = val(model = model,
                          val_loader = val_loader,
                          loss_fn = loss_fn,
                        metric = val_metric)
    val_accuracy = val_metric.accuracy
    val_loss = val_metric.average_loss
    if(val_loss < best_val_loss):
        best_val_loss = val_loss
        model_state = {
            EPOCH: epoch+1,
            STATE_DICT: model.state_dict(),
            VAL_ACCURACY: val_accuracy,
            VAL_LOSS: val_loss
        }
        save_path = "/u/sodhanis/projects/DogsVsCats/model/checkpoint_epoch_"\
        +str(epoch)+"_val_accuracy_"+str(int(val_accuracy*1e4))+"_val_loss_"+str(int(val_loss*1e4))+".path.tar"
        persist_model(model_state=model_state,
                     save_path = save_path)
        early_stopping_counter = 0
    else:
        early_stopping_counter+=1
        if(early_stopping_counter == early_stopping_criteria):
            break
            print("Early stopping")

Training Accuracy = 0.6102926829268293, Training Loss = 0.6533627269209885, Time Taken = 186.75403571128845 seconds
Validation Accuracy = 0.614, Validation Loss = 0.6414008043289184, Time Taken = 17.024537801742554 seconds
Training Accuracy = 0.6268292682926829, Training Loss = 0.642121183075556, Time Taken = 171.85565614700317 seconds
Validation Accuracy = 0.6188, Validation Loss = 0.6419092105388642, Time Taken = 15.967804193496704 seconds
Training Accuracy = 0.6352520325203252, Training Loss = 0.6348179772229698, Time Taken = 173.24706315994263 seconds
Validation Accuracy = 0.6301333333333333, Validation Loss = 0.6332199411710103, Time Taken = 24.93998885154724 seconds
Training Accuracy = 0.6424390243902439, Training Loss = 0.6291671537655156, Time Taken = 179.59887599945068 seconds
Validation Accuracy = 0.6371, Validation Loss = 0.6283570964574814, Time Taken = 15.367051124572754 seconds
Training Accuracy = 0.6478243902439025, Training Loss = 0.6246851948435713, Time Taken = 172.68

In [19]:
def test(model, test_loader, loss_fn, metric):
    
    model.eval()
    start_time = time.time()
    
    for (images, labels) in test_loader:
        images = torch.autograd.Variable(images, volatile=True)
        labels = torch.autograd.Variable(labels, volatile=True)
        if(use_cuda):
            images = images.cuda()
            labels = labels.cuda()
        
        predictions = model(images)
        predictions_loss = loss_fn(predictions, labels)
        num_correct_predictions, num_total_predictions = compute_accuracy(predictions, labels)
        metric.update(num_correct_predictions, num_total_predictions, predictions_loss)
        
    print("Test Accuracy = {}, Test Loss = {}, Time Taken = {} seconds".format(
        metric.accuracy,
        metric.average_loss,
        time.time() - start_time
    ))
    return metric
test_metric = ModelMetric()
test_metric = test(model, test_loader, loss_fn, test_metric)

Test Accuracy = 0.7115, Test Loss = 0.5726364305019379, Time Taken = 20.62707495689392 seconds


In [None]:
def visualize_model(model, num_images=2):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()
    
    for (i, (images, labels)) in enumerate(val_loader):
        images = torch.autograd.Variable(images)
        labels = torch.autograd.Variable(labels)
        if(use_cuda):
            images = images.cuda()
            labels = labels.cuda()

        outputs = model(images)
        _, preds = torch.max(outputs.data, 1)

        for j in range(images.size()[0]):
            images_so_far += 1
            ax = plt.subplot(num_images//2, 2, images_so_far)
            ax.axis('off')
            ax.set_title('predicted: {}'.format(class_names[preds[j]]))
            imshow(images.cpu().data[j])

            if images_so_far == num_images:
                model.train(mode=was_training)
                return
    model.train(mode=was_training)

In [None]:
model_parameters = model.parameters()
params = sum([np.prod(p.size()) for p in model_parameters])
print("Total number of parameters in {} model = {}".format(model_name, params))

model_parameters = model.fc.parameters()
params = sum([np.prod(p.size()) for p in model_parameters])
print("Total number of trainable parameters in the finetuned {} model = {}".format(model_name, params))

In [None]:
# visualize_model(model, num_images=4)