In [1]:
from google.colab import drive
import os
# Mount Google Drive at /content/gdrive so the notebook can reach files on Drive by path.
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Work from the project folder on Drive; the relative "../data/..." paths used below start from here.
%cd /content/gdrive/MyDrive/Colab Notebooks/DeepLearning2/6.TransferLearning

/content/gdrive/MyDrive/Colab Notebooks/DeepLearning2/6.TransferLearning


In [3]:
# List the dataset folder to check which archives and directories are present.
!ls ../data/dogvscat/

test  test.zip	train  train.zip


In [4]:
import os
import zipfile
import glob
import time
import pickle
import pprint
import math

%matplotlib inline
import matplotlib.pyplot as plt
import PIL

import numpy as np
import pandas as pd
import sklearn.model_selection

import torch
import torch.nn as nn
import torch
import torch.optim as optim
import torch.nn.functional as F

import torchvision
import torch.utils.data
from torch.utils.data import Dataset, DataLoader

import tqdm
import random
import torchvision.transforms as T

from sklearn.model_selection import train_test_split

# Prepare Dataset

In [5]:
# Root folders of the labelled training images and the unlabelled test images.
TRAIN_DIR = '../data/dogvscat/train/train'
TEST_DIR = '../data/dogvscat/test/test'

# Training images are matched one folder below TRAIN_DIR (TRAIN_DIR/<folder>/<name>.jpg);
# test images are matched directly inside TEST_DIR.
# Without recursive=True a "**" pattern behaves exactly like "*", so plain "*" is used.
# glob returns matches in arbitrary (filesystem-dependent) order, so the results are
# sorted to keep the image order stable from one run to the next.
train_images = sorted(glob.glob(TRAIN_DIR + "/*/*.jpg"))
test_images = sorted(glob.glob(TEST_DIR + "/*.jpg"))

In [6]:
# Class name <-> integer label. The class name is the folder an image sits in.
class_to_int = {"dogs" : 0, "cats" : 1}
int_to_class = {0 : "dogs", 1 : "cats"}

# Group the training images by the name of their parent folder.
dogs_list = [img for img in train_images if img.split("/")[-2] == "dogs"]
cats_list = [img for img in train_images if img.split("/")[-2] == "cats"]

print("Dogs Images: ",len(dogs_list))
print("Cats Images: ",len(cats_list))

# An empty class, or images that sit in a folder that is neither "dogs" nor "cats",
# would make the labels unusable for binary training, so report it right here
# instead of letting it surface later as a meaningless model or a KeyError.
unmatched = len(train_images) - len(dogs_list) - len(cats_list)
if not dogs_list or not cats_list or unmatched:
    print("WARNING: expected images for every class in {}, but found {} dogs, {} cats and "
          "{} image(s) in other folders under {} - check the extracted folder names."
          .format(sorted(class_to_int), len(dogs_list), len(cats_list), unmatched, TRAIN_DIR))

Dogs Images:  0
Cats Images:  10902


In [7]:
from PIL import Image
class CatDogDataset(Dataset):
    """Dataset over a list of cat/dog image file paths.

    Args:
        imgs: Sequence of image file paths.
        class_to_int: Mapping from class name to integer label. The class name
            of an image is the name of the folder that contains it.
        mode: "train" or "val" to get ``(image, label)`` items, "test" to get
            ``(image, image_path)`` items.
        transforms: Optional callable applied to the loaded PIL image. When it
            is None the RGB PIL image is returned unchanged.

    Raises (from ``__getitem__``):
        ValueError: if ``mode`` is not one of "train", "val" or "test".
        KeyError: in "train"/"val" mode, if the parent folder name of an image
            is not a key of ``class_to_int``.
    """

    _LABELLED_MODES = ("train", "val")
    _ALL_MODES = _LABELLED_MODES + ("test",)

    def __init__(self, imgs, class_to_int, mode = "train", 
                 transforms = None):
        super().__init__()
        self.imgs = imgs
        self.class_to_int = class_to_int
        self.mode = mode
        self.transforms = transforms

    def _load_image(self, image_name):
        """Open ``image_name``, force 3-channel RGB and apply the transforms."""
        # Grayscale, palette or RGBA files would otherwise yield tensors whose
        # channel count does not fit a 3-channel Normalize / pretrained network.
        # convert() returns an independent image, so the file can be closed here.
        with Image.open(image_name) as raw:
            img = raw.convert("RGB")
        if self.transforms is not None:
            img = self.transforms(img)
        return img

    def __getitem__(self, idx):
        # Fail loudly on a mistyped mode instead of silently returning None.
        if self.mode not in self._ALL_MODES:
            raise ValueError(
                "mode must be one of {}, got {!r}".format(self._ALL_MODES, self.mode))

        image_name = self.imgs[idx]
        img = self._load_image(image_name)

        if self.mode == "test":
            # No label available: hand back the path so predictions can be matched to files.
            return img, image_name

        # The label comes from the parent folder name (.../<class>/<file>.jpg).
        class_name = os.path.basename(os.path.dirname(image_name))
        # float32 so the label can be fed straight into a binary cross-entropy loss.
        label = torch.tensor(self.class_to_int[class_name], dtype = torch.float32)
        return img, label

    def __len__(self):
        return len(self.imgs)

In [8]:
def get_train_transform():
    """Return the augmentation + preprocessing pipeline applied to training images."""
    # Channel-wise mean / std of the ImageNet distribution
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)

    augmentation = [
        T.RandomHorizontalFlip(p=0.5),  # flip half of the images
        T.RandomRotation(15),           # rotate by a random angle of at most 15 degrees
    ]
    preprocessing = [
        T.Resize((256, 256)),
        T.RandomResizedCrop(224),       # random crop, output size 224
        T.ToTensor(),
        T.Normalize(imagenet_mean, imagenet_std),
    ]
    return T.Compose(augmentation + preprocessing)
    
def get_val_transform():
    """Return the deterministic preprocessing pipeline for validation / test images."""
    # Channel-wise mean / std of the ImageNet distribution
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)

    steps = [
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(imagenet_mean, imagenet_std),
    ]
    return T.Compose(steps)

In [9]:
# Hold out 20% of the labelled images for validation.
# A fixed random_state makes the shuffle reproducible: for a given ordering of
# train_images the same images land in the validation set on every run, so
# validation scores stay comparable between runs.
SPLIT_SEED = 42
train_imgs, val_imgs = train_test_split(train_images, test_size = 0.2, random_state = SPLIT_SEED)

In [10]:
# Training data gets the augmenting pipeline; validation and test data get the
# deterministic resize + normalize pipeline.
train_dataset = CatDogDataset(train_imgs, class_to_int, mode = "train", 
                              transforms = get_train_transform())
val_dataset = CatDogDataset(val_imgs, class_to_int, mode = "val", 
                            transforms = get_val_transform())
test_dataset = CatDogDataset(test_images, class_to_int, mode = "test", 
                             transforms = get_val_transform())

# Only the training loader shuffles.
train_data_loader = DataLoader(
    dataset = train_dataset,
    num_workers = 2,
    batch_size = 32,
    shuffle = True
)

# Validation metrics are averaged per batch, so a fixed batch composition is
# needed for the score of the same model to be repeatable: no shuffling here.
val_data_loader = DataLoader(
    dataset = val_dataset,
    num_workers = 2,
    batch_size = 16,
    shuffle = False
)

# One image per batch, in file order, so each prediction pairs with its image path.
test_data_loader = DataLoader(
    dataset = test_dataset,
    num_workers = 2,
    batch_size = 1,
    shuffle = False
)

# Fine tuning mobilenet_v2

## Initialize target model

In [19]:
from torchvision.models import mobilenet_v2

# Load MobileNetV2 with its pretrained weights.
model = mobilenet_v2(pretrained = True)

# Swap the last classifier entry (index 1) for a single-output head:
# 1280 input features -> 1 value, squashed into (0, 1) by a sigmoid.
model.classifier[1] = nn.Sequential(
    nn.Linear(in_features = 1280, out_features = 1, bias = True),
    nn.Sigmoid(),
)

In [22]:
# Show the replaced classifier entry to confirm the new head is in place.
model.classifier[1]

Sequential(
  (0): Linear(in_features=1280, out_features=1, bias=True)
  (1): Sigmoid()
)

In [23]:
# Xavier (Glorot) uniform initialisation for the weight of every linear layer in the new head;
# the other layers of the head are left untouched.
for layer in model.classifier[1]:
  if not isinstance(layer, nn.Linear):
    continue
  nn.init.xavier_uniform_(layer.weight)

# Warm up

We need to warm up the model for 5 epochs.

In [13]:
# Loss: binary cross-entropy on probabilities (the model head ends in a Sigmoid)
criterion = nn.BCELoss()

# Per-epoch history, filled by the train / validation functions and handy for plotting afterwards
train_logs = {key: [] for key in ("loss", "accuracy", "time")}
val_logs = {key: [] for key in ("loss", "accuracy", "time")}

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Move the model parameters to the chosen device
model.to(device)

# Number of warm-up epochs and of regular training epochs
warm_up_epochs, epochs = 5, 15

In [24]:
def accuracy(preds, trues):
    """Percentage of predictions that match the targets after thresholding at 0.5.

    Args:
        preds: Predicted probabilities (tensor or array-like). A value >= 0.5
            counts as class 1, anything below as class 0.
        trues: Ground-truth labels (0 or 1) with the same number of elements
            as ``preds``.

    Returns:
        Accuracy in percent as a Python float; 0.0 when there are no predictions.

    Raises:
        ValueError: if ``preds`` and ``trues`` hold a different number of elements.
    """
    # detach(): the metric must not extend the autograd graph.
    # reshape(-1): compare element by element even if one side is [N, 1] and the
    # other [N]; without flattening, `==` would broadcast to an [N, N] matrix.
    preds = torch.as_tensor(preds).detach().reshape(-1)
    trues = torch.as_tensor(trues).detach().reshape(-1).to(preds.device)

    total = preds.numel()
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    if trues.numel() != total:
        raise ValueError(
            "preds and trues must have the same number of elements, got {} and {}"
            .format(total, trues.numel()))

    # One vectorised comparison instead of a Python-level loop over single elements.
    predicted = (preds >= 0.5).to(trues.dtype)
    correct = (predicted == trues).sum().item()
    return correct / total * 100

In [25]:
def train_one_epoch(train_data_loader, model, optimizer):
    """Run one optimisation pass over ``train_data_loader``.

    Uses the notebook-level ``device``, ``criterion``, ``accuracy`` and
    ``train_logs``. The epoch's mean loss, mean accuracy and duration are
    appended to ``train_logs``.

    Args:
        train_data_loader: Iterable of ``(images, labels)`` batches.
        model: Network to train; switched to training mode here.
        optimizer: Optimizer that updates the model parameters.

    Returns:
        Tuple ``(mean batch loss, mean batch accuracy, elapsed seconds)``.
    """
    batch_losses, batch_accs = [], []
    started = time.time()
    model.train()

    for images, labels in train_data_loader:
        # Move the batch to the compute device
        images, labels = images.to(device), labels.to(device)
        # Column vector [N, 1] so the targets line up with the model output
        labels = labels.reshape((labels.shape[0], 1))

        # Clear the gradients left over from the previous batch, then forward
        optimizer.zero_grad()
        preds = model(images)

        # Loss and accuracy of this batch
        batch_loss = criterion(preds, labels)
        batch_losses.append(batch_loss.item())
        batch_accs.append(accuracy(preds, labels))

        # Backpropagate and update the parameters
        batch_loss.backward()
        optimizer.step()

    elapsed = time.time() - started

    # Epoch figures are the plain means over the batches
    mean_loss, mean_acc = np.mean(batch_losses), np.mean(batch_accs)

    # Keep the history for later plotting
    for key, value in (("loss", mean_loss), ("accuracy", mean_acc), ("time", elapsed)):
        train_logs[key].append(value)

    return mean_loss, mean_acc, elapsed

In [26]:
def val_one_epoch(val_data_loader, model, best_val_acc, model_name):
    """Evaluate ``model`` on ``val_data_loader`` and checkpoint it when it improves.

    Uses the notebook-level ``device``, ``criterion``, ``accuracy`` and
    ``val_logs``. The epoch's mean loss, mean accuracy and duration are
    appended to ``val_logs``.

    Args:
        val_data_loader: Iterable of ``(images, labels)`` batches.
        model: Network to evaluate; switched to evaluation mode here.
        best_val_acc: Best validation accuracy seen so far.
        model_name: Checkpoint path prefix; the weights are written to
            ``model_name + "_best.pth"``.

    Returns:
        Tuple ``(mean batch loss, mean batch accuracy, elapsed seconds,
        best_val_acc)``, where ``best_val_acc`` is updated if this epoch's
        accuracy is strictly higher than the value passed in.
    """
    epoch_loss = []
    epoch_acc = []
    start_time = time.time()
    model.eval()

    # Evaluation never calls backward(), so gradient tracking is switched off:
    # no autograd graph is built for the forward passes.
    with torch.no_grad():
        for images, labels in val_data_loader:

            #Loading images and labels to device
            images = images.to(device)
            labels = labels.to(device)
            labels = labels.reshape((labels.shape[0], 1)) # [N, 1] - to match with preds shape

            #Forward
            preds = model(images)

            #Calculating Loss
            epoch_loss.append(criterion(preds, labels).item())

            #Calculating Accuracy
            epoch_acc.append(accuracy(preds, labels))

    ###Overall Epoch Results
    total_time = time.time() - start_time

    ###Acc and Loss (plain means over the batches)
    epoch_loss = np.mean(epoch_loss)
    epoch_acc = np.mean(epoch_acc)

    ###Storing results to logs
    val_logs["loss"].append(epoch_loss)
    val_logs["accuracy"].append(epoch_acc)
    val_logs["time"].append(total_time)

    ###Saving best model (only on a strict improvement)
    if epoch_acc > best_val_acc:
        best_val_acc = epoch_acc
        torch.save(model.state_dict(),model_name+"_best.pth")

    return epoch_loss, epoch_acc, total_time, best_val_acc

## Method 1: Use a 10x larger learning rate for the fully connected layer than for the CNN layers

In [30]:
# Base learning rate for the pretrained convolutional layers; the freshly
# initialised classifier head is trained 10x faster.
learning_rate = 0.00001
head_learning_rate = learning_rate * 10

# Split the parameters by identity instead of by hard-coded parameter names, so the
# split stays correct if the layout of the head changes.
head_params = list(model.classifier.parameters())
head_param_ids = {id(param) for param in head_params}
params_1x = [param for param in model.parameters() if id(param) not in head_param_ids]

# Optimizer used by the training loop. It carries two parameter groups: the first
# uses the base learning rate, the second overrides it with the 10x rate for the head.
optimizer = torch.optim.Adam([{'params': params_1x},
                              {'params': head_params, 'lr': head_learning_rate}],
                             lr = learning_rate)

# Learning Rate Scheduler: halves the learning rate of every parameter group each
# 5 scheduler steps. It only takes effect if lr_scheduler.step() is called.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 5, gamma = 0.5)

# SGD alternative with the same two parameter groups plus weight decay.
trainer = torch.optim.SGD([{'params': params_1x},
                            {'params': head_params, 'lr': head_learning_rate}],
                        lr=learning_rate, weight_decay=0.001)

In [None]:
# Train for `epochs` epochs: one training pass and one validation pass per epoch.
# val_one_epoch saves the weights to "mobilenet_v2_best.pth" whenever the
# validation accuracy improves on best_val_acc.
best_val_acc = 0
for epoch in range(epochs):
    ###Training
    loss, acc, _time = train_one_epoch(train_data_loader, model, optimizer)
    
    #Print Epoch Details
    print("\nTraining")
    print("Epoch {}".format(epoch+1))
    print("Loss : {}".format(round(loss, 4)))
    print("Acc : {}".format(round(acc, 4)))
    print("Time : {}".format(round(_time, 4)))
    
    ###Validation
    loss, acc, _time, best_val_acc = val_one_epoch(val_data_loader, model, best_val_acc, "mobilenet_v2")
    
    #Print Epoch Details
    print("\nValidating")
    print("Epoch {}".format(epoch+1))
    print("Loss : {}".format(round(loss, 4)))
    print("Acc : {}".format(round(acc, 4)))
    print("Time : {}".format(round(_time, 4)))

    ###Learning rate schedule
    # Advance the StepLR schedule once per epoch, after this epoch's optimizer
    # updates; without this call the learning rate would never decay.
    lr_scheduler.step()