In [None]:
import os
import cv2
import time
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# data augmentation
import albumentations as A

# pretrained models
import torchvision
from torchvision import models, transforms

Image Data from [here](https://www.kaggle.com/andrewmvd/animal-faces)
- Animal Faces

## Resizing

In [None]:
# create a dataframe for our image data
# expected layout on disk: <data_path>/<dataset>/<label>/<image_file>
data_path = 'course_data/afhq'

rows = []
# os.listdir returns entries in arbitrary order, so sort them to get the same
# row order (and therefore the same df.iloc[i]) on every run
for dataset in sorted(os.listdir(data_path)):
    dataset_dir = f'{data_path}/{dataset}'
    if not os.path.isdir(dataset_dir):
        # stray files (e.g. .DS_Store) would make the nested listdir fail
        continue

    for label in sorted(os.listdir(dataset_dir)):
        label_dir = f'{dataset_dir}/{label}'
        if not os.path.isdir(label_dir):
            continue

        for image in sorted(os.listdir(label_dir)):
            rows.append({
                'image_file': image,
                'label': label,
                'dataset': dataset,
                # a bit redundant, could build from other data in __getitem__ if wanted
                'image_path': label_dir,
            })

df = pd.DataFrame(rows)
print(len(df))
df.head()

In [None]:
# split the rows into training and validation frames using the 'dataset' column;
# each frame gets a fresh 0..n-1 index so positional and label indexing agree
df_train, df_val = (
    df[df['dataset'] == split].reset_index(drop=True)
    for split in ('train', 'val')
)
len(df_train), len(df_val)

We're going to work with a pre-trained model that takes in images of size 224x224. We will reduce the resolution as a *pre-processing* step (to 256x256, which leaves room to take a 224x224 crop later) rather than on the fly to save time during training.
- Notice the time we save during each epoch: 3 seconds for me

In [None]:
def resize_img(path, size):
    """Resize the image stored at ``path`` in place and time the resize itself.

    Parameters
    ----------
    path : str
        Location of the image file; the file is overwritten with the resized image.
    size : tuple
        Target size passed straight to ``cv2.resize``.

    Returns
    -------
    float
        Seconds spent inside ``cv2.resize`` only (reading and writing are not timed).

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` could not load the file (it returns ``None`` in that case).
    OSError
        If ``cv2.imwrite`` reports that the resized image was not written.
    """
    img = cv2.imread(path)
    if img is None:
        # fail here with the offending path instead of an opaque error inside cv2.resize
        raise FileNotFoundError(f'could not read image: {path}')

    # perf_counter is monotonic, so the measured interval can never be negative
    start = time.perf_counter()
    img = cv2.resize(img, size)
    end = time.perf_counter()

    if not cv2.imwrite(path, img):
        raise OSError(f'could not write image: {path}')
    return end - start

# resize all of the images (training AND validation) to 256x256
# each frame is indexed with its own rows -- the validation pass must read
# from df_val, otherwise the validation images are never resized
total_time_resize = 0.0
for frame in (df_train, df_val):
    for idx in tqdm(range(len(frame))):
        row = frame.iloc[idx]
        path = row['image_path'] + '/' + row['image_file']

        total_time_resize += resize_img(path, (256, 256))
    

In [None]:
# seconds spent inside cv2.resize, summed over every resize_img call in the loops above
total_time_resize

In [None]:
# spot check: reload one training image from disk and look at its stored shape
row = df_train.iloc[100]
image_path, fname = row['image_path'], row['image_file']
path = image_path + '/' + fname
img = cv2.imread(path)

img.shape

## Data Augmentation with [Albumentations](https://github.com/albumentations-team/albumentations)
- A suite of very fast transformations for images
- Supports masks and keypoints!

In [None]:
from albumentations.pytorch import ToTensorV2

# let's add an augmentation option
class AnimalFacesDataset(Dataset):
    """Image dataset driven by a dataframe with one row per image.

    Each row of ``df`` must provide ``image_path`` (directory), ``image_file``
    (file name) and ``label`` (one of ``'cat'``, ``'dog'``, ``'wild'``).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per image, as described above.
    augment : bool, default False
        When truthy, a random 224x224 crop, random flips, a small random
        rotation and random brightness/contrast are applied.  Otherwise the
        image is only center-cropped to 224x224.  Both pipelines finish with
        ImageNet normalization and conversion to a tensor.
    """

    # per-channel statistics of ImageNet, which the pre-trained ResNet expects
    IMAGENET_MEAN = (0.485, 0.456, 0.406)
    IMAGENET_STD = (0.229, 0.224, 0.225)

    def __init__(self, df, augment=False):
        self.df = df
        self.augment = augment

        # label dictionary
        self.label_dict = {'cat':0, 'dog':1, 'wild':2}

        if augment:
            leading = [
                # spatial transforms
                A.RandomCrop(width=224, height=224),
                A.HorizontalFlip(p=.5),
                A.VerticalFlip(p=.5),
                A.Rotate(limit = 10,
                         border_mode = cv2.BORDER_CONSTANT,
                         value = 0.0, p = .75),

                # pixel-level transformation
                A.RandomBrightnessContrast(p=0.5),
            ]
        else:
            # training/valid images have same size
            leading = [A.CenterCrop(width=224, height=224)]

        # shared tail of both pipelines
        trailing = [
            # scales uint8 pixels by 1/255 (the default max_pixel_value) and then
            # standardizes each channel with the ImageNet mean/std
            A.Normalize(mean=self.IMAGENET_MEAN, std=self.IMAGENET_STD),

            # convert to a tensor and move color channels
            ToTensorV2(),
        ]
        self.transforms = A.Compose(leading + trailing)

    def __len__(self):
        """Number of images, i.e. number of rows in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, transform and return ``(image_tensor, label_tensor)`` for row ``idx``.

        Raises
        ------
        FileNotFoundError
            If the image file cannot be read by ``cv2.imread``.
        KeyError
            If the row's label is not in ``label_dict``.
        """
        row = self.df.iloc[idx]

        # get ingredients for retrieving image
        path = row['image_path'] + '/' + row['image_file']

        # read the img; cv2.imread signals failure by returning None
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError(f'could not read image: {path}')

        # convert to RGB (OpenCV loads images as BGR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # transform the image
        # certain transformations expect the uint8 datatype
        transformed = self.transforms(image=img.astype(np.uint8))
        img = transformed['image']

        label = torch.tensor(self.label_dict[row['label']])

        return img, label

In [None]:
# training data: random augmentations, reshuffled every epoch
ds_train = AnimalFacesDataset(df_train, augment=True)
dl_train = DataLoader(ds_train, batch_size = 16, shuffle=True)

# validation data: no augmentation and no shuffling -- shuffling adds nothing
# to evaluation, and a fixed batch composition keeps the reported numbers repeatable
ds_val = AnimalFacesDataset(df_val)
dl_val = DataLoader(ds_val, batch_size = 16, shuffle=False)

Below we double check that this is working properly, and can see the transformation in practice

In [None]:
# first training sample; with augment=True the result differs on every call
img, label = ds_train[0]
print(img.shape)

# restructure for plt: channels-first tensor -> channels-last array
img = np.transpose(np.array(img), (1,2,0))

# reverse the normalization (the per-channel stats broadcast over the last axis)
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
img = img * np.asarray(std, dtype=img.dtype) + np.asarray(mean, dtype=img.dtype)

# float round-off can leave values a hair outside [0, 1], which makes imshow
# complain about clipping; clip explicitly before plotting
img = np.clip(img, 0.0, 1.0)

plt.imshow(img)
print(label)

## Pretrained Models
- Freezing Layers (feature extraction)
- Finetuning (weight initialization)

In [None]:
# drum roll...the pretrained resnet!
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour of
# a `weights=` argument -- confirm against the installed torchvision version
resnet = models.resnet18(pretrained=True)

In [None]:
# we can see the architecture (a bare expression at the end of a cell displays the module's repr)
# note how many of the layers are organized in "BasicBlock"
resnet

- Notice how the image eventually becomes a 1D vector of dimension 512
- In some sense the network has transformed an image into a vector of features helpful for image classification
- The last layer is a simple function (linear followed by softmax) on this feature space that predicts an image's class
- One strategy is to train a new simple function on this **same** feature space for our classification task

In [None]:
# print a per-layer summary of the model for a (3, 224, 224) input, computed on the cpu
# NOTE(review): `summary` is imported here rather than in the top import cell; the
# later summary(model, ...) call relies on this cell having been run first
from torchsummary import summary
summary(resnet, input_size = (3, 224, 224), device='cpu')

In [None]:
# freeze the whole network: with requires_grad switched off, no parameter
# accumulates gradients, so none of them can be updated by an optimizer
for param in resnet.parameters():
    param.requires_grad_(False)

In [None]:
# re-initialize the last layer for our task (3 classes: cat, dog, wild)
print(resnet.fc)
# take the input width from the layer being replaced instead of hard-coding 512,
# so the same cell keeps working if a different backbone is loaded above
resnet.fc = nn.Linear(resnet.fc.in_features, 3)
print(resnet.fc)

In [None]:
# the replacement fc layer was constructed after the freeze loop ran, so its
# parameters still carry the default requires_grad=True -- it is the only trainable part
for param in resnet.fc.parameters():
    print(param.requires_grad)

In [None]:
# double-check all the parameters: every entry should print False except the
# weight and bias of the newly created fc layer
for name, param in resnet.named_parameters():
    print(f"{name} gradient is set to", param.requires_grad)

In [None]:
# hand the optimizer only the parameters that are still trainable
params_to_update = [param for param in resnet.parameters() if param.requires_grad]

optimizer = optim.Adam(params_to_update, lr=0.001)

In [None]:
# let's make sure that this actually freezes/trains the layers, take a sample weight
# conv1 was frozen above and fc was re-created as trainable; these are the "before"
# values to compare with the same two prints made after the training loop
print(resnet.conv1.weight[0])
print(resnet.fc.bias)

In [None]:
def one_pass(model, dataloader, optimizer, lossFun, backwards=True, print_loss=False):
    """Run the model once over ``dataloader`` and return the mean batch loss.

    Parameters
    ----------
    model : torch.nn.Module
        Network to run; put in train mode when ``backwards`` is truthy, eval mode otherwise.
    dataloader : iterable of (x, y) batches
        Must support ``len()`` (number of batches).
    optimizer : torch.optim.Optimizer
        Only used when ``backwards`` is truthy.
    lossFun : callable
        ``lossFun(y_pred, y)`` returning a scalar tensor.
    backwards : bool, default True
        If truthy, backpropagate and step the optimizer after every batch.
        If falsy, this is a pure evaluation pass and gradient tracking is
        switched off, so no autograd graph is built.
    print_loss : bool, default False
        If truthy, print the average loss before returning it.

    Returns
    -------
    float
        Sum of the per-batch losses divided by the number of batches.

    Raises
    ------
    ZeroDivisionError
        If ``dataloader`` has no batches.
    """
    train_mode = bool(backwards)
    model.train(train_mode)

    total_loss = 0.0
    # autograd bookkeeping is only needed when we are going to call backward()
    with torch.set_grad_enabled(train_mode):
        for x, y in tqdm(dataloader):

            y_pred = model(x)
            loss = lossFun(y_pred, y)
            total_loss += loss.item()

            if train_mode:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    avg_loss = total_loss / len(dataloader)

    if print_loss:
        print(avg_loss)

    return avg_loss

def one_pass_acc(model, dataloader, num_points):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network producing one score per class along dim 1; it is put in eval mode.
    dataloader : iterable of (x, y) batches
        ``y`` holds the integer class index of each sample.
    num_points : int
        Total number of samples covered by ``dataloader`` (the denominator).

    Returns
    -------
    float
        ``1 - (number of wrong predictions / num_points)``.
    """
    model.eval()
    total_incorrect = 0

    # inference only: no gradients are needed, so do not build an autograd graph
    with torch.no_grad():
        for x, y in dataloader:
            # the predicted class is the arg-max of the raw scores; a (log-)softmax
            # is monotonic and would not change which class wins
            y_pred = torch.argmax(model(x), dim=1)

            # a non-zero difference marks a wrong prediction
            total_incorrect += torch.count_nonzero(y - y_pred).item()

    percent_wrong = total_incorrect / num_points
    return 1 - percent_wrong

In [None]:
from tqdm.notebook import tqdm

lossFun = nn.CrossEntropyLoss()

num_epochs = 5
train_losses, valid_losses = [], []

for epoch in tqdm(range(num_epochs)):
    print('Epoch: ', epoch)

    # optimisation pass over the training data
    train_loss = one_pass(resnet, dl_train, optimizer, lossFun, backwards=True)
    train_losses.append(train_loss)
    print('Train loss: ', train_loss)

    # evaluation pass over the validation data (no optimizer steps)
    valid_loss = one_pass(resnet, dl_val, optimizer, lossFun, backwards=False)
    valid_losses.append(valid_loss)
    print('Valid loss: ', valid_loss)

    # accuracy needs its own pass over each loader
    train_acc, valid_acc = (
        one_pass_acc(resnet, dl_train, len(ds_train)),
        one_pass_acc(resnet, dl_val, len(ds_val)),
    )
    print('Train Acc: ', train_acc)
    print('Valid Acc: ', valid_acc)

Note how long it takes to train on images

In [None]:
# same two tensors that were printed before training: conv1 was frozen and never
# given to the optimizer, so it should be unchanged, while fc.bias should have moved
print(resnet.conv1.weight[0])
print(resnet.fc.bias)

If we want to finetune, we can either
- use the resnet as a starting point and train by treating the pretrained weights as a good weight initialization OR
- we can train different layers at different learning rates (the later the layer, the more we want to adjust the feature)

In [None]:
# we can pass the optimizer groups of parameters rather than all the parameters in one group
# the optimizer built so far received a single flat list; print every group it holds
for param_group in optimizer.param_groups:
    print(param_group)

In [None]:
# list the top-level children of the network in order; their positions are what
# the index thresholds in the per-layer learning-rate cell refer to
for name, layer in resnet.named_children():
    print(name)

In [None]:
max_lr = 0.01
params = []
for i, layer in enumerate(resnet.children()):
    group = {'params': layer.parameters()}
    if i < 6:
        # earliest children: smallest learning rate
        group['lr'] = max_lr / 100
    elif i < 9:
        # children 6-8: intermediate learning rate
        group['lr'] = max_lr / 10
    # any later child gets no 'lr' key of its own
    params.append(group)

# only the parameters we didn't manually set the learning rate for inherit the learning rate set when defining the optimizer
# NOTE(review): parameters frozen earlier with requires_grad=False are not re-enabled
# here; they would need requires_grad=True before this optimizer can update them
optimizer = optim.Adam(params, lr = max_lr)

In [None]:
# we can see the parameter groups here: one group per child of the network,
# with its own lr where one was set in the cell above
optimizer

In [None]:
# to make this a bit cleaner you can make a new model class
# use model.features1, model.features2, and model.classifier to set the learning rates
class Tune_ResNet(nn.Module):
    """Pre-trained ResNet-18 split into two feature stages plus a new linear head.

    Parameters
    ----------
    num_classes : int, default 3
        Number of outputs of the new classification layer.

    Attributes are ``features1`` (first six children of the ResNet),
    ``features2`` (the next three), ``classifier`` (the new linear head) and
    ``unroll`` (flattens the pooled features before the head), so each stage
    can be given its own learning rate.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        resnet = models.resnet18(pretrained=True)

        # keep the first nine children, i.e. drop the ResNet's own final fc layer
        layers = list(resnet.children())[:9]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])

        # size the head from the layer it replaces rather than hard-coding 512
        self.classifier = nn.Linear(resnet.fc.in_features, num_classes)
        self.unroll = nn.Flatten()

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        x = self.features1(x)
        x = self.features2(x)
        x = self.unroll(x)
        x = self.classifier(x)
        return x
    
# build the model (its __init__ loads a pretrained resnet18) and print a per-layer
# summary for a (3, 224, 224) input, computed on the cpu
model = Tune_ResNet()
summary(model, input_size = (3, 224, 224), device='cpu')

## Training on a GPU
- We saw how slow it was to train on images using a CPU
- PyTorch makes it easy to do this training on a GPU!
- Always follow GPU etiquette and check who is running what

In [None]:
# is a GPU available? (asks PyTorch whether CUDA can be used from this process)
torch.cuda.is_available()

In [None]:
# check who is using what
# (`!` runs nvidia-smi as a shell command, so this cell needs that tool to be installed)
!nvidia-smi

In [None]:
# how many devices are there? valid values for device_no below are 0 .. count-1
torch.cuda.device_count()

In [None]:
device_no = 0
# use the chosen GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device(device_no) if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
# move model parameters to device
# (for an nn.Module, .to() moves the parameters in place and returns the module itself,
# which is why the architecture is displayed as this cell's output)
model.to(device)

In [None]:
# let's adapt our earlier function
def one_pass(model, dataloader, optimizer, lossFun, device, backwards=True, print_loss=False):
    """Run the model once over ``dataloader`` on ``device`` and return the mean batch loss.

    Parameters
    ----------
    model : torch.nn.Module
        Network to run; it must already live on ``device``.  Put in train mode
        when ``backwards`` is truthy, eval mode otherwise.
    dataloader : iterable of (x, y) batches
        Must support ``len()`` (number of batches).
    optimizer : torch.optim.Optimizer
        Only used when ``backwards`` is truthy.
    lossFun : callable
        ``lossFun(y_pred, y)`` returning a scalar tensor.
    device : torch.device
        Every batch is moved here before the forward pass.
    backwards : bool, default True
        If truthy, backpropagate and step the optimizer after every batch.
        If falsy, this is a pure evaluation pass and gradient tracking is
        switched off, so no autograd graph is built.
    print_loss : bool, default False
        If truthy, print the average loss before returning it.

    Returns
    -------
    float
        Sum of the per-batch losses divided by the number of batches.

    Raises
    ------
    ZeroDivisionError
        If ``dataloader`` has no batches.
    """
    train_mode = bool(backwards)
    model.train(train_mode)

    total_loss = 0.0
    # autograd bookkeeping is only needed when we are going to call backward()
    with torch.set_grad_enabled(train_mode):
        for x, y in tqdm(dataloader):

            # send labelled data to the device
            x, y = x.to(device), y.to(device)

            y_pred = model(x)
            loss = lossFun(y_pred, y)
            total_loss += loss.item()

            if train_mode:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    avg_loss = total_loss / len(dataloader)

    if print_loss:
        print(avg_loss)

    return avg_loss

Note that
- The model can only take inputs on the same device
- The output is also on the specified device and cannot interact with tensors on a different device

In [None]:
x, y = next(iter(dl_train))

# move to device
x, y = x.to(device), y.to(device)

# perform computation; the model was moved to `device` earlier, so inputs must be there too
y_pred = model(x)

# .cpu() returns a CPU version of the tensor (shown as the cell output);
# y_pred itself is not reassigned here, so the variable still refers to the tensor on `device`
y_pred.cpu()