In [None]:
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset
import time
import os

import random
from torchvision.io import read_image
from PIL import Image
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
"""
when we train a neural network we need:

- 
-
-
-
- 
- 

"""

In [None]:
"""
In this tutorial we will do transfer learning.

Generally, this means taking a model that was trained on one dataset, called a "pre-trained model",
and using it in "some way" to perform classification (or inference in general) on
some other dataset, sampled from a different data distribution, and usually with a completely
different set of classes.

What are "some ways" we can use a pre-trained model?

1. fine-tune 
- view it as a more favorable "random initialization"
- do not freeze the pre-trained model
- what do I mean by freeze?


2. embedder 
- we must freeze the pre-trained model
- map inputs from the new dataset to features that are the outputs of the pre-trained model
- these embeddings should carry semantic information on the classes on the new dataset, though not perfectly
- can build a simple classifier on top of this embedding layer
- we can look at the embedder + new simple classifier as either a new model, or we can take the perspective
that we are simply using the pre-trained model to map data instances to new features, and the new features
are the inputs for a new smaller model


Some questions we might need to ask ourselves:

"Which layer do we use for the output layer of the pre-trained model?"

Framed differently,

"Where do we cut off the head of the network?"

Both of which should lead us to ask:

"How do we determine which layer to use for the output of the pre-trained model?"



For this tutorial, we will use a model pre-trained on IMAGENET_1K (1000 classes), and will
apply it to the downstream dataset: Caltech-101

"""

In [None]:
# train epoch
def train_epoch(model, optimizer, loss_func, dataloader, device=None):
    """
    Run one training pass over `dataloader` and collect labels/predictions.

    args:
    -----

    model (nn.Module): network to optimize; it is switched to train mode.
    optimizer (torch.optim.Optimizer): zeroed and stepped once per batch.
    loss_func (callable): called as loss_func(output, labels); its result
        must support .backward().
    dataloader (iterable): yields (images, labels) batches.
    device (torch.device or str, optional): when given, every batch is moved
        to this device before the forward pass. The model itself is NOT moved
        here, so the caller must already have placed it on the same device.
        The default (None) leaves the batches where they are.

    returns:
    --------

    (epoch_labels, epoch_preds): two flat Python lists with one entry per
    sample seen, in iteration order. Predictions are the arg-max over
    dimension 1 of the model output.
    """
    # for metrics computations
    epoch_preds = []
    epoch_labels = []

    # set the model to be in "train" mode
    model.train()

    # iterate through the dataloader, batch by batch
    for images, labels in dataloader:

        # optionally move the batch to the requested device (e.g. a GPU)
        if device is not None:
            images = images.to(device)
            labels = labels.to(device)

        # zero the gradients left over from the previous step
        optimizer.zero_grad()

        # make sure we are tracking gradients, even if the caller disabled them
        with torch.set_grad_enabled(True):
            output = model(images)
            loss = loss_func(output, labels)
            loss.backward()
            optimizer.step()

        # metrics: predictions come from the forward pass made *before* this
        # batch's optimizer step
        with torch.no_grad():
            _, preds = torch.max(output, 1)
            epoch_preds.extend(preds.detach().tolist())
            epoch_labels.extend(labels.detach().tolist())

    return epoch_labels, epoch_preds

In [None]:
# validation epoch
def val_epoch(model, dataloader, device=None):
    """
    Run one evaluation pass over `dataloader` without updating the model.

    args:
    -----

    model (nn.Module): network to evaluate; it is switched to eval mode.
    dataloader (iterable): yields (images, labels) batches.
    device (torch.device or str, optional): when given, every batch is moved
        to this device before the forward pass. The model itself is NOT moved
        here, so the caller must already have placed it on the same device.
        The default (None) leaves the batches where they are.

    returns:
    --------

    (epoch_labels, epoch_preds): two flat Python lists with one entry per
    sample seen, in iteration order. Predictions are the arg-max over
    dimension 1 of the model output.
    """
    # for metrics computations
    epoch_preds = []
    epoch_labels = []

    # set the model to be in "eval" mode
    model.eval()

    # iterate through the dataloader, batch by batch
    for images, labels in dataloader:

        # optionally move the batch to the requested device (e.g. a GPU)
        if device is not None:
            images = images.to(device)
            labels = labels.to(device)

        # don't track gradients
        with torch.no_grad():

            # pass data through the network
            output = model(images)

            # metrics
            _, preds = torch.max(output, 1)
            epoch_preds.extend(preds.detach().tolist())
            epoch_labels.extend(labels.detach().tolist())

    return epoch_labels, epoch_preds

In [None]:
# hyper-parameters / configuration, grouped by what they control

# -- filesystem --
# root folder whose sub-folders are listed as the class folders
PATH = '/home/jovyan/multimodal-vol-1/MLTS/CALTECH101/caltech-101/101_ObjectCategories/'
# where the best checkpoint is written by the training loop
MODEL_WEIGHT_SAVE_PATH = '/home/jovyan/multimodal-vol-1/MLTS/best-transferlearning-weights.pth'

# -- data --
TRAIN_SPLIT = 0.75   # fraction of each class handed to the training split
BATCH_SIZE = 32      # batch size for both dataloaders
NUM_CLASSES = 101    # output width of the replacement classifier head

# -- model --
EMBEDDING_DIM = 512  # input width of the replacement classifier head

# -- optimization --
LEARNING_RATE = 0.1
NUM_EPOCHS = 10

In [None]:
# transforms
# Per-channel mean / std applied after ToTensor.
# NOTE(review): presumably the ImageNet channel statistics expected by the
# pre-trained weights -- confirm against the model documentation.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

preprocessing = transforms.Compose([
    # Resize is the supported replacement for the deprecated transforms.Scale,
    # which newer torchvision releases no longer provide.
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

In [None]:
def sample_image_for_viewing():
    """
    Pick a random class folder under PATH, print its name, and return one
    randomly chosen image from it as a PIL image.

    The "Faces_easy" folder is excluded so only the "Faces" class is used.
    Entries containing '.ipy' (e.g. notebook checkpoint folders) are skipped,
    matching the filter used when the datasets are built, so that
    Image.open is never pointed at a non-image entry of that kind.

    returns:
    --------

    PIL.Image.Image: the opened (not yet transformed) image.
    """
    # keep only real class directories; drop "Faces_easy"
    class_folders = [
        c for c in os.listdir(PATH)
        if c != 'Faces_easy' and os.path.isdir(os.path.join(PATH, c))
    ]

    class_ = np.random.choice(class_folders)
    print("Class: ", class_)

    class_path = os.path.join(PATH, class_)
    # same '.ipy' filter as the dataset builder
    files = [f for f in os.listdir(class_path) if '.ipy' not in f]
    instance = np.random.choice(files)

    return Image.open(os.path.join(class_path, instance))
   
        

In [None]:
# Draw one random image; the bare `img` below is the cell's displayed output.
img = sample_image_for_viewing()
img

In [None]:
# Run the sampled image through the preprocessing pipeline and show the
# shape of the resulting tensor.
y = preprocessing(img)
y.shape

In [None]:
# Dataset objects

class CALTECH101_DATASET(Dataset):
    """
    Dataset that opens each image from disk when it is indexed, rather than
    holding decoded images in memory.
    """

    def __init__(self, input_paths, labels, transform=None):
        """
        args:

        input_paths (dict): dataset index -> path of the image file
        labels (torch.tensor): one label per index; its length is the dataset size
        transform (callable, optional): applied to the opened PIL image
        """
        self.transform = transform
        self.labels = labels
        self.N = len(labels)
        self.input_paths = input_paths
        # empty dict created at construction; nothing in this class reads it
        self.inputs = dict()

    def __len__(self):
        """Number of samples (the length of `labels`)."""
        return self.N

    def __getitem__(self, idx):
        """Open the image for `idx`, transform it if requested, and pair it with its label."""
        image = Image.open(self.input_paths[idx])
        if not self.transform:
            return image, self.labels[idx]
        transformed = self.transform(image)
        return transformed, self.labels[idx]


In [None]:
def is_single_channel_images(img):
    """
    Return True when `img`, converted with ToTensor, does not have exactly
    three entries along its first dimension; return False otherwise.

    Note that despite the name, any channel count other than 3 is reported
    as True, not only single-channel images.
    """
    as_tensor = transforms.ToTensor()(img)
    return as_tensor.shape[0] != 3

In [None]:
# Build Caltech101 dataset objects
def _list_three_channel_images(class_dir):
    """Return the sorted file names in `class_dir` that load as 3-channel images."""
    usable = []
    # sorted: os.listdir order is arbitrary, so a seeded shuffle of the result
    # would otherwise still differ between machines
    for fname in sorted(os.listdir(class_dir)):
        # skip notebook artifacts such as checkpoint folders
        if '.ipy' in fname:
            continue
        # context manager closes the probe image's file handle right away
        with Image.open(os.path.join(class_dir, fname)) as probe:
            if not is_single_channel_images(probe):
                usable.append(fname)
    return usable


def build_caltech101_datasets(path, train_split=0.8, transforms=None):
    """
    Build a train and a validation dataset from a folder-per-class image tree.

    Every class folder is split independently, so each class contributes
    roughly `train_split` of its images to the training set.

    args:
    -----

    path (str): path/to/class/folders/
    train_split (float): proportion of each class that goes to the training dataset.
    transforms (callable, optional): transform handed to both datasets.

    returns:
    --------

    (train, val): two CALTECH101_DATASET objects.

    notes:
    ------

    - The "Faces_easy" folder is dropped so only the "Faces" class is used.
    - Class folders are sorted before indices are assigned, so the
      class-name -> index mapping is the same on every run and machine.
    - Entries of `path` that are not directories are ignored.
    - Images that do not load as 3-channel tensors are excluded.
    - The per-class split uses `random.shuffle`; seed the `random` module
      beforehand for a reproducible split.
    """
    class_folders = sorted(
        c for c in os.listdir(path)
        if c != 'Faces_easy' and os.path.isdir(os.path.join(path, c))
    )

    # map class names (strings) to class indices
    class_to_idx_mapper_dict = {name: idx for idx, name in enumerate(class_folders)}

    train_paths, val_paths = [], []
    train_labels, val_labels = [], []

    # let's split up the data class by class
    for class_folder in class_folders:
        class_idx = class_to_idx_mapper_dict[class_folder]
        this_class_path = os.path.join(path, class_folder)

        files = _list_three_channel_images(this_class_path)

        # number of instances of this class that go to the training split;
        # the remainder goes to validation
        num_train = int(len(files) * train_split)

        # random sample
        random.shuffle(files)

        for inst in files[:num_train]:
            train_paths.append(os.path.join(this_class_path, inst))
            train_labels.append(class_idx)

        for inst in files[num_train:]:
            val_paths.append(os.path.join(this_class_path, inst))
            val_labels.append(class_idx)

    # build the datasets; paths are keyed by their running index (0, 1, 2, ...)
    train = CALTECH101_DATASET(dict(enumerate(train_paths)),
                               torch.tensor(train_labels),
                               transforms)
    val = CALTECH101_DATASET(dict(enumerate(val_paths)),
                             torch.tensor(val_labels),
                             transforms)

    return train, val
    



In [None]:
# Split the class folders under PATH into train / validation datasets that
# share the same preprocessing pipeline.
train, val = build_caltech101_datasets(
    path=PATH,
    train_split=TRAIN_SPLIT,
    transforms=preprocessing,
)

In [None]:
# dataloaders: only the training loader shuffles
train_dataloader = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# pick our pre-trained model architecture
from torchvision.models import resnet50, resnet18

# ResNet-18 loaded with pre-trained weights
net = resnet18(pretrained=True)
print(net)

# count the scalar weights that currently require gradients
params = (p for p in net.parameters() if p.requires_grad)
num_params = sum([np.prod(p.size()) for p in params])
print("Model parameters: ", num_params)




In [None]:
# freeze the weights: turn off gradient tracking for every parameter
for p in net.parameters():
    p.requires_grad_(False)

# re-count the parameters that still require gradients
params = (p for p in net.parameters() if p.requires_grad)
num_params = sum([np.prod(p.size()) for p in params])
print("Model parameters: ", num_params)


In [None]:
"""
either change net.fc to something else (single layer, or a  Sequential)
OR modify source code?

modules = list(net.children())[:-1]
resnet = nn.Sequential(*modules)



OR


class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x
        
net.fc = Identity()



OR


net.fc = nn.Linear(EMBEDDING_DIM, NUM_CLASSES)



OR


net.fc = nn.Sequential(put,
                        my,
                        layers,
                        and,
                        activations,
                        here
                        )

"""


In [None]:
# swap the final fully-connected layer for a fresh linear classifier
net.fc = nn.Linear(in_features=EMBEDDING_DIM, out_features=NUM_CLASSES)

# count the parameters that require gradients after the swap
params = (p for p in net.parameters() if p.requires_grad)
num_params = sum([np.prod(p.size()) for p in params])
print("Model parameters: ", num_params)


In [None]:
# Print the architecture again so the replaced `fc` layer can be inspected.
print(net)

In [None]:
# optimizer: AdamW over the network's parameters at the configured learning rate
optimizer = optim.AdamW(params=net.parameters(), lr=LEARNING_RATE)

In [None]:
# loss: cross-entropy, passed to train_epoch as `loss_func` by the training loop
criteria = nn.CrossEntropyLoss()

In [None]:
# tracking metrics (let's use scikitlearn now)
# best_acc starts below any attainable accuracy so the first epoch always wins
best_acc = -1.0
# per-epoch accuracy histories
train_accs, val_accs = [], []

In [None]:
# training loop
for epoch in range(1, NUM_EPOCHS+1):
    # epochs are numbered 1..NUM_EPOCHS, so the total is NUM_EPOCHS
    print("Epoch: ", epoch, " of ", NUM_EPOCHS)

    # train
    labels, preds = train_epoch(net, optimizer, criteria, train_dataloader)

    # metrics (only the accuracy is used for the training split)
    acc = accuracy_score(labels, preds)
    print("Train accuracy: ", acc)
    train_accs.append(acc)

    # validation
    labels, preds = val_epoch(net, val_dataloader)

    # metrics
    cm, acc = confusion_matrix(labels, preds), accuracy_score(labels, preds)
    print("Validation accuracy: ", acc)
    val_accs.append(acc)

    # update best model: checkpoint whenever validation accuracy improves
    if acc > best_acc:
        best_acc = acc
        print("We've got ourselves a winner over here!")
        print(cm)
        state_dict = {'weights': net.state_dict(),
                      'epoch': epoch,
                      # float() accepts both a plain Python float and a numpy
                      # scalar, whereas .item() only exists on the latter
                      'val_acc': float(acc)
                      }
        torch.save(state_dict, MODEL_WEIGHT_SAVE_PATH)

In [None]:
# persist the per-epoch accuracy histories for later comparison
results = dict(train=train_accs, val=val_accs)
torch.save(
    results,
    '/home/jovyan/multimodal-vol-1/MLTS/resnet18_pretrained_frozen_results.pth',
)

In [None]:
# plotting: per-epoch train vs. validation accuracy
plt.plot(train_accs, color='r', label="Train")
plt.plot(val_accs, color='k', label="Validation")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
# plt.title requires the title text; calling it with no argument raises TypeError
plt.title("Pre-trained ResNet-18 (frozen) on Caltech-101")
plt.legend()
plt.show()


In [None]:
# ok, what about if we trained from scratch? what if we fine-tune?

