<center><h1>VISION TRANSFORMER (ViT)</h1></center>
Because there is a presentation associated with this code, I will not be extensively annotating it. If you have any questions, please do not hesitate to contact me. 

In [None]:

#########################################
######   INSTALLATIONS FOR COLAB   ######
#########################################

!pip install torch_lr_finder
!pip install timm

###########################
######   LIBRARIES   ######
###########################

# ------ STANDARD ------ #
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import time
import copy
import statistics 
from tqdm.notebook import tqdm  
import random
%matplotlib inline

# ------ GOOGLE COLAB ------ #
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# ------ TORCH MODULES ------ #
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import ToTensor, Compose, Normalize, Resize
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split
import torch.optim as optim
from torch.optim import lr_scheduler
from torch_lr_finder import LRFinder

# ------ TIMM ------ #
import timm

# ------ SKLEARN MODULES ------ #
import scipy.io
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix

# ------  IMAGE DISPLAY ------ #
from sklearn.metrics import confusion_matrix
from mpl_toolkits.axes_grid1 import ImageGrid
import seaborn as sns
from IPython.display import Image
from IPython.core.display import HTML 

# ----- CUDA ------ #
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# `device` is a plain string that later cells pass to `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


<h2>READING IN THE DATA AND CREATING THE DATA LOADERS</h2>
All rather straightforward here.

In [None]:
# ----- PATH -----
# The three splits live side by side under the project folder on the mounted Drive.
root_dir = "/content/drive/My Drive/"
training_directory = f"{root_dir}ML_FinalProject/data/train"
validation_directory = f"{root_dir}ML_FinalProject/data/validation"
test_directory = f"{root_dir}ML_FinalProject/data/test"

# ----- TRANSFORMS / STANDARDIZATION -----

# Per-channel mean / std handed to Normalize for the train and validation images.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Train / validation pipeline: resize to 224x224, convert to a tensor, normalize.
standard_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Test pipeline: resize and convert only. Unlike `standard_transform`, there is
# no Normalize step here.
test_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
])

# ----- BUILD IMAGE FOLDERS -----

training_data = ImageFolder(training_directory, transform=standard_transform)
validation_data = ImageFolder(validation_directory, transform=standard_transform)
test_data = ImageFolder(test_directory, transform=test_transform)

# ----- DATA LOADERS -----

# Batch size used for training and validation; the test loader yields one image at a time.
batch_size = 32

train_loader = DataLoader(
    training_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

val_loader = DataLoader(
    validation_data,
    batch_size=batch_size,
    num_workers=4,
    pin_memory=True,
)

test_loader = DataLoader(
    test_data,
    batch_size=1,
    num_workers=4,
    pin_memory=True,
)

# ----- NAMING THEM FOR MODEL TRAINING -----

image_datasets = {'train': training_data, 'val': validation_data}
dataloaders = {'train': train_loader, 'val': val_loader}

# Number of images per split, and the class names ImageFolder derived from the
# training directory.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
class_names = training_data.classes


<H2>LOADING THE PRE-TRAINED ViT MODEL</h2>
Using the PyTorch implementation provided by the timm library.

In [None]:
# ------ DEFINING THE MODEL ----- #

# Vision Transformer
#     paper: https://arxiv.org/abs/2010.11929
#     model last updated: May 5th, 2021
#
#     notes:
#       Parameters are frozen. Tests after unfreezing the parameters led to
#       unfavorable results, even after trials using dynamic learning rates.
#       Best results from model with unfrozen weights:
#           Training accuracy: 97%
#           Validation accuracy: 45%

ViT = timm.create_model('vit_base_patch16_224', pretrained = True)

# Freeze every pre-trained weight; only the replacement head created below
# (whose parameters require gradients by default) will be updated by training.
for param in ViT.parameters():
    param.requires_grad = False

# Size the new classification head from the dataset instead of a hard-coded
# count, so the head always matches the labels ImageFolder produces.
num_classes = len(class_names)
ViT.head = nn.Linear(ViT.head.in_features, num_classes)
ViT = ViT.to(device)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
ViT.eval()


<h2> SELECTING THE HYPERPARAMETERS</h2>

In [None]:

# ------ PARAMETERS ------ #
epochs = 25
gamma = 0.1       # factor StepLR multiplies the learning rate by at each decay
step_size = 5     # number of scheduler steps between two decays
lr = 1e-2         # best learning rate found by the range test below

# Flip to True to re-run the learning-rate range test before training.
RUN_LR_FINDER = False

# ----- LOSS FUNCTION AND OPTIMIZER ------ #

# Cross entropy loss was selected because it was the most frequently used in
# transfer learning in the papers I read. The literature used both Adam and SGD
# as optimization functions; the original ViT paper stated that Adam performed
# better across the board on transfer learning datasets compared to SGD.
# However, follow-up papers did not mention this finding and primarily use SGD.
# I tried both optimizers and found that Adam provided a trivial increase over
# SGD; there was no substantial difference.
#
# A learning rate search was conducted to find the best learning rate for the
# model. What was the best learning rate? 1e-2

criterion = nn.CrossEntropyLoss()

# Only hand the optimizer the parameters that can actually receive gradients
# (the frozen backbone weights would never be updated anyway).
trainable_params = [p for p in ViT.parameters() if p.requires_grad]

if RUN_LR_FINDER:
    # The range test starts from a tiny learning rate and sweeps up to end_lr.
    finder_optimizer = optim.SGD(trainable_params, lr = 1e-7)
    lr_finder = LRFinder(ViT, finder_optimizer, criterion, device = device)
    lr_finder.range_test(train_loader, end_lr = 1, num_iter = 50)
    lr_finder.plot()
    # Restore the model and optimizer to their state from before the sweep.
    lr_finder.reset()

# Optimizer used for the real training run.
optimizer = optim.SGD(trainable_params, lr = lr)

# ----- MODEL SCHEDULER ------ #

scheduler = lr_scheduler.StepLR(optimizer, step_size = step_size, gamma = gamma)



<h1>TRAINING THE MODEL</h1>

In [None]:

# Train for `epochs` epochs, validating after each one and checkpointing the
# weights with the best validation accuracy to 'bestViT.pt'.
# The loop itself never switches the model between train and eval mode.
best_accuracy = 0.0
for epoch in range(epochs):
    # ---- training pass ----
    train_loss_sum = 0.0
    train_correct = 0
    train_seen = 0

    for data, label in tqdm(train_loader):
        data = data.to(device)
        label = label.to(device)

        output = ViT(data)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate plain Python numbers (.item()) so no autograd history is
        # kept alive, and weight by batch size so a smaller final batch does
        # not skew the epoch averages.
        batch_len = label.size(0)
        train_loss_sum += loss.item() * batch_len
        train_correct += (output.argmax(dim=1) == label).sum().item()
        train_seen += batch_len

    epoch_loss = train_loss_sum / max(train_seen, 1)
    epoch_accuracy = train_correct / max(train_seen, 1)

    # ---- validation pass (no gradients needed) ----
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():
        for data, label in val_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = ViT(data)
            val_loss = criterion(val_output, label)

            batch_len = label.size(0)
            val_loss_sum += val_loss.item() * batch_len
            val_correct += (val_output.argmax(dim=1) == label).sum().item()
            val_seen += batch_len

    epoch_val_loss = val_loss_sum / max(val_seen, 1)
    epoch_val_accuracy = val_correct / max(val_seen, 1)

    # Advance the StepLR schedule once per epoch, after the optimizer updates;
    # without this call the learning rate would never decay.
    scheduler.step()

    # Keep the weights of the best-performing epoch on the validation set.
    if epoch_val_accuracy > best_accuracy:
        torch.save(ViT.state_dict(), 'bestViT.pt')
        best_accuracy = epoch_val_accuracy

    print(
        f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
    )

<h2>Testing the Model</h2>

In [None]:
# ------ Load in the best ViT checkpoint ------ #
# 'bestViT.pt' is written by the training loop. map_location places the saved
# tensors on the active device, so a checkpoint saved from a GPU run also loads
# on a CPU-only runtime.
ViT.load_state_dict(torch.load('bestViT.pt', map_location = device))


<All keys matched successfully>

In [None]:

def predict(model, images):
    """Return the predicted class index for each image in a batch.

    Input:
      model:  callable that maps a batch of inputs to a 2-D tensor of scores
              (one row per input, one column per class)
      images: batch tensor passed straight to `model`
    Output:
      1-D tensor holding, for each row of the scores, the index of its maximum
    """
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    return predicted

# Per-image bookkeeping for the test set.
acc_list = []             # 1 for a correct prediction, 0 otherwise
misclassified_image = []  # raw (un-normalized) image batches, kept on the CPU for plotting
misclassified_label = []  # predicted class index of each misclassified image
misclassified_true = []   # true class index of each misclassified image
y = []                    # true class index of every test image
y_pred = []               # predicted class index of every test image

# `test_transform` only resizes and converts to a tensor, whereas the model was
# trained on images that were additionally normalized. Apply the same
# normalization right before inference and keep the raw image for display.
normalize_for_model = Normalize(mean = imagenet_mean, std = imagenet_std)

with torch.no_grad():
    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)
        preds = predict(ViT, normalize_for_model(image))

        # The test loader uses a batch size of 1, so .item() yields plain ints.
        true_idx = label.item()
        pred_idx = preds.item()
        y.append(true_idx)
        y_pred.append(pred_idx)

        if pred_idx != true_idx:
            misclassified_image.append(image.cpu())
            misclassified_label.append(pred_idx)
            misclassified_true.append(true_idx)
            test_accuracy = 0
        else:
            test_accuracy = 1
        acc_list.append(test_accuracy)

# Overall test accuracy (fraction of correctly classified images).
statistics.mean(acc_list)

<h2>INVESTIGATING AND VISUALIZING THE RESULTS</h2>

In [None]:
def class_mapper(li):
    '''
    Replace the numeric PyTorch class identifiers with the associated breed
    name to aid in visualization clarity.

    Input:
      li: Iterable of predicted/true responses encoded as 0 - 14, each of which
          corresponds to a distinct cat breed. Entries may be plain ints or
          single-element integer tensors.
    Output:
      label_li: List of the predicted/true responses with the code replaced
                with the name of the breed
    Raises:
      IndexError: if an entry is 15 or larger.
    '''
    classes = ['Abyssinian', 'Bengal', 'Birman', 'Bombay', 'British Shorthair', 'Egyptian Mau', 'Maine Coon', 'Oriental Shorthair', 'Persian', 'Ragdoll', 'Russian Blue', 'Scottish Fold', 'Siamese', 'Somali', 'Sphynx']
    # int() lets single-element tensors be used as list indices too.
    label_li = [classes[int(i)] for i in li]
    return label_li

<b>Updating the labels for visualization</b>

In [None]:
# Turn the numeric class indices recorded for the misclassified test images
# into breed names. Both lists are overwritten in place, so re-running this cell
# would pass breed-name strings to class_mapper instead of indices.
misclassified_label = class_mapper(misclassified_label)
# NOTE(review): `misclassified_true` is not assigned in any earlier cell visible
# in this notebook -- confirm the test loop records the true labels.
misclassified_true = class_mapper(misclassified_true)

<b>Viewing Misclassified Test Observations</b>

In [None]:

# One CPU image tensor per misclassified example; [0] drops the batch dimension
# (the test loader uses a batch size of 1).
test = [image[0].cpu() for image in misclassified_image]

n_rows, n_cols = 2, 5
# Never try to show more images than were actually misclassified.
n_show = min(n_rows * n_cols, len(test))

fig = plt.figure(1, figsize=(15, 15))
grid = ImageGrid(fig, 111, nrows_ncols=(n_rows, n_cols), axes_pad=0.05)

# Hide every panel's axes up front so unused panels stay blank.
for ax in grid.axes_all:
    ax.axis(False)

# Draw a random sample of the misclassified images with predicted (P) and
# true (T) breed names.
for i, j in enumerate(random.sample(range(len(test)), n_show)):
    image = test[j]
    label = misclassified_label[j]
    true = misclassified_true[j]
    ax = grid[i]
    # imshow expects channels last, the tensors are channels first.
    ax.imshow(image.permute(1,2,0))
    ax.text(10, 210, f"P: {label} \nT: {true}", color='w', backgroundcolor='k')

plt.show()


In [None]:

# Breed names for every test image: ground truth and model prediction.
y_true = class_mapper(y)
y_predictions = class_mapper(y_pred)
classes = ['Abyssinian', 'Bengal', 'Birman', 'Bombay', 'British Shorthair', 'Egyptian Mau', 'Maine Coon', 'Oriental Shorthair',
           'Persian', 'Ragdoll', 'Russian Blue', 'Scottish Fold', 'Siamese', 'Somali', 'Sphynx']

# Passing labels= fixes the row/column order and keeps the matrix 15x15 even if
# a breed never appears in the test results, so the tick labels always line up.
cf_matrix = confusion_matrix(y_true, y_predictions, labels=classes)

fig, ax = plt.subplots(figsize=(15,10))
sns.heatmap(cf_matrix, linewidths=1, xticklabels=classes, yticklabels=classes, annot=True, ax=ax, fmt='g')
# Rows are the true classes, columns the predicted ones.
ax.set_xlabel('Predicted breed')
ax.set_ylabel('True breed')
ax.set_title('ViT confusion matrix on the test set')
plt.show()
