## Requirements

#### Google Colab specifications

This notebook was executed in a Google Colab environment, and it is recommended to run it in the same environment. To do so, uncomment and execute the contents of the following cell.

In case you want to run it locally, you will need to create a virtual environment and install the required packages. The `requirements.txt` file is provided in the repository. Some file and module imports may need to be adjusted.

In [None]:
#! pip install torch torchvision avalanche-lib tqdm timm tensorflow_addons --quiet
#! pip install tfds-nightly==4.4.0.dev202201080107 --quiet

#! git clone https://github.com/lennartp03/promptingDistilledViT.git

#from google.colab import drive
#drive.mount('/content/drive')

### General imports

In [None]:
import torch
from torch import nn
from torch.optim import AdamW
from torch.nn import functional as F

from avalanche.evaluation.metrics.accuracy import Accuracy

from tqdm import tqdm

import timm
from timm.models import create_model
from timm.models.layers import DropPath
from timm.scheduler.cosine_lr import CosineLRScheduler

import math
import random
import os
import numpy as np
import time

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split, Dataset
from torch.utils.data.sampler import SubsetRandomSampler, BatchSampler

from src.utils.helpers import set_seed

# Seed the run once, right after the imports, using the project helper with its
# default arguments (which RNGs it covers is defined in src.utils.helpers — TODO confirm).
set_seed()

## Training logic

In [None]:
# Create one checkpoint directory per fine-tuning method; save() and load() read and
# write './models/<method>/...'. os.makedirs(..., exist_ok=True) keeps the cell
# idempotent: re-running it does not error when the directories already exist, and
# the './models' parent is created on demand.
for method_dir in ('convpass', 'head', 'full'):
    os.makedirs(os.path.join('.', 'models', method_dir), exist_ok=True)

In [None]:
def save(method, dataset, modelname, model, acc, ep):
    """Write a checkpoint and a one-line log for ``model``.

    The model is moved to the CPU (in place) before its ``state_dict`` is
    serialised, so the caller has to move it back to the GPU to keep training.

    Args:
        method: Sub-directory of ``./models/`` to write into.
        dataset: Dataset name, used as the file-name suffix.
        modelname: Model name, used as the file-name prefix.
        model: Module whose ``state_dict`` is stored.
        acc: Accuracy value recorded in the log file.
        ep: Epoch index recorded in the log file.
    """
    checkpoint_file = f'./models/{method}/{modelname}_{dataset}.pt'
    log_file = f'./models/{method}/{modelname}_{dataset}.log'

    model.cpu()
    torch.save(model.state_dict(), checkpoint_file)

    # The log is overwritten on every call: it always holds "<epoch> <accuracy>"
    # of the checkpoint currently on disk.
    with open(log_file, 'w') as handle:
        handle.write(f'{ep} {acc}\n')

def load(method, dataset, modelname, model):
    """Load the checkpoint written by ``save`` into ``model`` and return it on the CPU.

    Args:
        method: Sub-directory of ``./models/`` the checkpoint lives in.
        dataset: Dataset name used as the file-name suffix.
        modelname: Model name used as the file-name prefix.
        model: Module that receives the stored ``state_dict`` (modified in place).

    Returns:
        The same ``model`` object, with the loaded weights, moved to the CPU.
    """
    load_path = f'./models/{method}/{modelname}_{dataset}.pt'
    # map_location='cpu' deserialises every tensor onto the CPU, so a checkpoint
    # that happens to contain CUDA tensors still loads on a machine without a GPU.
    state_dict = torch.load(load_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.cpu()
    return model

In [None]:
# Training details
# Learning rate and weight decay; the experiment cells pass both to AdamW.
lr = 1e-3
wd = 1e-4
# Number of epochs. NOTE(review): none of the cells shown reads this global —
# train() has its own `epoch = 100` default and the schedulers hard-code
# t_initial=100 — confirm whether it is still needed.
epoch = 100

def train(model, dl,
          val_dl, dataset, modelname, method,
          opt, scheduler, epoch = 100):
    """Train ``model`` with cross-entropy and checkpoint the best validation state.

    The model is validated with ``test`` every 10th epoch and after the final
    epoch. The first validation always writes a checkpoint through ``save`` and
    later ones only when the accuracy improves, so a ``load`` that follows a run
    which validated at least once reads a file from this run, never a stale one.

    Args:
        model: Module to optimise.
        dl: Iterable of training batches; ``batch[0]`` is the input and
            ``batch[1]`` the target passed to ``F.cross_entropy``.
        val_dl: Validation loader handed to ``test``.
        dataset: Dataset name, forwarded to ``save`` for the file name.
        modelname: Model name, forwarded to ``save`` for the file name.
        method: Method name, forwarded to ``save`` for the directory.
        opt: Optimiser that already holds the trainable parameters.
        scheduler: Object with ``step(epoch_index)``, or ``None`` to skip scheduling.
        epoch: Number of epochs to run.

    Returns:
        ``(model, best_acc)``: the model after the last epoch, moved to the CPU,
        and the best validation accuracy seen (0 if none exceeded 0).
    """
    # Use the GPU when there is one; otherwise run the training step on the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    best_acc = 0
    checkpoint_written = False
    for ep in tqdm(range(epoch)):
        # test() leaves the model in eval mode and save() moves it to the CPU,
        # so both are re-established at the start of every epoch.
        model.train()
        model = model.to(device)
        for batch in dl:
            x, y = batch[0].to(device), batch[1].to(device)
            out = model(x)
            loss = F.cross_entropy(out, y)
            opt.zero_grad()
            loss.backward()
            opt.step()
        if scheduler is not None:
            # NOTE(review): the index passed is the epoch that just finished; timm's
            # reference loop appears to pass `epoch + 1`. Kept as-is so the learning
            # rate schedule of earlier runs is unchanged — confirm the intent.
            scheduler.step(ep)
        if ep % 10 == 9 or ep == epoch - 1:
            acc, _, _ = test(model, val_dl)
            print('Best Acc: ', best_acc, ' Current Acc: ', acc)
            improved = acc > best_acc
            if improved:
                best_acc = acc
                print('New Best Acc: ', best_acc)
            if improved or not checkpoint_written:
                save(method, dataset, modelname, model, acc, ep)
                checkpoint_written = True
    model = model.cpu()
    return model, best_acc


@torch.no_grad()
def test(model, dl, topn=5):
    """Evaluate ``model`` on ``dl`` and report top-1 accuracy, timing and top-n accuracy.

    Args:
        model: Module to evaluate; it is switched to eval mode and moved to the
            GPU when one is available (otherwise the CPU).
        dl: Loader of batches; ``batch[0]`` is the input, ``batch[1]`` the labels.
            The model output is indexed along dim 1 for the class scores.
        topn: k for the top-k accuracy. Clamped to the number of classes so
            that tasks with fewer than ``topn`` classes do not crash ``topk``.

    Returns:
        ``(acc.result(), mean_inference_time, top5_acc)`` where
        ``acc.result()`` is whatever the ``Accuracy`` metric reports,
        ``mean_inference_time`` is the average forward-pass wall time per batch
        in seconds, and ``top5_acc`` is a one-element tensor holding the
        top-``topn`` accuracy.

    Raises:
        ZeroDivisionError: if ``dl`` yields no batches.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    on_gpu = device.type == 'cuda'

    model.eval()
    model = model.to(device)
    acc = Accuracy()
    total_time = 0
    topn_correct, total = 0, 0
    for batch in dl:
        x, y = batch[0].to(device), batch[1].to(device)

        # CUDA kernels are launched asynchronously: without a synchronize on both
        # sides of the forward pass the clock would only measure the launch.
        if on_gpu:
            torch.cuda.synchronize()
        start_time = time.perf_counter()
        out = model(x)
        if on_gpu:
            torch.cuda.synchronize()
        total_time += time.perf_counter() - start_time

        k = min(topn, out.size(1))
        _, pred = out.topk(k, 1, True, True)
        pred = pred.t()
        # A sample counts as a hit when its label appears among its k predictions.
        correct = pred.eq(y.view(1, -1).expand_as(pred))
        topn_correct += correct.reshape(-1).float().sum(0, keepdim=True)
        total += y.size(0)

        acc.update(out.argmax(dim=1).view(-1), y)

    print(acc.result())
    top5_acc = topn_correct / total
    mean_inference_time = total_time / len(dl)

    return acc.result(), mean_inference_time, top5_acc

## FGVC

In [None]:
# Model identifiers passed to create_model. The trailing number is the author's
# note for each model (presumably the embedding width — TODO confirm).
VIT_BASE = 'vit_base_patch16_224'                 # 768
VIT_SMALL = 'vit_small_patch16_224'               # 384
VIT_TINY = 'vit_tiny_patch16_224'                 # 192
DEIT_SMALL = 'deit_small_distilled_patch16_224'   # 384
DEIT_TINY = 'deit_tiny_distilled_patch16_224'     # 192

# Backbone used by the FGVC cells below, selected through the named constant.
MODEL = VIT_TINY
# k for the top-k accuracy computed by test().
TOP_N = 5

### Stanford Cars

In [None]:
# Target directory for the Stanford Cars files copied in by the next cell.
# os.makedirs creates the './data' parent as well, and exist_ok=True makes the cell
# safe to re-run when the directories are already there.
os.makedirs('./data/stanford_cars/', exist_ok=True)

In [None]:
# Google Colab commands for adding the Stanford Cars dataset from Google Drive
#!cp -r drive/MyDrive/Bachelor/stanfordcars/devkit/ data/stanford_cars/
#!cp -r drive/MyDrive/Bachelor/stanfordcars/cars_test_annos_withlabels.mat data/stanford_cars

#!unzip drive/MyDrive/Bachelor/stanfordcars/archive.zip -d data/stanford_cars/

#!mv data/stanford_cars/cars_test/cars_test/* data/stanford_cars/cars_test/
#!mv data/stanford_cars/cars_train/cars_train/* data/stanford_cars/cars_train/

#!rmdir data/stanford_cars/cars_test/cars_test/
#!rmdir data/stanford_cars/cars_train/cars_train/

### FGVC - Convpass

In [None]:
from src.fgvc.blueprint import FGVCDataPytorch
from src.convpass.utils import set_Convpass

# FGVC few-shot sweep with Convpass adapters: for every dataset and every
# samples-per-class setting, build a fresh pretrained model, train only the
# adapter and head parameters, then evaluate the best checkpoint on the test split.

# Dataset name -> number of classes (used to size the new classifier head).
DATASET_LIST = {
    'Aircraft': 100,
    'Food101': 101,
    'Flowers': 102,
    'Pets': 37,
    'StanfordCars': 196,
}

# Passed to set_Convpass as `s`.
SCALING_PARAM = 1
# Values tried for `samples_per_class`.
SAMPLE_LIST = [2,4,8,16]

for DATA in DATASET_LIST.keys():
  print(DATA, DATASET_LIST[DATA])
  for num in SAMPLE_LIST:

    fgvc = FGVCDataPytorch(dataset=DATA, samples_per_class=num, pin_memory=False)
    train_fgvc, val_fgvc, test_fgvc = fgvc.get_loaders()

    # Aliases only; the rest of this cell uses the *_fgvc names directly.
    train_loader = train_fgvc
    val_loader = val_fgvc
    test_loader = test_fgvc

    model = create_model(MODEL, pretrained=True,
                        drop_path_rate=0.1)
    # NOTE(review): distilled=True and adapt_dim=192 are passed regardless of which
    # MODEL is selected — confirm they match the chosen backbone.
    set_Convpass(model, distilled=True, adapt_dim=192, s=SCALING_PARAM, xavier_init=True)

    trainable = []
    # Replace the classifier so its output size matches the dataset.
    model.reset_classifier(DATASET_LIST[DATA])

    # Only parameters whose name contains 'adapter' or 'head' are optimised;
    # every other parameter is frozen.
    for n, p in model.named_parameters():
        if 'adapter' in n or 'head' in n:
            trainable.append(p)
        else:
            p.requires_grad = False

    opt = AdamW(trainable, lr=lr, weight_decay=wd)
    scheduler = CosineLRScheduler(opt, t_initial=100,
                                      warmup_t=10, lr_min=1e-5, warmup_lr_init=1e-6)


    # train() checkpoints under ./models/convpass/<MODEL>_<DATA>.pt; the name does
    # not include `num`, so each samples-per-class run overwrites the previous file.
    model_trained, acc = train(model, train_fgvc, val_fgvc,
                                DATA, MODEL, "convpass", opt, scheduler)

    # Reload the best checkpoint into the same model object before testing.
    model_trained_best = load("convpass", DATA, MODEL, model)
    acc, inference_mean, top5_acc = test(model_trained_best, test_fgvc, TOP_N)
    print('Num samples per class:', num)
    # The value labelled 'Top5Acc' is the top-TOP_N accuracy returned by test().
    print('Accuracy:', acc, '\nInference:', inference_mean, '\nTop5Acc:', top5_acc)

### FGVC - Head

In [None]:
from src.fgvc.blueprint import FGVCDataPytorch

# FGVC few-shot sweep, head-only baseline: same loop as the Convpass cell, but
# without adapters — only the new classifier head is trained.

# Dataset name -> number of classes (used to size the new classifier head).
DATASET_LIST = {
    'Aircraft': 100,
    'Food101': 101,
    'Flowers': 102,
    'Pets': 37,
    'StanfordCars': 196,
}

# Not read anywhere in this cell.
SCALING_PARAM = 1
# Values tried for `samples_per_class`.
SAMPLE_LIST = [2,4,8,16]

for DATA in DATASET_LIST.keys():
  print(DATA, DATASET_LIST[DATA])
  for num in SAMPLE_LIST:

    fgvc = FGVCDataPytorch(dataset=DATA, samples_per_class=num, pin_memory=False)
    train_fgvc, val_fgvc, test_fgvc = fgvc.get_loaders()

    # Split sizes, as a quick sanity check.
    print(len(train_fgvc.dataset), len(val_fgvc.dataset), len(test_fgvc.dataset))

    model_head = create_model(MODEL, pretrained=True,
                        drop_path_rate=0.1)

    trainable = []
    # Replace the classifier so its output size matches the dataset.
    model_head.reset_classifier(DATASET_LIST[DATA])

    # Only parameters whose name contains 'head' are optimised; the rest is frozen.
    for n, p in model_head.named_parameters():
      if 'head' in n:
        trainable.append(p)
      else:
        p.requires_grad = False

    opt = AdamW(trainable, lr=lr, weight_decay=wd)
    scheduler = CosineLRScheduler(opt, t_initial=100,
                                      warmup_t=10, lr_min=1e-5, warmup_lr_init=1e-6)


    # train() checkpoints under ./models/head/<MODEL>_<DATA>.pt; the name does not
    # include `num`, so each samples-per-class run overwrites the previous file.
    model_head_trained, acc = train(model_head, train_fgvc, val_fgvc,
                                DATA, MODEL, "head", opt, scheduler)

    # Reload the best checkpoint into the same model object before testing.
    model_head_trained_best = load("head", DATA, MODEL, model_head)
    acc, inference_mean, top5_acc = test(model_head_trained_best, test_fgvc, TOP_N)

    # The value labelled 'Top5Acc' is the top-TOP_N accuracy returned by test().
    print('Accuracy:', acc, '\nInference:', inference_mean, '\nTop5Acc:', top5_acc)

## VTAB

In [None]:
# Google Colab commands for manually transferring the RESISC45 dataset from Google Drive
# !mkdir -p /content/testing/src/data/datasets/downloads/manual/resisc45
# !unrar x drive/MyDrive/Bachelor/resisc45/NWPU-RESISC45.rar /content/testing/src/data/datasets/downloads/manual/

In [None]:
from src.data.loader import construct_train_loader, construct_test_loader, construct_val_loader
from src.convpass.utils import set_Convpass

# Per-dataset VTAB settings, keyed by dataset spec string:
#   num_classes   -> size of the classifier head
#   scaling_param -> passed to set_Convpass as `s`
#   xavier_init   -> passed to set_Convpass as `xavier_init`
DATASETS = {
    'cifar(num_classes=100)': {'num_classes': 100, 'scaling_param': 0.1, 'xavier_init': False},
    'dtd': {'num_classes': 47, 'scaling_param': 0.01, 'xavier_init': True},
    'oxford_flowers102': {'num_classes': 102, 'scaling_param': 0.1, 'xavier_init': True},
    'patch_camelyon': {'num_classes': 2, 'scaling_param': 10, 'xavier_init': False},
    'resisc45': {'num_classes': 45, 'scaling_param': 10, 'xavier_init': False},
    'eurosat': {'num_classes': 10, 'scaling_param': 10, 'xavier_init': False},
    'kitti(task="closest_vehicle_distance")': {'num_classes': 4, 'scaling_param': 10, 'xavier_init': True},
    'smallnorb(predicted_attribute="label_elevation")': {'num_classes': 9, 'scaling_param': 1, 'xavier_init': True},
    'clevr(task="count_all")': {'num_classes': 8, 'scaling_param': 1, 'xavier_init': False},
}

# Dataset used by all VTAB cells below; must be one of the DATASETS keys.
DATASET_NAME = 'smallnorb(predicted_attribute="label_elevation")'
DATA_PATH = './src/data/datasets/'
NUM_CLS = DATASETS[DATASET_NAME]['num_classes']
SCALE_PARAM = DATASETS[DATASET_NAME]['scaling_param']
XAVIER_INIT = DATASETS[DATASET_NAME]['xavier_init']
# Rebinds the TOP_N set for the FGVC section (5): the VTAB cells report top-2
# accuracy, although their print label still reads 'Top5Acc'.
TOP_N = 2

print(DATASET_NAME, NUM_CLS, SCALE_PARAM)

# The loader helpers receive the dataset name with a 'vtab-' prefix.
train_loader = construct_train_loader(f'vtab-{DATASET_NAME}', DATA_PATH, NUM_CLS, pin_memory=False)
val_loader = construct_val_loader(f'vtab-{DATASET_NAME}', DATA_PATH, NUM_CLS, pin_memory=False)
test_loader = construct_test_loader(f'vtab-{DATASET_NAME}', DATA_PATH, NUM_CLS, pin_memory=False)

### Models

In [1]:
# Model identifiers available for the VTAB experiments; the trailing number is
# the author's note for each model (presumably the embedding width — TODO confirm).
VIT_BASE = 'vit_base_patch16_224' #768
VIT_SMALL = 'vit_small_patch16_224' #384
VIT_TINY = 'vit_tiny_patch16_224' #192
DEIT_SMALL = 'deit_small_distilled_patch16_224' #384
DEIT_TINY = 'deit_tiny_distilled_patch16_224' #192

# NOTE(review): placeholder. The VTAB cells below pass MODEL to create_model and use
# it in the checkpoint file name, so it presumably has to be set to one of the
# identifiers above before they are run — confirm.
MODEL = None

### VTAB - Convpass

In [None]:
# VTAB with Convpass adapters: train only the adapter and head parameters on the
# selected dataset, then evaluate the best checkpoint on the test split.
print('Train: ', len(train_loader.dataset), 'Val: ', len(val_loader.dataset),
      'Test: ', len(test_loader.dataset))

model_convpass = create_model(MODEL, pretrained=True,
                      drop_path_rate=0.1)
# NOTE(review): distilled=True and adapt_dim=192 are passed regardless of which
# MODEL is selected — confirm they match the chosen backbone.
set_Convpass(model_convpass, distilled=True, adapt_dim=192, s=SCALE_PARAM, xavier_init=XAVIER_INIT)

trainable = []
# Replace the classifier so its output size matches the dataset.
model_convpass.reset_classifier(NUM_CLS)

# Only parameters whose name contains 'adapter' or 'head' are optimised;
# every other parameter is frozen.
for n, p in model_convpass.named_parameters():
    if 'adapter' in n or 'head' in n:
        trainable.append(p)
    else:
        p.requires_grad = False

opt = AdamW(trainable, lr=lr, weight_decay=wd)
scheduler = CosineLRScheduler(opt, t_initial=100,
                                    warmup_t=10, lr_min=1e-5, warmup_lr_init=1e-6)


model_convpass_trained, acc = train(model_convpass, train_loader, val_loader,
                                    DATASET_NAME, MODEL, "convpass", opt, scheduler)

# Reload the best checkpoint into the same model object before testing.
model_convpass_trained_best = load("convpass", DATASET_NAME, MODEL, model_convpass)
acc, inference_mean, top5_acc = test(model_convpass_trained_best, test_loader, TOP_N)

# The value labelled 'Top5Acc' is the top-TOP_N accuracy returned by test().
print('Accuracy:', acc, '\nInference:', inference_mean, '\nTop5Acc:', top5_acc)

### VTAB - Head

In [None]:
# VTAB head-only baseline: the pretrained backbone is frozen and only the new
# classifier head is trained on the selected dataset.
model_head = create_model(MODEL, pretrained=True,
                      drop_path_rate=0.1)
# Replace the classifier so its output size matches the dataset.
model_head.reset_classifier(NUM_CLS)

trainable = []

# Only parameters whose name contains 'head' are optimised; the rest is frozen.
for n, p in model_head.named_parameters():
    if 'head' in n:
        trainable.append(p)
    else:
        p.requires_grad = False

opt = AdamW(trainable, lr=lr, weight_decay=wd)
scheduler = CosineLRScheduler(opt, t_initial=100,
                                    warmup_t=10, lr_min=1e-5, warmup_lr_init=1e-6)


model_head_trained, acc = train(model_head, train_loader, val_loader,
                                DATASET_NAME, MODEL, "head", opt, scheduler)

# Reload the best checkpoint into the same model object before testing.
model_head_trained_best = load("head", DATASET_NAME, MODEL, model_head)
acc, inference_mean, top5_acc = test(model_head_trained_best, test_loader, TOP_N)

# The value labelled 'Top5Acc' is the top-TOP_N accuracy returned by test().
print('Accuracy:', acc, '\nInference:', inference_mean, '\nTop5Acc:', top5_acc)

### VTAB - Full

In [None]:
# VTAB full fine-tuning baseline: every parameter of the pretrained model is
# trained on the selected dataset, then the best checkpoint is evaluated.
model_full = create_model(MODEL, pretrained=True,
                     drop_path_rate=0.1)
# Replace the classifier so its output size matches the dataset.
model_full.reset_classifier(NUM_CLS)

# Make sure nothing is frozen: all parameters take part in the optimisation.
for n, p in model_full.named_parameters():
    p.requires_grad = True

opt = AdamW(model_full.parameters(), lr=lr, weight_decay=wd)
scheduler = CosineLRScheduler(opt, t_initial=100,
                                  warmup_t=10, lr_min=1e-5, warmup_lr_init=1e-6)


model_full_trained, acc = train(model_full, train_loader, val_loader,
                                DATASET_NAME, MODEL, "full", opt, scheduler)

# Reload the best "full" checkpoint into model_full itself. The original cell loaded
# it into model_head, which overwrote the head baseline's weights and raised a
# NameError whenever the head cell had not been run first.
model_full_trained_best = load("full", DATASET_NAME, MODEL, model_full)
acc, inference_mean, top5_acc = test(model_full_trained_best, test_loader, TOP_N)

# The value labelled 'Top5Acc' is the top-TOP_N accuracy returned by test().
print('Accuracy:', acc, '\nInference:', inference_mean, '\nTop5Acc:', top5_acc)

## Param Utils

In [None]:
from src.utils.helpers import count_total_params, count_finetuned_params

# Report how many parameters are fine-tuned relative to the total.
# NOTE(review): placeholder value. Whether the count_* helpers expect a model
# instance or a model name is not visible here — confirm and assign it before
# running. This also rebinds the MODEL global used by the experiment cells above.
MODEL = None

total = count_total_params(MODEL)
tuned = count_finetuned_params(MODEL)
# Fraction of the parameters that is fine-tuned.
share = tuned/total

print(f"Number of parameters fine-tuned: {tuned}")
print(f"Total number of parameters: {total}")
print(f"Share: {share}")