# Experiment 2: ViT-Lite-7/4 for Small Scale Writer Recognition
Note: All (hyper)parameters are chosen according to [1] and [2].

In [None]:
import os
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from timm.loss.cross_entropy import LabelSmoothingCrossEntropy

# Put the parent directory of the current working directory on the import
# path so that the `src` package used below can be imported from this notebook.
pardir = os.path.join(os.getcwd(), os.pardir)
if sys.path.count(pardir) == 0:
    sys.path.append(pardir)

from src.datasets import CVLCroppedDataset
from src.lr_schedulers import WarmUpLR
from src.model_variants import vit_lite_7_4
from src.preprocessing import TransformationPipeline, SIFTPatchExtractor, OtsuBinarization
from src.pytorch_utils import seed_worker, set_all_seeds, Trainer, ClassificationTester, RetrievalTester

%matplotlib inline

In [None]:
# --- Reproducibility / run identity ------------------------------------------
# Seed generated with random.org (range 0 to 2^16); it is also embedded in the
# experiment name so that runs with different seeds are kept apart.
SEED = 417
EXPERIMENT_NAME = f"experiment-2_writer-recognition_seed-{SEED}"

# --- Filesystem layout -------------------------------------------------------
# Logs and checkpoints are kept relative to the current directory, the data
# one level above it.
DATA_DIR = os.path.join(os.pardir, "data")
PREPROCESSED_FILES_DIRNAME = "cvl-1-1_with-enrollment_experiment_pages"
LOG_DIR = os.path.join(os.curdir, "runs")
SAVED_MODELS_DIR = os.path.join(os.curdir, "saved_models")

# --- Optimisation hyperparameters --------------------------------------------
BATCH_SIZE = 128
NUM_EPOCHS = 50
NUM_EPOCHS_WARMUP = 10  # handed to the WarmUpLR scheduler
LEARNING_RATE = 5e-4
WEIGHT_DECAY = 3e-2

# --- Data / model dimensions -------------------------------------------------
NUM_CLASSES = 50  # passed to the dataset, the model and the retrieval tester
IMG_SIZE = 32  # NOTE(review): not referenced in the cells below; presumably the patch side length - confirm
DIM_SECOND_LAST_LAYER = 256  # passed to RetrievalTester together with NUM_CLASSES

# --- Runtime -----------------------------------------------------------------
NUM_WORKERS = 8  # worker count handed to the data loaders and testers

## Preprocessing
If not already done, we download and preprocess the desired dataset.

In [None]:
# CSV that defines the dataset split used for this experiment.
split_csv_path = os.path.join(
    os.pardir, "dataset_splits", "cvl-1-1_with-enrollment_experiment_pages.csv"
)

# Patch extraction (SIFT-based, sigma=3.75) followed by Otsu binarization; the
# results are associated with the PREPROCESSED_FILES_DIRNAME directory name.
patch_extractor = SIFTPatchExtractor(sigma=3.75)
transformation_pipeline = TransformationPipeline(
    split_csv_path,
    patch_extractor,
    PREPROCESSED_FILES_DIRNAME,
    pipeline_items=[OtsuBinarization()],
)

cvl = CVLCroppedDataset(
    NUM_CLASSES,
    root_dir=DATA_DIR,
    transformation_pipeline=transformation_pipeline,
)
# Calling the dataset object performs the download/preprocessing step described
# in the notebook text (skipped if already done, according to that text).
cvl()

## Setup
Set up the experiment (reset seeds, load datasets, create model, criterion, optimizer and scheduler).

In [None]:
# Reset the random seeds with the experiment-wide SEED before the data loaders
# and the model are created. set_all_seeds is a project helper from
# src.pytorch_utils; which RNGs it covers is not visible here - TODO confirm.
set_all_seeds(SEED)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Load the training and validation sets.

In [None]:
# Both splits live under <DATA_DIR>/preprocessed/<PREPROCESSED_FILES_DIRNAME>/.
preprocessed_root = os.path.join(DATA_DIR, "preprocessed", PREPROCESSED_FILES_DIRNAME)

# Training images are converted to tensors and then randomly rotated by an
# angle in [-25, 25] degrees; the area outside the rotated image is filled with 1.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(degrees=(-25, 25), fill=1),
])
train_set = datasets.ImageFolder(
    os.path.join(preprocessed_root, "train"),
    transform=train_transform,
)

# Shuffling draws from a dedicated generator seeded with SEED, and every worker
# is initialised through the project's seed_worker hook.
shuffle_generator = torch.Generator().manual_seed(SEED)
train_set_loader = DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    worker_init_fn=seed_worker,
    generator=shuffle_generator,
)

# Validation images are only converted to tensors (no augmentation) and are
# iterated in a fixed order.
val_set = datasets.ImageFolder(
    os.path.join(preprocessed_root, "val"),
    transform=transforms.ToTensor(),
)
val_set_loader = DataLoader(
    val_set,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

Create model, criterion, optimizer and scheduler

In [None]:
# Build the ViT-Lite-7/4 variant with NUM_CLASSES outputs and move it to the
# selected device.
model = vit_lite_7_4(NUM_CLASSES)
model = model.to(device)

# Label-smoothing cross entropy (timm) as the training criterion.
criterion = LabelSmoothingCrossEntropy().to(device)

# AdamW over all model parameters; the scheduler receives the same base
# learning rate and the number of warm-up epochs.
optimizer = torch.optim.AdamW(
    model.parameters(),
    weight_decay=WEIGHT_DECAY,
    lr=LEARNING_RATE,
)
scheduler = WarmUpLR(
    optimizer,
    LEARNING_RATE,
    num_epochs_warm_up=NUM_EPOCHS_WARMUP,
)

In [None]:
# Hyperparameters handed to the Trainer alongside the run (presumably for
# logging next to the metrics - confirm in src.pytorch_utils.Trainer).
hyper_params = {
    "optimizer": "adamw",
    "weight decay": WEIGHT_DECAY,
    "lr": LEARNING_RATE,
    "num_epochs_warmup": NUM_EPOCHS_WARMUP,
    "batch_size": BATCH_SIZE,
}

# Bundle model, criterion, optimizer, scheduler and both data loaders into the
# project's Trainer; logs go to LOG_DIR and checkpoints to SAVED_MODELS_DIR.
trainer = Trainer(
    model,
    criterion,
    optimizer,
    scheduler,
    NUM_EPOCHS,
    train_set_loader,
    val_set_loader,
    experiment_name=EXPERIMENT_NAME,
    hyper_params=hyper_params,
    log_dir=LOG_DIR,
    saved_models_dir=SAVED_MODELS_DIR,
)

## Train
Start the training process

In [None]:
# Calling the Trainer instance starts the training process with the
# configuration (epochs, loaders, log and checkpoint directories) given above.
trainer()

## Evaluation
We evaluate the trained model on writer identification and writer retrieval.

In [None]:
# Epoch whose checkpoint is evaluated.
# NOTE(review): 33 is specific to a previous training run of this experiment;
# adjust it when the training above is re-run with a different outcome.
CHECKPOINT_EPOCH = 33

# Use the same directory constant that was handed to the Trainer
# (saved_models_dir=SAVED_MODELS_DIR) so that saving and loading cannot drift apart.
checkpoint_path = os.path.join(
    SAVED_MODELS_DIR, EXPERIMENT_NAME, f"epoch_{CHECKPOINT_EPOCH}.pth"
)

# map_location remaps the stored tensors onto the current device; without it a
# checkpoint written on a GPU cannot be loaded on a CPU-only machine.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Switch to evaluation mode (kept as the last expression so the notebook still
# displays the model summary for this cell).
model.eval()

### Classification-based evaluation

In [None]:
# Location of the preprocessed test split. Built from DATA_DIR exactly like the
# train/val paths, so all three splits follow a single data-root setting
# (the resulting path is identical to the previously hard-coded one).
test_set_path = os.path.join(DATA_DIR, "preprocessed", PREPROCESSED_FILES_DIRNAME, "test")

In [None]:
# Ranks at which the classification-based evaluation is reported.
top_k_values = [1, 2, 3, 5, 10]

# NOTE(review): the identifier below is misspelled ("classifcation"); it is
# kept unchanged in case other cells refer to it.
classifcation_tester = ClassificationTester(test_set_path, model)

# The positional arguments are the device, the literal 1 (presumably the batch
# size - confirm in src.pytorch_utils) and the number of workers.
classification_results = classifcation_tester(
    device,
    1,
    NUM_WORKERS,
    top_k=top_k_values,
)

print(f"{classification_results=}")

### Retrieval-based evaluation

In [None]:
# Distance metrics to evaluate. The names match metric identifiers of
# scipy.spatial.distance; presumably they are forwarded there - confirm.
distance_metrics = [
    "canberra",
    "chebyshev",
    "cityblock",
    "correlation",
    "cosine",
    "euclidean",
    "seuclidean",
    "sqeuclidean",
]

# The tester receives the pair (DIM_SECOND_LAST_LAYER, NUM_CLASSES), the test
# split location and the trained model.
retrieval_tester = RetrievalTester(
    (DIM_SECOND_LAST_LAYER, NUM_CLASSES),
    test_set_path,
    model,
)

# The positional arguments are the device, the literal 1 (presumably the batch
# size - confirm in src.pytorch_utils) and the number of workers.
retrieval_results = retrieval_tester(
    device,
    1,
    NUM_WORKERS,
    soft_top_k=[1, 2, 3, 5, 10],
    hard_top_k=[1],
    metrics=distance_metrics,
)


print(f"{retrieval_results=}")

## References
[1] A. Hassani, S. Walton, N. Shah, A. Abuduweili, J. Li, and H. Shi, ‘Escaping the Big Data Paradigm with
Compact Transformers’, arXiv:2104.05704 [cs], Jun. 2021, Accessed: 2021-07-19. [Online]. Available:
http://arxiv.org/abs/2104.05704

[2] https://github.com/SHI-Labs/Compact-Transformers/,
Accessed: 2021-07-19