# Classification on CIFAR and ImageNet

In [1]:
import sys

# check whether run in Colab
root = "."
if "google.colab" in sys.modules:
    print("Running in Colab.")
    !pip3 install matplotlib
    !pip3 install einops==0.3.0
    !pip3 install timm==0.4.9
    !git clone https://github.com/xxxnell/how-do-vits-work.git
    root = "./how-do-vits-work"
    sys.path.append(root)

In [2]:
import os
import time
import yaml
import copy
from pathlib import Path
import datetime

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import models
import ops.trains as trains
import ops.tests as tests
import ops.datasets as datasets
import ops.schedulers as schedulers

In [3]:
# Pick the experiment configuration; swap the active line to change dataset.
config_path = "%s/configs/cifar10_vit.yaml" % root
# config_path = "%s/configs/imagenet_vit.yaml" % root

# Parse the YAML file; `args` holds the complete, unmodified configuration.
with open(config_path) as config_file:
    args = yaml.safe_load(config_file)

print(args)

{'dataset': {'name': 'cifar10', 'root': '../data', 'mean': [0.4914, 0.4822, 0.4465], 'std': [0.2023, 0.1994, 0.201], 'padding': 4, 'color_jitter': 0.0, 'auto_augment': 'rand-m9-n2-mstd1.0', 're_prob': 0.0}, 'train': {'warmup_epochs': 5, 'epochs': 300, 'batch_size': 96, 'max_norm': 5, 'smoothing': 0.1, 'mixup': {'num_classes': 10, 'mixup_alpha': 1.0, 'cutmix_alpha': 0.8, 'prob': 1.0}}, 'val': {'batch_size': 256, 'n_ff': 1}, 'model': {'stem': False, 'block': {'image_size': 32, 'patch_size': 2, 'sd': 0.1}}, 'optim': {'name': 'AdamW', 'lr': 0.000125, 'weight_decay': 0.05, 'scheduler': {'name': 'CosineAnnealingLR', 'T_max': 300, 'eta_min': 0}}, 'env': {}}


In [4]:
# Give every configuration section its own independent deep copy, so that
# later edits to one section never leak back into `args`.
# A section that is absent from `args` yields None.
dataset_args, train_args, val_args, model_args, optim_args, env_args = (
    copy.deepcopy(args.get(section))
    for section in ("dataset", "train", "val", "model", "optim", "env")
)

In [5]:
# Override the configured number of epochs to keep this demo run short.
# Only the `train_args` copy is changed; `args` keeps the value from the file.
train_args["epochs"] = 1

In [6]:
# Fetch the raw train/test datasets described by the `dataset` config section.
train_set, test_set = datasets.get_dataset(**dataset_args, download=True)
dataset_name = dataset_args["name"]
num_classes = len(train_set.classes)

# Wrap both splits in DataLoaders; only the training split is shuffled.
# NOTE: from here on `dataset_train` / `dataset_test` are DataLoaders.
dataset_train = DataLoader(
    train_set,
    batch_size=train_args.get("batch_size", 128),
    num_workers=train_args.get("num_workers", 4),
    shuffle=True,
)
dataset_test = DataLoader(
    test_set,
    batch_size=val_args.get("batch_size", 128),
    num_workers=val_args.get("num_workers", 4),
)

split_summary = (len(dataset_train.dataset), len(dataset_test.dataset), num_classes)
print("Train: %s, Test: %s, Classes: %s" % split_summary)

Files already downloaded and verified
Files already downloaded and verified
Train: 50000, Test: 10000, Classes: 10


## Model

Use provided models:

In [7]:
# Architecture to build. Alternatives offered by the original notebook:
#   ResNet: "resnet_dnn_50", "resnet_dnn_101"
#   ViT:    "vit_ti", "vit_s"
name = "vit_ti"

# Extra keyword arguments for the ViT builders (values for CIFAR).
vit_kwargs = dict(image_size=32, patch_size=2)

use_stem = model_args.get("stem", False)
model = models.get_model(name, num_classes=num_classes, stem=use_stem, **vit_kwargs)
# Optional: load stored weights into the model. Note that `current_time` must
# already be defined when this line is enabled.
# models.load(model, dataset_name, uid=current_time)

AttributeError: module 'keras.backend' has no attribute 'is_tensor'

Or use `timm`:

In [8]:
import timm

# Hyperparameters of the ViT-Ti variant.
vit_ti_kwargs = dict(embed_dim=192, depth=12, num_heads=3, qkv_bias=False)

# Build the transformer directly from `timm`; image/patch sizes are for CIFAR.
model = timm.models.vision_transformer.VisionTransformer(
    img_size=32,
    patch_size=2,
    num_classes=num_classes,
    **vit_ti_kwargs,
)
# The rest of the notebook reads `model.name` (log and leaderboard paths).
model.name = "vit_ti"
models.stats(model)

model: vit_ti , params: 5.4M


Parallelize the given `model` by splitting the input:

In [9]:
# Wrap the model in DataParallel while keeping its `name` attribute, which the
# logging and leaderboard cells read from the wrapper.
name = model.name
# Guard against re-running this cell: wrapping an already wrapped model would
# nest DataParallel modules inside each other.
if not isinstance(model, nn.DataParallel):
    model = nn.DataParallel(model)
    model.name = name

## Train

Define a TensorBoard writer:

In [10]:
# A timestamp identifies this run; it is reused as `uid` by later cells.
current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
log_dir = os.path.join("runs", dataset_name, model.name, current_time)
writer = SummaryWriter(log_dir)

# Log the configuration that is actually used for this run. The per-section
# dicts are independent copies of `args` and may have been edited after
# loading (e.g. `train_args["epochs"]`), so dumping `args` alone would record
# settings that differ from the ones the training cell receives.
section_overrides = {
    "dataset": dataset_args,
    "train": train_args,
    "val": val_args,
    "model": model_args,
    "optim": optim_args,
    "env": env_args,
}
effective_args = {
    **args,
    # Sections missing from the config file are None; do not add them.
    **{key: value for key, value in section_overrides.items() if value is not None},
}
with open(os.path.join(log_dir, "config.yaml"), "w") as f:
    yaml.dump(effective_args, f)

# Keep a textual description of the architecture next to the config.
with open(os.path.join(log_dir, "model.log"), "w") as f:
    f.write(repr(model))

print("Create TensorBoard log dir: ", log_dir)

Create TensorBoard log dir:  runs/cifar10/vit_ti/20230806_174610


Train the model:

In [12]:
gpu = torch.cuda.is_available()
optimizer, train_scheduler = trains.get_optimizer(model, **optim_args)

# `dataset_train` is a DataLoader, so its length is the number of batches per
# epoch; the warm-up scheduler is therefore sized in batches, not epochs.
warmup_steps = len(dataset_train) * train_args.get("warmup_epochs", 0)
warmup_scheduler = schedulers.WarmupScheduler(optimizer, warmup_steps)

# Set `snapshot=N` to save snapshots every N epochs.
trains.train(
    model, optimizer,
    dataset_train, dataset_test,
    train_scheduler, warmup_scheduler,
    train_args, val_args, gpu,
    writer,
    snapshot=-1,
    dataset_name=dataset_name,
    uid=current_time,
)

(123.52 sec/epoch) Warmup epoch: 0, Loss: 2.2404, lr: 2.500e-05
NLL: 2.0406, Cutoffs: 0.0 %, 90.0 %, Accs: 25.340 %, 0.000 %, Uncs: 0.000 %, 100.000 %, IoUs: 13.140 %, 0.000 %, Freqs: 100.000 %, 0.000 %, Top-5: 80.180 %, Brier: 0.843, ECE: 6.321 %, ECE±: -6.321 %
(122.99 sec/epoch) Warmup epoch: 1, Loss: 2.1964, lr: 5.000e-05
NLL: 1.9675, Cutoffs: 0.0 %, 90.0 %, Accs: 27.670 %, 0.000 %, Uncs: 0.000 %, 100.000 %, IoUs: 14.207 %, 0.000 %, Freqs: 100.000 %, 0.000 %, Top-5: 82.790 %, Brier: 0.824, ECE: 5.975 %, ECE±: -5.975 %
(123.55 sec/epoch) Warmup epoch: 2, Loss: 2.1604, lr: 7.500e-05
NLL: 1.8647, Cutoffs: 0.0 %, 90.0 %, Accs: 32.510 %, 0.000 %, Uncs: 0.000 %, 100.000 %, IoUs: 18.140 %, 0.000 %, Freqs: 100.000 %, 0.000 %, Top-5: 85.630 %, Brier: 0.797, ECE: 7.864 %, ECE±: -7.864 %
(123.42 sec/epoch) Warmup epoch: 3, Loss: 2.1186, lr: 1.000e-04
NLL: 1.7607, Cutoffs: 0.0 %, 90.0 %, Accs: 39.830 %, 0.000 %, Uncs: 0.000 %, 100.000 %, IoUs: 24.252 %, 0.000 %, Freqs: 100.000 %, 0.000 %, Top-

Save the model:

In [13]:
# Store the trained model, identified by dataset name and run timestamp.
# The optimizer is handed over as well - presumably so that its state is saved
# together with the weights; confirm in `models.save`.
models.save(
    model,
    dataset_name,
    current_time,
    optimizer=optimizer,
)

## Test

In [14]:
gpu = torch.cuda.is_available()

# Move the model to the device the evaluation runs on.
if gpu:
    model = model.cuda()
else:
    model = model.cpu()

# Evaluate once per `n_ff` value and collect one row of metrics per value.
metrics_list = []
for n_ff in (1,):
    print("N: %s, " % n_ff, end="")
    # The last value returned by `tests.test` is kept separately as `cal_diag`;
    # everything before it forms the metrics row.
    *metrics, cal_diag = tests.test(model, n_ff, dataset_test, verbose=False, gpu=gpu)
    metrics_list.append([n_ff] + metrics)

# Write the collected rows to a per-run CSV file in the leaderboard folder.
leaderboard_path = os.path.join("leaderboard", "logs", dataset_name, model.name)
os.makedirs(leaderboard_path, exist_ok=True)
csv_name = "%s_%s_%s.csv" % (dataset_name, model.name, current_time)
metrics_dir = os.path.join(leaderboard_path, csv_name)  # path of the CSV file
tests.save_metrics(metrics_dir, metrics_list)

N: 1, NLL: 1.5733, Cutoffs: 0.0 %, 90.0 %, Accs: 47.420 %, 0.000 %, Uncs: 0.000 %, 100.000 %, IoUs: 30.278 %, 0.000 %, Freqs: 100.000 %, 0.000 %, Top-5: 92.050 %, Brier: 0.695, ECE: 15.074 %, ECE±: -15.074 %
