# Classification on CIFAR and ImageNet

In [1]:
import sys

# check whether run in Colab
root = "."
if "google.colab" in sys.modules:
    print("Running in Colab.")
    !pip3 install matplotlib
    !pip3 install einops==0.3.0
    !pip3 install timm==0.4.9
    !git clone https://github.com/xxxnell/how-do-vits-work.git
    root = "./how-do-vits-work"
    sys.path.append(root)

Running in Colab.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting einops==0.3.0
  Downloading einops-0.3.0-py2.py3-none-any.whl (25 kB)
Installing collected packages: einops
Successfully installed einops-0.3.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm==0.4.9
  Downloading timm-0.4.9-py3-none-any.whl (346 kB)
[K     |████████████████████████████████| 346 kB 24.5 MB/s 
Installing collected packages: timm
Successfully installed timm-0.4.9
Cloning into 'how-do-vits-work'...
remote: Enumerating objects: 714, done.[K
remote: Counting objects: 100% (714/714), done.[K
remote: Compressing objects: 100% (355/355), done.[K
remote: Total 714 (delta 481), reused 588 (delta 355), pack-reused 0[K
Receiving objects: 100% (714/714), 17.29 MiB | 35.56 MiB/s, done.
Reso

In [2]:
import os
import time
import yaml
import copy
from pathlib import Path
import datetime

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import models
import ops.trains as trains
import ops.tests as tests
import ops.datasets as datasets
import ops.schedulers as schedulers

In [3]:
# Select the experiment configuration; uncomment exactly one `config_path`.
# config_path = "%s/configs/cifar10_vit.yaml" % root
config_path = "%s/configs/cifar100_vit.yaml" % root
# config_path = "%s/configs/imagenet_vit.yaml" % root

# `yaml.safe_load` is used instead of `yaml.load(f)`: calling `yaml.load`
# without an explicit `Loader` is deprecated in PyYAML 5.x and raises a
# TypeError in PyYAML >= 6. The config only holds plain mappings, lists,
# numbers, strings and booleans, which the safe loader fully supports.
with open(config_path) as f:
    args = yaml.safe_load(f)
    print(args)

{'dataset': {'name': 'cifar100', 'root': '../data', 'mean': [0.5071, 0.4867, 0.4408], 'std': [0.2675, 0.2565, 0.2761], 'padding': 4, 'color_jitter': 0.0, 'auto_augment': 'rand-m9-n2-mstd1.0', 're_prob': 0.0}, 'train': {'warmup_epochs': 5, 'epochs': 300, 'batch_size': 96, 'max_norm': 5, 'smoothing': 0.1, 'mixup': {'num_classes': 100, 'mixup_alpha': 1.0, 'cutmix_alpha': 0.8, 'prob': 1.0}}, 'val': {'batch_size': 256, 'n_ff': 1}, 'model': {'stem': False, 'block': {'image_size': 32, 'patch_size': 2, 'sd': 0.1}}, 'optim': {'name': 'AdamW', 'lr': 0.000125, 'weight_decay': 0.05, 'scheduler': {'name': 'CosineAnnealingLR', 'T_max': 300, 'eta_min': 0}}, 'env': {}}


In [4]:
# Pull each top-level section out of the loaded config. Every section comes
# from its own deep copy of `args`, so mutating one of these dicts later
# never alters `args` or another section. A section that is absent from the
# config yields None.
dataset_args, train_args, val_args, model_args, optim_args, env_args = (
    copy.deepcopy(args).get(section)
    for section in ("dataset", "train", "val", "model", "optim", "env")
)

In [5]:
# Build the train/test datasets from the `dataset` section of the config,
# downloading the data when needed.
dataset_train, dataset_test = datasets.get_dataset(**dataset_args, download=True)
dataset_name = dataset_args["name"]
num_classes = len(dataset_train.classes)

# Wrap both splits in DataLoaders. NOTE: `dataset_train` / `dataset_test`
# are rebound to the loaders from here on (later cells rely on that); the
# underlying datasets stay reachable through the loaders' `.dataset`.
train_loader_kwargs = dict(
    shuffle=True,
    num_workers=train_args.get("num_workers", 4),
    batch_size=train_args.get("batch_size", 128),
)
dataset_train = DataLoader(dataset_train, **train_loader_kwargs)

test_loader_kwargs = dict(
    num_workers=val_args.get("num_workers", 4),
    batch_size=val_args.get("batch_size", 128),
)
dataset_test = DataLoader(dataset_test, **test_loader_kwargs)

# Summarize the sizes of both splits and the number of classes.
n_train = len(dataset_train.dataset)
n_test = len(dataset_test.dataset)
print("Train: %s, Test: %s, Classes: %s" % (n_train, n_test, num_classes))

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ../data/cifar-100-python.tar.gz


  "Argument interpolation should be of type InterpolationMode instead of int. "


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting ../data/cifar-100-python.tar.gz to ../data
Files already downloaded and verified
Train: 50000, Test: 10000, Classes: 100


  cpuset_checked))


## Model

Use provided models:

In [6]:
# Choose one of the architectures shipped with the repository by leaving
# exactly one `name = ...` line uncommented.

# ResNet
# name = "resnet_dnn_50"
# name = "resnet_dnn_101"

# ViT
name = "vit_ti"
# name = "vit_s"

# Input geometry for CIFAR: 32x32 images with patch size 2.
vit_kwargs = dict(image_size=32, patch_size=2)

# The `stem` flag is read from the `model` section of the config and
# defaults to False when the config does not set it.
use_stem = model_args.get("stem", False)
model = models.get_model(
    name,
    num_classes=num_classes,
    stem=use_stem,
    **vit_kwargs,
)
# models.load(model, dataset_name, uid=current_time)

model: vit_ti , params: 5.4M, output: [3, 100]


Or use `timm`:

In [7]:
import timm

# Alternative to the repository models: build the ViT directly from `timm`.
# This rebinds `model`, replacing whatever the previous cell created.
timm_vit_kwargs = dict(
    # for CIFAR
    img_size=32,
    patch_size=2,
    num_classes=num_classes,
    # ViT-Ti
    embed_dim=192,
    depth=12,
    num_heads=3,
    qkv_bias=False,
)
model = timm.models.vision_transformer.VisionTransformer(**timm_vit_kwargs)

# Later cells read `model.name` (log directory, leaderboard path), so the
# attribute is attached by hand before reporting the model statistics.
model.name = "vit_ti"
models.stats(model)

model: vit_ti , params: 5.4M


Parallelize the given `model` by splitting the input:

In [8]:
# Wrap the model in `nn.DataParallel` and carry its `name` attribute over to
# the wrapper, because later cells read `model.name` (log directory,
# leaderboard path).
# The `isinstance` guard keeps the cell idempotent: re-running it would
# otherwise nest one DataParallel wrapper inside another.
if not isinstance(model, nn.DataParallel):
    name = model.name
    model = nn.DataParallel(model)
    model.name = name

## Train

Define a TensorBoard writer:

In [9]:
# The start timestamp identifies this run: it names the log directory here
# and is passed as the run id (`uid`) to training, saving and the
# leaderboard file in later cells.
run_started_at = datetime.datetime.now()
current_time = run_started_at.strftime("%Y%m%d_%H%M%S")

# Layout: runs/<dataset>/<model>/<timestamp>. The SummaryWriter is created
# before the files below are opened inside that directory.
log_dir = os.path.join("runs", dataset_name, model.name, current_time)
writer = SummaryWriter(log_dir)

# Archive the exact configuration next to the TensorBoard logs.
config_dump_path = "%s/config.yaml" % log_dir
with open(config_dump_path, "w") as config_file:
    yaml.dump(args, config_file)

# Archive the textual representation of the model architecture as well.
model_log_path = "%s/model.log" % log_dir
with open(model_log_path, "w") as model_log:
    model_log.write(repr(model))

print("Create TensorBoard log dir: ", log_dir)

Create TensorBoard log dir:  runs/cifar100/vit_ti/20220624_142839


Train the model:

In [10]:
# Train on GPU when CUDA is available.
gpu = torch.cuda.is_available()

# Optimizer and main LR scheduler are built from the `optim` config section.
optimizer, train_scheduler = trains.get_optimizer(model, **optim_args)

# Warm-up length passed to the scheduler: number of batches per epoch
# (length of the train loader) times the configured warm-up epochs
# (0 when the config does not set `warmup_epochs`).
warmup_steps = len(dataset_train) * train_args.get("warmup_epochs", 0)
warmup_scheduler = schedulers.WarmupScheduler(optimizer, warmup_steps)

trains.train(
    model, optimizer,
    dataset_train, dataset_test,
    train_scheduler, warmup_scheduler,
    train_args, val_args, gpu,
    writer,
    snapshot=-1,  # Set `snapshot=N` to save snapshots every N epochs.
    dataset_name=dataset_name,
    uid=current_time,
)

  cpuset_checked))


RuntimeError: ignored

Save the model:

In [None]:
# Save the trained model under the dataset name and this run's timestamp
# (`current_time`, the same id used for the log directory). The optimizer is
# passed along as well, presumably so its state is stored with the weights;
# confirm against `models.save`.
models.save(model, dataset_name, current_time, optimizer=optimizer)

## Test

In [None]:
# Evaluate on GPU when CUDA is available, otherwise on CPU.
gpu = torch.cuda.is_available()
if gpu:
    model = model.cuda()
else:
    model = model.cpu()

# Run the test once per `n_ff` setting and collect one row per setting:
# [n_ff, metric_1, metric_2, ...]. The last value returned by `tests.test`
# (`cal_diag`) is split off and not stored in the row.
metrics_list = []
for n_ff in [1]:
    print("N: %s, " % n_ff, end="")
    *metrics, cal_diag = tests.test(
        model, n_ff, dataset_test, verbose=False, gpu=gpu
    )
    metrics_list.append([n_ff] + metrics)

# Write the rows to leaderboard/logs/<dataset>/<model>/, creating the
# directory on first use.
leaderboard_path = os.path.join("leaderboard", "logs", dataset_name, model.name)
Path(leaderboard_path).mkdir(parents=True, exist_ok=True)

# NOTE: despite its name, `metrics_dir` is the path of the CSV file itself.
csv_name = "%s_%s_%s.csv" % (dataset_name, model.name, current_time)
metrics_dir = os.path.join(leaderboard_path, csv_name)
tests.save_metrics(metrics_dir, metrics_list)