In [1]:
from models.image_classification.vanilla_vit import ViT
from models.image_classification.swin_transformer import SwinTransformer
from models.image_classification.deit import DeiT

# Loading Data
from utils.load_data import get_train_test_loaders
from utils.args import get_args

# PyTorch
import torch
import torch.nn as nn

# Visualization
from utils.visualization import plot_patches
from utils.visualization import plot_attention_maps
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Look up the hyper-parameter mapping registered under the DeiT preset name.
args = get_args('deit_tinydistil_cifar100')

# Build the train / validation / test loaders for CIFAR-100.
# NOTE(review): val_split=0.2 is presumably the fraction of the training data
# held out for validation -- confirm in utils.load_data.
train_loader, val_loader, test_loader = get_train_test_loaders(
    dataset_name="cifar100",
    batch_size=256,
    val_split=0.2,
    num_workers=4,
)

# Instantiate DeiT from the preset. Note that the preset's "embed_dim" entry
# is what feeds the model's `hidden_dim` argument.
deit = DeiT(
    image_size=args["image_size"],
    patch_size=args["patch_size"],
    num_layers=args["num_layers"],
    num_heads=args["num_heads"],
    hidden_dim=args["embed_dim"],
    mlp_ratio=args["mlp_ratio"],
    dropout=args["dropout"],
    attention_dropout=args["attention_dropout"],
    num_classes=args["num_classes"],
)

Files already downloaded and verified
Files already downloaded and verified


# Swin Transformer

In [3]:
# (Re)build the CIFAR-100 loaders used by the Swin Transformer cells below;
# presumably repeated here so this section can be run without the DeiT cell.
train_loader, val_loader, test_loader = get_train_test_loaders(
    dataset_name="cifar100",
    batch_size=256,
    val_split=0.2,
    num_workers=4,
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Hyper-parameter mapping for the tiny Swin Transformer preset on CIFAR-100.
args = get_args("swin_tiny_cifar100")

# Fourth positional argument of train_model; the training log reports
# progress as "Epoch k/50", so this is the number of epochs.
NUM_EPOCHS = 50

# Train on the GPU when one is available; otherwise fall back to the CPU
# instead of crashing on a hard-coded "cuda" device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

swin_tiny = SwinTransformer(
    patch_size=args["patch_size"],
    embed_dim=args["embed_dim"],
    depths=args["depths"],
    num_heads=args["num_heads"],
    window_size=args["window_size"],
    mlp_ratio=args["mlp_ratio"],
    dropout=args["dropout"],
    attention_dropout=args["attention_dropout"],
    stochastic_depth_prob=args["stochastic_depth_prob"],
    num_classes=args["num_classes"],
)

swin_tiny.to(device)

# NOTE(review): train_model is defined in the project package; confirm that it
# moves each batch to the model's device rather than assuming "cuda".
# The model is passed explicitly as the first argument, as the project API expects.
metrics = swin_tiny.train_model(swin_tiny, train_loader, test_loader, NUM_EPOCHS, val_loader)

Epoch 1/50: 100%|██████████| 157/157 [00:09<00:00, 15.87batch/s, Train Loss=4.14, Train Acc=0.0669]


Epoch 1/50 - Train Loss: 4.1374, Train Acc: 0.0669, Val Loss: 3.8700868843078613, Val Acc: 0.1007, Test Loss: 3.9174, Test Acc: 0.0910


Epoch 2/50: 100%|██████████| 157/157 [00:09<00:00, 16.13batch/s, Train Loss=3.75, Train Acc=0.119]


Epoch 2/50 - Train Loss: 3.7463, Train Acc: 0.1191, Val Loss: 3.590481332397461, Val Acc: 0.1499, Test Loss: 3.5889, Test Acc: 0.1504


Epoch 3/50: 100%|██████████| 157/157 [00:09<00:00, 15.82batch/s, Train Loss=3.5, Train Acc=0.165] 


Epoch 3/50 - Train Loss: 3.4976, Train Acc: 0.1653, Val Loss: 3.357745849990845, Val Acc: 0.1892, Test Loss: 3.3568, Test Acc: 0.1891


Epoch 4/50: 100%|██████████| 157/157 [00:09<00:00, 15.92batch/s, Train Loss=3.28, Train Acc=0.202]


Epoch 4/50 - Train Loss: 3.2772, Train Acc: 0.2017, Val Loss: 3.163640968322754, Val Acc: 0.2237, Test Loss: 3.1311, Test Acc: 0.2349


Epoch 5/50: 100%|██████████| 157/157 [00:09<00:00, 15.88batch/s, Train Loss=3.09, Train Acc=0.235]


Epoch 5/50 - Train Loss: 3.0875, Train Acc: 0.2352, Val Loss: 3.0286095443725585, Val Acc: 0.242, Test Loss: 2.9860, Test Acc: 0.2577


Epoch 6/50:  13%|█▎        | 21/157 [00:01<00:08, 16.75batch/s, Train Loss=2.99, Train Acc=0.256]

# Vanilla ViT

In [None]:
# (Re)build the CIFAR-100 loaders used by the vanilla ViT cells below;
# presumably repeated here so this section can be run on its own.
train_loader, val_loader, test_loader = get_train_test_loaders(
    dataset_name="cifar100",
    batch_size=256,
    val_split=0.2,
    num_workers=4,
)

In [None]:
# Hyper-parameter mapping for the tiny vanilla ViT preset on CIFAR-100.
args = get_args("vit_tiny_cifar100")

# Fourth positional argument of train_model (presumably the number of
# training epochs -- confirm against the model's train_model signature).
NUM_EPOCHS = 50

# Train on the GPU when one is available; otherwise fall back to the CPU
# instead of crashing on a hard-coded "cuda" device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

vanilla_vit = ViT(
    image_size=args["image_size"],
    patch_size=args["patch_size"],
    num_layers=args["num_layers"],
    num_heads=args["num_heads"],
    hidden_dim=args["hidden_dim"],
    mlp_dim=args["mlp_dim"],
    dropout=args["dropout"],
    attention_dropout=args["attention_dropout"],
    num_classes=args["num_classes"],
)

vanilla_vit.to(device)

# NOTE(review): train_model is defined in the project package; confirm that it
# moves each batch to the model's device rather than assuming "cuda".
# The model is passed explicitly as the first argument, as the project API expects.
metrics = vanilla_vit.train_model(vanilla_vit, train_loader, test_loader, NUM_EPOCHS, val_loader)