# ViT for classification on CIFAR-10-4x

In [1]:
# Load the autoreload extension and enable mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Environment sanity check: list the GPUs visible to the driver and print the
# path of the `python` executable found on the shell's PATH.
!nvidia-smi
!which python

Sat Oct  5 10:10:44 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  | 00000004:04:00.0 Off |                    0 |
| N/A   45C    P0              42W / 300W |      0MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2-32GB           On  | 00000004:05:00.0 Off |  

In [2]:
# Import the project code. The package-qualified form is tried first; the flat
# form is the fallback (presumably for when the notebook is run from inside
# the ViT directory itself -- confirm against the repository layout).
try:
    from ViT.train import train
    from ViT.utils import load_cifar10_4x
    from ViT.model import *
except ImportError:
    # Only a failed import should trigger the fallback. A bare `except:` would
    # also swallow KeyboardInterrupt and genuine errors raised while the ViT
    # modules execute, hiding the real cause behind a second import failure.
    from train import train
    from utils import load_cifar10_4x
    from model import *

from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F

import datetime

# Build the training and validation loaders with the project helper
# (train batches of 128, validation batches of 256, augment=True).
# NOTE(review): `dir` is an absolute, machine-specific path; it must be changed
# to run this notebook on another machine.
train_loader, val_loader = load_cifar10_4x(dir="/nobackup/users/dcao2028/data", train_batch_size=128, valid_batch_size=256, augment=True)

import os
# Make sure the log directory exists. `exist_ok=True` makes this a no-op when
# the directory is already there and avoids the check-then-create race of a
# separate `os.path.exists` test.
os.makedirs("ViT/log", exist_ok=True)

def timestr():
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``.

    The result is used as a unique identifier for a training run.
    """
    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

def get_outdir(time_str):
    """Build the log-file path for the run identified by ``time_str``.

    Args:
        time_str: Run identifier; it is interpolated into the path as-is.

    Returns:
        The string ``ViT/log/<time_str>.out``.
    """
    return f"ViT/log/{time_str}.out"

In [3]:
# Here are the hyperparameters for this run

epochs = 50
patch_size = 16
embed_dim = 512
n_layers = 4
heads = 8
attn_dim = 1024
mlp_dim = None # default to 4*embed_dim
pool = 'cls'
dropout = 0.0
mlp_dropout = 0.0

model = ViT(image_size=128, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, pool=pool, dropout=dropout, mlp_dropout=mlp_dropout)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=0)

# One timestamp identifies both the log file and the saved checkpoint.
time_str = timestr()

# Create the checkpoint directory before training starts: nothing else in the
# notebook creates it, and a missing directory would otherwise only surface as
# a failed torch.save after the whole training run has finished.
os.makedirs("ViT/models", exist_ok=True)

print(f"Time string: {time_str}")

# print the model and the number of parameters
# print(model.transformer)
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# save model
# NOTE(review): the training log prints "Using 4 GPUs"; if train() wraps the
# model for multi-GPU use, confirm the state_dict keys saved here are the ones
# expected when the checkpoint is loaded back.
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")

Time string: 20241005_101047
The model has 17,221,130 trainable parameters


Epoch 1/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Using 4 GPUs


Epoch 2/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 1/50, Train Loss: 1.9129, Train Accuracy: 28.00%, Val Loss: 1.6747, Val Accuracy: 39.81%


Epoch 3/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 2/50, Train Loss: 1.5247, Train Accuracy: 44.75%, Val Loss: 1.4321, Val Accuracy: 48.48%


Epoch 4/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 3/50, Train Loss: 1.3614, Train Accuracy: 50.93%, Val Loss: 1.3088, Val Accuracy: 52.86%


Epoch 5/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 4/50, Train Loss: 1.2702, Train Accuracy: 54.17%, Val Loss: 1.2431, Val Accuracy: 55.58%


Epoch 6/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 5/50, Train Loss: 1.2038, Train Accuracy: 56.91%, Val Loss: 1.2065, Val Accuracy: 57.20%


Epoch 7/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 6/50, Train Loss: 1.1576, Train Accuracy: 58.13%, Val Loss: 1.1520, Val Accuracy: 59.12%


Epoch 8/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 7/50, Train Loss: 1.1190, Train Accuracy: 59.80%, Val Loss: 1.1013, Val Accuracy: 61.10%


Epoch 9/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 8/50, Train Loss: 1.0709, Train Accuracy: 61.30%, Val Loss: 1.0888, Val Accuracy: 61.39%


Epoch 10/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 9/50, Train Loss: 1.0282, Train Accuracy: 63.13%, Val Loss: 1.0648, Val Accuracy: 62.20%


Epoch 11/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 10/50, Train Loss: 0.9989, Train Accuracy: 64.39%, Val Loss: 1.0187, Val Accuracy: 64.11%


Epoch 12/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 11/50, Train Loss: 0.9590, Train Accuracy: 65.84%, Val Loss: 1.0016, Val Accuracy: 65.06%


Epoch 13/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 12/50, Train Loss: 0.9294, Train Accuracy: 66.96%, Val Loss: 0.9730, Val Accuracy: 65.88%


Epoch 14/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 13/50, Train Loss: 0.9069, Train Accuracy: 67.51%, Val Loss: 0.9739, Val Accuracy: 65.48%


Epoch 15/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 14/50, Train Loss: 0.8715, Train Accuracy: 68.91%, Val Loss: 0.9635, Val Accuracy: 65.72%


Epoch 16/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 15/50, Train Loss: 0.8521, Train Accuracy: 69.70%, Val Loss: 0.9226, Val Accuracy: 67.99%


Epoch 17/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 16/50, Train Loss: 0.8226, Train Accuracy: 70.74%, Val Loss: 0.8970, Val Accuracy: 68.63%


Epoch 18/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 17/50, Train Loss: 0.7960, Train Accuracy: 71.62%, Val Loss: 0.8931, Val Accuracy: 69.50%


Epoch 19/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 18/50, Train Loss: 0.7760, Train Accuracy: 72.44%, Val Loss: 0.9062, Val Accuracy: 68.73%


Epoch 20/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 19/50, Train Loss: 0.7545, Train Accuracy: 72.95%, Val Loss: 0.8809, Val Accuracy: 69.59%


Epoch 21/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 20/50, Train Loss: 0.7323, Train Accuracy: 73.91%, Val Loss: 0.8965, Val Accuracy: 69.09%


Epoch 22/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 21/50, Train Loss: 0.7118, Train Accuracy: 74.60%, Val Loss: 0.8744, Val Accuracy: 70.26%


Epoch 23/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 22/50, Train Loss: 0.6840, Train Accuracy: 75.43%, Val Loss: 0.8566, Val Accuracy: 70.73%


Epoch 24/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 23/50, Train Loss: 0.6643, Train Accuracy: 76.45%, Val Loss: 0.8516, Val Accuracy: 71.43%


Epoch 25/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 24/50, Train Loss: 0.6464, Train Accuracy: 76.78%, Val Loss: 0.8318, Val Accuracy: 71.65%


Epoch 26/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 25/50, Train Loss: 0.6258, Train Accuracy: 77.52%, Val Loss: 0.8236, Val Accuracy: 72.08%


Epoch 27/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 26/50, Train Loss: 0.5959, Train Accuracy: 78.75%, Val Loss: 0.8426, Val Accuracy: 72.23%


Epoch 28/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 27/50, Train Loss: 0.5815, Train Accuracy: 79.08%, Val Loss: 0.8320, Val Accuracy: 72.25%


Epoch 29/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 28/50, Train Loss: 0.5632, Train Accuracy: 79.89%, Val Loss: 0.8509, Val Accuracy: 71.80%


Epoch 30/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 29/50, Train Loss: 0.5417, Train Accuracy: 80.65%, Val Loss: 0.8653, Val Accuracy: 71.84%


Epoch 31/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 30/50, Train Loss: 0.5216, Train Accuracy: 81.31%, Val Loss: 0.8572, Val Accuracy: 71.94%


Epoch 32/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 31/50, Train Loss: 0.5056, Train Accuracy: 81.92%, Val Loss: 0.8222, Val Accuracy: 73.19%


Epoch 33/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 32/50, Train Loss: 0.4790, Train Accuracy: 82.72%, Val Loss: 0.8340, Val Accuracy: 73.27%


Epoch 34/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 33/50, Train Loss: 0.4649, Train Accuracy: 83.29%, Val Loss: 0.8549, Val Accuracy: 72.91%


Epoch 35/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 34/50, Train Loss: 0.4499, Train Accuracy: 83.84%, Val Loss: 0.8624, Val Accuracy: 72.79%


Epoch 36/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 35/50, Train Loss: 0.4364, Train Accuracy: 84.24%, Val Loss: 0.8325, Val Accuracy: 73.70%


Epoch 37/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 36/50, Train Loss: 0.4119, Train Accuracy: 85.13%, Val Loss: 0.8730, Val Accuracy: 73.22%


Epoch 38/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 37/50, Train Loss: 0.3946, Train Accuracy: 85.80%, Val Loss: 0.8571, Val Accuracy: 74.26%


Epoch 39/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 38/50, Train Loss: 0.3843, Train Accuracy: 86.26%, Val Loss: 0.8583, Val Accuracy: 73.99%


Epoch 40/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 39/50, Train Loss: 0.3653, Train Accuracy: 86.79%, Val Loss: 0.8781, Val Accuracy: 73.35%


Epoch 41/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 40/50, Train Loss: 0.3485, Train Accuracy: 87.52%, Val Loss: 0.8901, Val Accuracy: 74.01%


Epoch 42/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 41/50, Train Loss: 0.3334, Train Accuracy: 87.99%, Val Loss: 0.9053, Val Accuracy: 73.64%


Epoch 43/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 42/50, Train Loss: 0.3221, Train Accuracy: 88.50%, Val Loss: 0.9161, Val Accuracy: 73.27%


Epoch 44/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 43/50, Train Loss: 0.3057, Train Accuracy: 88.82%, Val Loss: 0.9151, Val Accuracy: 73.89%


Epoch 45/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 44/50, Train Loss: 0.2888, Train Accuracy: 89.55%, Val Loss: 0.9275, Val Accuracy: 73.74%


Epoch 46/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 45/50, Train Loss: 0.2786, Train Accuracy: 90.00%, Val Loss: 0.9357, Val Accuracy: 74.33%


Epoch 47/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 46/50, Train Loss: 0.2597, Train Accuracy: 90.62%, Val Loss: 0.9842, Val Accuracy: 73.18%


Epoch 48/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 47/50, Train Loss: 0.2528, Train Accuracy: 90.97%, Val Loss: 0.9281, Val Accuracy: 74.59%


Epoch 49/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 48/50, Train Loss: 0.2420, Train Accuracy: 91.33%, Val Loss: 0.9725, Val Accuracy: 74.29%


Epoch 50/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 49/50, Train Loss: 0.2303, Train Accuracy: 91.63%, Val Loss: 0.9825, Val Accuracy: 73.67%


                                                                                                                        

Epoch 50/50, Train Loss: 0.2202, Train Accuracy: 91.97%, Val Loss: 0.9788, Val Accuracy: 74.24%
