# ViT for classification on CIFAR-10-4x

In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Environment sanity check: list the GPUs visible to the driver and show
# which `python` executable the shell resolves.
!nvidia-smi
!which python

Sat Oct  5 10:10:44 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  | 00000004:04:00.0 Off |                    0 |
| N/A   45C    P0              42W / 300W |      0MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2-32GB           On  | 00000004:05:00.0 Off |  

In [2]:
# Import the project helpers, first through the `ViT` package and, if that
# import fails, through flat module names.
# NOTE(review): presumably the fallback covers running the notebook from
# inside the ViT directory -- confirm against the repository layout.
try:
    from ViT.train import train
    from ViT.utils import load_cifar10_4x
    from ViT.model import *
except ImportError:
    # Only import failures trigger the fallback; any other error raised while
    # loading the package (e.g. a SyntaxError in ViT/train.py) now surfaces
    # instead of being silently masked by a bare `except:`.
    from train import train
    from utils import load_cifar10_4x
    from model import *

from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F

import datetime

import os

# Build the training and validation loaders through the project helper.
# NOTE(review): the dataset directory is a machine-specific absolute path;
# consider making it configurable before sharing this notebook.
train_loader, val_loader = load_cifar10_4x(dir="/nobackup/users/dcao2028/data", train_batch_size=128, valid_batch_size=256, augment=True)

# Make sure the log directory exists. `exist_ok=True` makes this idempotent
# and avoids the check-then-create race of `os.path.exists` + `os.makedirs`.
os.makedirs("ViT/log", exist_ok=True)

def timestr():
    """Return the current local time as a ``YYYYMMDD_HHMMSS`` string."""
    # datetime's format-spec support delegates to strftime with this pattern.
    return f"{datetime.datetime.now():%Y%m%d_%H%M%S}"

def get_outdir(time_str):
    """Return the log-file path for a run: ``ViT/log/<time_str>.out``.

    Args:
        time_str: Run identifier inserted into the file name.

    Returns:
        The relative path of the run's ``.out`` log file as a string.
    """
    return "ViT/log/{}.out".format(time_str)

In [5]:
# Hyperparameters for this training run.

epochs = 50
patch_size = 16
embed_dim = 512
n_layers = 6
heads = 8
attn_dim = 512
mlp_dim = None # default to 4*embed_dim
dropout = 0.0
mlp_dropout = 0.0
# NOTE(review): the spelling is kept as-is because this string is passed
# verbatim to ViT; confirm which value the model expects before changing it
# to "sinusoidal".
embedding = 'sinousoidal'

model = ViT(image_size=128, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, dropout=dropout, mlp_dropout=mlp_dropout, embedding=embedding)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=0)

# One timestamp names both the log file and the saved weights of this run.
time_str = timestr()
model_path = f"ViT/models/{time_str}.pth"

# Create the checkpoint directory before training starts: only ViT/log is
# created elsewhere in the notebook, so a missing ViT/models folder would
# otherwise make torch.save fail after the whole run has finished.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

print(f"Time string: {time_str}")

# Report the number of trainable parameters.
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# Save the trained weights.
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")



Time string: 20241005_110731
The model has 19,323,402 trainable parameters
Using 4 GPUs




Epoch 1/50, Train Loss: 1.8861, Train Accuracy: 29.14%, Val Loss: 1.6295, Val Accuracy: 41.44%




Epoch 2/50, Train Loss: 1.5032, Train Accuracy: 45.22%, Val Loss: 1.3885, Val Accuracy: 49.61%




Epoch 3/50, Train Loss: 1.3247, Train Accuracy: 52.30%, Val Loss: 1.2715, Val Accuracy: 54.01%




Epoch 4/50, Train Loss: 1.2243, Train Accuracy: 56.12%, Val Loss: 1.1956, Val Accuracy: 57.18%




Epoch 5/50, Train Loss: 1.1528, Train Accuracy: 58.64%, Val Loss: 1.1530, Val Accuracy: 58.59%




Epoch 6/50, Train Loss: 1.0968, Train Accuracy: 60.86%, Val Loss: 1.0911, Val Accuracy: 61.10%




Epoch 7/50, Train Loss: 1.0526, Train Accuracy: 62.35%, Val Loss: 1.0583, Val Accuracy: 62.74%




Epoch 8/50, Train Loss: 1.0094, Train Accuracy: 64.05%, Val Loss: 1.0169, Val Accuracy: 64.33%




Epoch 9/50, Train Loss: 0.9661, Train Accuracy: 65.47%, Val Loss: 1.0164, Val Accuracy: 63.88%




Epoch 10/50, Train Loss: 0.9422, Train Accuracy: 66.45%, Val Loss: 1.0018, Val Accuracy: 64.53%




Epoch 11/50, Train Loss: 0.8990, Train Accuracy: 68.12%, Val Loss: 0.9553, Val Accuracy: 66.30%




Epoch 12/50, Train Loss: 0.8671, Train Accuracy: 69.22%, Val Loss: 0.9640, Val Accuracy: 66.92%




Epoch 13/50, Train Loss: 0.8399, Train Accuracy: 70.14%, Val Loss: 0.9091, Val Accuracy: 68.50%




Epoch 14/50, Train Loss: 0.8128, Train Accuracy: 71.31%, Val Loss: 0.9116, Val Accuracy: 67.56%




Epoch 15/50, Train Loss: 0.7888, Train Accuracy: 72.00%, Val Loss: 0.9056, Val Accuracy: 68.48%




Epoch 16/50, Train Loss: 0.7598, Train Accuracy: 73.20%, Val Loss: 0.8657, Val Accuracy: 69.89%




Epoch 17/50, Train Loss: 0.7375, Train Accuracy: 73.72%, Val Loss: 0.8655, Val Accuracy: 70.14%




Epoch 18/50, Train Loss: 0.7115, Train Accuracy: 74.72%, Val Loss: 0.8885, Val Accuracy: 69.57%




Epoch 19/50, Train Loss: 0.6860, Train Accuracy: 75.45%, Val Loss: 0.8529, Val Accuracy: 70.48%




Epoch 20/50, Train Loss: 0.6617, Train Accuracy: 76.46%, Val Loss: 0.8409, Val Accuracy: 71.40%




Epoch 21/50, Train Loss: 0.6381, Train Accuracy: 77.21%, Val Loss: 0.8359, Val Accuracy: 71.82%




Epoch 22/50, Train Loss: 0.6141, Train Accuracy: 77.97%, Val Loss: 0.8641, Val Accuracy: 71.26%




Epoch 23/50, Train Loss: 0.5893, Train Accuracy: 78.84%, Val Loss: 0.8345, Val Accuracy: 72.48%




Epoch 24/50, Train Loss: 0.5644, Train Accuracy: 79.84%, Val Loss: 0.8434, Val Accuracy: 72.68%




Epoch 25/50, Train Loss: 0.5372, Train Accuracy: 80.81%, Val Loss: 0.8568, Val Accuracy: 72.22%




Epoch 26/50, Train Loss: 0.5178, Train Accuracy: 81.50%, Val Loss: 0.8510, Val Accuracy: 72.20%




Epoch 27/50, Train Loss: 0.4941, Train Accuracy: 82.18%, Val Loss: 0.8342, Val Accuracy: 72.57%




Epoch 28/50, Train Loss: 0.4729, Train Accuracy: 83.10%, Val Loss: 0.8428, Val Accuracy: 72.97%




Epoch 29/50, Train Loss: 0.4442, Train Accuracy: 84.05%, Val Loss: 0.8681, Val Accuracy: 72.71%




Epoch 30/50, Train Loss: 0.4249, Train Accuracy: 84.69%, Val Loss: 0.8486, Val Accuracy: 73.18%




Epoch 31/50, Train Loss: 0.4082, Train Accuracy: 85.28%, Val Loss: 0.8687, Val Accuracy: 73.15%




Epoch 32/50, Train Loss: 0.3839, Train Accuracy: 86.11%, Val Loss: 0.8616, Val Accuracy: 74.20%




Epoch 33/50, Train Loss: 0.3640, Train Accuracy: 86.83%, Val Loss: 0.8502, Val Accuracy: 74.36%




Epoch 34/50, Train Loss: 0.3483, Train Accuracy: 87.44%, Val Loss: 0.8962, Val Accuracy: 73.09%




Epoch 35/50, Train Loss: 0.3315, Train Accuracy: 88.02%, Val Loss: 0.8989, Val Accuracy: 73.83%




Epoch 36/50, Train Loss: 0.3067, Train Accuracy: 88.83%, Val Loss: 0.9408, Val Accuracy: 73.36%




Epoch 37/50, Train Loss: 0.2914, Train Accuracy: 89.55%, Val Loss: 0.9407, Val Accuracy: 73.97%




Epoch 38/50, Train Loss: 0.2775, Train Accuracy: 90.09%, Val Loss: 0.9594, Val Accuracy: 73.41%




Epoch 39/50, Train Loss: 0.2620, Train Accuracy: 90.67%, Val Loss: 0.9468, Val Accuracy: 74.48%




Epoch 40/50, Train Loss: 0.2484, Train Accuracy: 91.11%, Val Loss: 0.9583, Val Accuracy: 74.04%




Epoch 41/50, Train Loss: 0.2294, Train Accuracy: 91.72%, Val Loss: 0.9662, Val Accuracy: 74.09%




Epoch 42/50, Train Loss: 0.2232, Train Accuracy: 91.98%, Val Loss: 1.0026, Val Accuracy: 74.44%




Epoch 43/50, Train Loss: 0.2039, Train Accuracy: 92.60%, Val Loss: 1.0539, Val Accuracy: 74.32%




Epoch 44/50, Train Loss: 0.1874, Train Accuracy: 93.32%, Val Loss: 1.0507, Val Accuracy: 74.22%




Epoch 45/50, Train Loss: 0.1872, Train Accuracy: 93.28%, Val Loss: 1.0320, Val Accuracy: 74.21%




Epoch 46/50, Train Loss: 0.1779, Train Accuracy: 93.72%, Val Loss: 1.0601, Val Accuracy: 74.32%




Epoch 47/50, Train Loss: 0.1634, Train Accuracy: 94.17%, Val Loss: 1.0584, Val Accuracy: 73.86%




Epoch 48/50, Train Loss: 0.1649, Train Accuracy: 94.08%, Val Loss: 1.0558, Val Accuracy: 74.87%




Epoch 49/50, Train Loss: 0.1458, Train Accuracy: 94.81%, Val Loss: 1.1389, Val Accuracy: 74.20%




Epoch 50/50, Train Loss: 0.1506, Train Accuracy: 94.69%, Val Loss: 1.1255, Val Accuracy: 74.23%
