# ViT for classification on CIFAR-10-4x

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Record the GPU status and the Python interpreter in use for this run.
!nvidia-smi
!which python

Sat Oct  5 16:26:48 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  | 00000004:04:00.0 Off |                    0 |
| N/A   41C    P0              40W / 300W |      0MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2-32GB           On  | 00000004:05:00.0 Off |  

In [2]:
# Import the project code. The package-qualified form is tried first; if the
# `ViT` package cannot be imported (presumably because the notebook is being
# run from inside the ViT directory — TODO confirm), fall back to flat imports.
# Only ImportError triggers the fallback, so genuine failures inside the
# modules (syntax errors, runtime errors at import time) and KeyboardInterrupt
# are no longer silently swallowed.
try:
    from ViT.train import train
    from ViT.utils import load_cifar10_4x
    from ViT.model import *
except ImportError:
    from train import train
    from utils import load_cifar10_4x
    from model import *

from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F

import datetime

import os

# Dataset location. The original absolute path stays the default, but it can
# be overridden with the CIFAR10_4X_DIR environment variable so the notebook
# can run on other machines without editing the cell.
DATA_DIR = os.environ.get("CIFAR10_4X_DIR", "/nobackup/users/dcao2028/data")

# Build the training and validation loaders with the project helper.
train_loader, val_loader = load_cifar10_4x(dir=DATA_DIR, train_batch_size=128, valid_batch_size=256, augment=True)

# Directory for the per-run training logs. exist_ok=True makes the call
# idempotent and avoids the check-then-create race of os.path.exists().
os.makedirs("ViT/log", exist_ok=True)

def timestr():
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``.

    The result is used as a unique identifier for a run (log file name and
    checkpoint file name).
    """
    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

def get_outdir(time_str):
    """Return the log file path for the run identified by ``time_str``.

    Args:
        time_str: Run identifier; it is interpolated into the file name.

    Returns:
        The path ``ViT/log/<time_str>.out`` as a string.
    """
    return "ViT/log/{}.out".format(time_str)

In [3]:
# Hyperparameters for the first training run (model trained from scratch).

epochs = 50
patch_size = 16
embed_dim = 512
n_layers = 6
heads = 8
attn_dim = 1024
mlp_dim = None # default to 4*embed_dim
dropout = 0.2
mlp_dropout = 0.2
# NOTE(review): 'sinousoidal' looks like a misspelling of 'sinusoidal', but the
# string is consumed by ViT (defined outside this notebook) — keep the exact
# spelling until the model code is confirmed to accept the corrected one.
embedding = 'sinousoidal'

# image_size=128 presumably matches the 4x-upscaled CIFAR-10 images — TODO confirm.
model = ViT(image_size=128, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, dropout=dropout, mlp_dropout=mlp_dropout, embedding=embedding)

# To resume from an earlier checkpoint instead of training from scratch, load
# its state dict into `model` here (see the next training cell for an example).

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=0)

# The timestamp identifies this run: it names both the log file and the checkpoint.
time_str = timestr()

print(f"Time string: {time_str}")

# Report the number of trainable parameters.
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

# Create the checkpoint directory before training starts, so a missing
# directory cannot make torch.save fail after all epochs have already run.
os.makedirs("ViT/models", exist_ok=True)

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# Save the trained weights under the run's timestamp.
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")
print(f"Model saved to ViT/models/{time_str}.pth")

Time string: 20241005_151849
The model has 25,581,578 trainable parameters


Epoch 1/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Using 4 GPUs


Epoch 2/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 1/50, Train Loss: 1.8629, Train Accuracy: 29.87%, Val Loss: 1.6260, Val Accuracy: 39.22%


Epoch 3/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 2/50, Train Loss: 1.5088, Train Accuracy: 44.58%, Val Loss: 1.4679, Val Accuracy: 47.35%


Epoch 4/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 3/50, Train Loss: 1.3289, Train Accuracy: 51.85%, Val Loss: 1.2509, Val Accuracy: 55.08%


Epoch 5/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 4/50, Train Loss: 1.2207, Train Accuracy: 56.10%, Val Loss: 1.1800, Val Accuracy: 57.13%


Epoch 6/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 5/50, Train Loss: 1.1544, Train Accuracy: 58.54%, Val Loss: 1.1720, Val Accuracy: 58.59%


Epoch 7/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 6/50, Train Loss: 1.0924, Train Accuracy: 60.57%, Val Loss: 1.0930, Val Accuracy: 61.72%


Epoch 8/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 7/50, Train Loss: 1.0470, Train Accuracy: 62.48%, Val Loss: 1.0515, Val Accuracy: 63.45%


Epoch 9/50:   0%|                                                                               | 0/313 [00:00<?, ?it/s]

Epoch 8/50, Train Loss: 1.0059, Train Accuracy: 64.11%, Val Loss: 0.9989, Val Accuracy: 65.17%


Epoch 10/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 9/50, Train Loss: 0.9644, Train Accuracy: 65.45%, Val Loss: 0.9863, Val Accuracy: 65.29%


Epoch 11/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 10/50, Train Loss: 0.9282, Train Accuracy: 66.98%, Val Loss: 0.9456, Val Accuracy: 67.01%


Epoch 12/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 11/50, Train Loss: 0.8930, Train Accuracy: 68.53%, Val Loss: 0.9541, Val Accuracy: 67.20%


Epoch 13/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 12/50, Train Loss: 0.8652, Train Accuracy: 69.48%, Val Loss: 0.9113, Val Accuracy: 68.00%


Epoch 14/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 13/50, Train Loss: 0.8379, Train Accuracy: 70.22%, Val Loss: 0.8889, Val Accuracy: 69.33%


Epoch 15/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 14/50, Train Loss: 0.8087, Train Accuracy: 71.19%, Val Loss: 0.8358, Val Accuracy: 70.56%


Epoch 16/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 15/50, Train Loss: 0.7881, Train Accuracy: 72.20%, Val Loss: 0.8812, Val Accuracy: 70.24%


Epoch 17/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 16/50, Train Loss: 0.7631, Train Accuracy: 73.17%, Val Loss: 0.8335, Val Accuracy: 71.37%


Epoch 18/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 17/50, Train Loss: 0.7387, Train Accuracy: 73.97%, Val Loss: 0.8195, Val Accuracy: 72.35%


Epoch 19/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 18/50, Train Loss: 0.7231, Train Accuracy: 74.58%, Val Loss: 0.8530, Val Accuracy: 70.84%


Epoch 20/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 19/50, Train Loss: 0.7034, Train Accuracy: 75.12%, Val Loss: 0.8083, Val Accuracy: 72.73%


Epoch 21/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 20/50, Train Loss: 0.6833, Train Accuracy: 75.96%, Val Loss: 0.8171, Val Accuracy: 72.71%


Epoch 22/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 21/50, Train Loss: 0.6661, Train Accuracy: 76.31%, Val Loss: 0.8074, Val Accuracy: 72.90%


Epoch 23/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 22/50, Train Loss: 0.6465, Train Accuracy: 77.04%, Val Loss: 0.8304, Val Accuracy: 72.58%


Epoch 24/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 23/50, Train Loss: 0.6330, Train Accuracy: 77.64%, Val Loss: 0.7877, Val Accuracy: 73.81%


Epoch 25/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 24/50, Train Loss: 0.6159, Train Accuracy: 78.38%, Val Loss: 0.8211, Val Accuracy: 73.36%


Epoch 26/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 25/50, Train Loss: 0.6020, Train Accuracy: 78.81%, Val Loss: 0.7776, Val Accuracy: 74.77%


Epoch 27/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 26/50, Train Loss: 0.5868, Train Accuracy: 79.30%, Val Loss: 0.7443, Val Accuracy: 75.44%


Epoch 28/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 27/50, Train Loss: 0.5740, Train Accuracy: 79.73%, Val Loss: 0.7393, Val Accuracy: 75.12%


Epoch 29/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 28/50, Train Loss: 0.5578, Train Accuracy: 80.26%, Val Loss: 0.7999, Val Accuracy: 74.25%


Epoch 30/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 29/50, Train Loss: 0.5427, Train Accuracy: 80.72%, Val Loss: 0.7488, Val Accuracy: 75.64%


Epoch 31/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 30/50, Train Loss: 0.5275, Train Accuracy: 81.19%, Val Loss: 0.7735, Val Accuracy: 75.67%


Epoch 32/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 31/50, Train Loss: 0.5142, Train Accuracy: 81.73%, Val Loss: 0.7847, Val Accuracy: 75.42%


Epoch 33/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 32/50, Train Loss: 0.4995, Train Accuracy: 82.14%, Val Loss: 0.7756, Val Accuracy: 75.54%


Epoch 34/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 33/50, Train Loss: 0.4903, Train Accuracy: 82.66%, Val Loss: 0.7730, Val Accuracy: 75.61%


Epoch 35/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 34/50, Train Loss: 0.4769, Train Accuracy: 83.09%, Val Loss: 0.7649, Val Accuracy: 75.41%


Epoch 36/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 35/50, Train Loss: 0.4682, Train Accuracy: 83.27%, Val Loss: 0.7966, Val Accuracy: 75.75%


Epoch 37/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 36/50, Train Loss: 0.4544, Train Accuracy: 83.66%, Val Loss: 0.8000, Val Accuracy: 75.11%


Epoch 38/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 37/50, Train Loss: 0.4457, Train Accuracy: 83.94%, Val Loss: 0.7926, Val Accuracy: 76.22%


Epoch 39/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 38/50, Train Loss: 0.4284, Train Accuracy: 84.58%, Val Loss: 0.8107, Val Accuracy: 74.96%


Epoch 40/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 39/50, Train Loss: 0.4140, Train Accuracy: 85.41%, Val Loss: 0.8040, Val Accuracy: 76.35%


Epoch 41/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 40/50, Train Loss: 0.4103, Train Accuracy: 85.32%, Val Loss: 0.7876, Val Accuracy: 76.67%


Epoch 42/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 41/50, Train Loss: 0.3962, Train Accuracy: 85.82%, Val Loss: 0.7937, Val Accuracy: 76.36%


Epoch 43/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 42/50, Train Loss: 0.3790, Train Accuracy: 86.39%, Val Loss: 0.7799, Val Accuracy: 76.25%


Epoch 44/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 43/50, Train Loss: 0.3708, Train Accuracy: 86.85%, Val Loss: 0.8005, Val Accuracy: 76.17%


Epoch 45/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 44/50, Train Loss: 0.3619, Train Accuracy: 87.03%, Val Loss: 0.7882, Val Accuracy: 77.14%


Epoch 46/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 45/50, Train Loss: 0.3480, Train Accuracy: 87.57%, Val Loss: 0.7975, Val Accuracy: 77.16%


Epoch 47/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 46/50, Train Loss: 0.3476, Train Accuracy: 87.33%, Val Loss: 0.7878, Val Accuracy: 76.69%


Epoch 48/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 47/50, Train Loss: 0.3275, Train Accuracy: 88.34%, Val Loss: 0.8076, Val Accuracy: 77.07%


Epoch 49/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 48/50, Train Loss: 0.3228, Train Accuracy: 88.42%, Val Loss: 0.8119, Val Accuracy: 76.76%


Epoch 50/50:   0%|                                                                              | 0/313 [00:00<?, ?it/s]

Epoch 49/50, Train Loss: 0.3147, Train Accuracy: 88.64%, Val Loss: 0.8210, Val Accuracy: 76.85%


                                                                                                                        

Epoch 50/50, Train Loss: 0.3067, Train Accuracy: 88.92%, Val Loss: 0.8205, Val Accuracy: 77.24%
Model saved to ViT/models/20241005_151849.pth


In [4]:
# Hyperparameters for the second training run, which resumes from the
# checkpoint named by `last_time_str` with a lower learning rate.
# The architecture settings must match the ones the checkpoint was saved with,
# otherwise load_state_dict will reject the state dict.

epochs = 50
patch_size = 16
embed_dim = 512
n_layers = 6
heads = 8
attn_dim = 1024
mlp_dim = None # default to 4*embed_dim
dropout = 0.2
mlp_dropout = 0.2
# NOTE(review): 'sinousoidal' looks like a misspelling of 'sinusoidal', but the
# string is consumed by ViT (defined outside this notebook) — keep the exact
# spelling until the model code is confirmed to accept the corrected one.
embedding = 'sinousoidal'

# image_size=128 presumably matches the 4x-upscaled CIFAR-10 images — TODO confirm.
model = ViT(image_size=128, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, dropout=dropout, mlp_dropout=mlp_dropout, embedding=embedding)

last_time_str = "20241005_151849"

# Load the previous run's weights. map_location="cpu" makes the load
# independent of the device the checkpoint was saved from; load_state_dict
# then copies the values into the model's own parameters.
model.load_state_dict(torch.load(f"ViT/models/{last_time_str}.pth", map_location="cpu"))

# Lower learning rate than the first run (5e-5 instead of 1e-4).
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5, betas=(0.9, 0.999), weight_decay=0)

# The timestamp identifies this run: it names both the log file and the checkpoint.
time_str = timestr()

print(f"Time string: {time_str}")

# Report the number of trainable parameters.
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

# Create the checkpoint directory before training starts, so a missing
# directory cannot make torch.save fail after all epochs have already run.
os.makedirs("ViT/models", exist_ok=True)

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# Save the fine-tuned weights under the new run's timestamp.
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")
print(f"Model saved to ViT/models/{time_str}.pth")



Time string: 20241005_171514
The model has 25,581,578 trainable parameters
Using 4 GPUs




Epoch 1/50, Train Loss: 0.2315, Train Accuracy: 91.65%, Val Loss: 0.8384, Val Accuracy: 77.78%




Epoch 2/50, Train Loss: 0.2079, Train Accuracy: 92.67%, Val Loss: 0.8439, Val Accuracy: 77.83%




Epoch 3/50, Train Loss: 0.1959, Train Accuracy: 92.97%, Val Loss: 0.8696, Val Accuracy: 77.99%




Epoch 4/50, Train Loss: 0.1851, Train Accuracy: 93.43%, Val Loss: 0.8874, Val Accuracy: 78.03%




Epoch 5/50, Train Loss: 0.1796, Train Accuracy: 93.62%, Val Loss: 0.9081, Val Accuracy: 78.00%




Epoch 6/50, Train Loss: 0.1675, Train Accuracy: 94.15%, Val Loss: 0.9340, Val Accuracy: 77.53%




Epoch 7/50, Train Loss: 0.1646, Train Accuracy: 94.10%, Val Loss: 0.9308, Val Accuracy: 77.81%




Epoch 8/50, Train Loss: 0.1573, Train Accuracy: 94.42%, Val Loss: 0.9299, Val Accuracy: 77.95%




Epoch 9/50, Train Loss: 0.1520, Train Accuracy: 94.64%, Val Loss: 0.9960, Val Accuracy: 76.70%




Epoch 10/50, Train Loss: 0.1434, Train Accuracy: 94.91%, Val Loss: 0.9675, Val Accuracy: 77.82%




Epoch 11/50, Train Loss: 0.1411, Train Accuracy: 94.93%, Val Loss: 0.9653, Val Accuracy: 77.95%




Epoch 12/50, Train Loss: 0.1332, Train Accuracy: 95.21%, Val Loss: 0.9672, Val Accuracy: 77.90%




Epoch 13/50, Train Loss: 0.1378, Train Accuracy: 95.02%, Val Loss: 0.9852, Val Accuracy: 77.82%




Epoch 14/50, Train Loss: 0.1274, Train Accuracy: 95.39%, Val Loss: 1.0034, Val Accuracy: 77.91%




Epoch 15/50, Train Loss: 0.1231, Train Accuracy: 95.59%, Val Loss: 0.9817, Val Accuracy: 77.85%




Epoch 16/50, Train Loss: 0.1166, Train Accuracy: 95.92%, Val Loss: 1.0299, Val Accuracy: 77.80%




Epoch 17/50, Train Loss: 0.1149, Train Accuracy: 95.93%, Val Loss: 1.0035, Val Accuracy: 78.15%




Epoch 18/50, Train Loss: 0.1141, Train Accuracy: 95.99%, Val Loss: 1.0324, Val Accuracy: 77.72%




Epoch 19/50, Train Loss: 0.1114, Train Accuracy: 96.04%, Val Loss: 1.0489, Val Accuracy: 77.38%




Epoch 20/50, Train Loss: 0.1119, Train Accuracy: 96.09%, Val Loss: 1.0246, Val Accuracy: 78.19%




Epoch 21/50, Train Loss: 0.1062, Train Accuracy: 96.19%, Val Loss: 1.0432, Val Accuracy: 77.67%




Epoch 22/50, Train Loss: 0.1011, Train Accuracy: 96.39%, Val Loss: 1.0457, Val Accuracy: 77.60%




Epoch 23/50, Train Loss: 0.0971, Train Accuracy: 96.64%, Val Loss: 1.0370, Val Accuracy: 78.21%




Epoch 24/50, Train Loss: 0.0976, Train Accuracy: 96.59%, Val Loss: 1.0960, Val Accuracy: 77.94%




Epoch 25/50, Train Loss: 0.0941, Train Accuracy: 96.69%, Val Loss: 1.0868, Val Accuracy: 77.90%




Epoch 26/50, Train Loss: 0.0900, Train Accuracy: 96.81%, Val Loss: 1.1376, Val Accuracy: 77.85%




Epoch 27/50, Train Loss: 0.0865, Train Accuracy: 96.91%, Val Loss: 1.1087, Val Accuracy: 77.85%




Epoch 28/50, Train Loss: 0.0911, Train Accuracy: 96.81%, Val Loss: 1.0942, Val Accuracy: 77.68%




Epoch 29/50, Train Loss: 0.0857, Train Accuracy: 96.99%, Val Loss: 1.1127, Val Accuracy: 77.87%




Epoch 30/50, Train Loss: 0.0794, Train Accuracy: 97.22%, Val Loss: 1.1126, Val Accuracy: 78.18%




Epoch 31/50, Train Loss: 0.0760, Train Accuracy: 97.30%, Val Loss: 1.1286, Val Accuracy: 77.94%




Epoch 32/50, Train Loss: 0.0751, Train Accuracy: 97.40%, Val Loss: 1.1020, Val Accuracy: 78.24%




Epoch 33/50, Train Loss: 0.0755, Train Accuracy: 97.30%, Val Loss: 1.1025, Val Accuracy: 78.21%




Epoch 34/50, Train Loss: 0.0738, Train Accuracy: 97.40%, Val Loss: 1.1189, Val Accuracy: 78.14%




Epoch 35/50, Train Loss: 0.0754, Train Accuracy: 97.39%, Val Loss: 1.1071, Val Accuracy: 78.80%




Epoch 36/50, Train Loss: 0.0662, Train Accuracy: 97.66%, Val Loss: 1.2029, Val Accuracy: 77.49%




Epoch 37/50, Train Loss: 0.0692, Train Accuracy: 97.56%, Val Loss: 1.1611, Val Accuracy: 77.95%




Epoch 38/50, Train Loss: 0.0637, Train Accuracy: 97.70%, Val Loss: 1.1780, Val Accuracy: 77.75%




Epoch 39/50, Train Loss: 0.0594, Train Accuracy: 97.93%, Val Loss: 1.1597, Val Accuracy: 78.63%




Epoch 40/50, Train Loss: 0.0687, Train Accuracy: 97.59%, Val Loss: 1.2188, Val Accuracy: 77.56%




Epoch 41/50, Train Loss: 0.0633, Train Accuracy: 97.81%, Val Loss: 1.2023, Val Accuracy: 77.96%




Epoch 42/50, Train Loss: 0.0601, Train Accuracy: 97.81%, Val Loss: 1.1853, Val Accuracy: 77.91%




Epoch 43/50, Train Loss: 0.0615, Train Accuracy: 97.86%, Val Loss: 1.2078, Val Accuracy: 77.91%




Epoch 44/50, Train Loss: 0.0576, Train Accuracy: 97.90%, Val Loss: 1.1993, Val Accuracy: 77.88%




Epoch 45/50, Train Loss: 0.0507, Train Accuracy: 98.26%, Val Loss: 1.2318, Val Accuracy: 77.74%




Epoch 46/50, Train Loss: 0.0548, Train Accuracy: 98.11%, Val Loss: 1.2670, Val Accuracy: 77.57%




Epoch 47/50, Train Loss: 0.0567, Train Accuracy: 97.94%, Val Loss: 1.2085, Val Accuracy: 78.33%




Epoch 48/50, Train Loss: 0.0542, Train Accuracy: 98.11%, Val Loss: 1.2425, Val Accuracy: 78.11%




Epoch 49/50, Train Loss: 0.0518, Train Accuracy: 98.22%, Val Loss: 1.2237, Val Accuracy: 78.06%




Epoch 50/50, Train Loss: 0.0537, Train Accuracy: 98.17%, Val Loss: 1.2659, Val Accuracy: 77.82%
Model saved to ViT/models/20241005_171514.pth
