# ViT for classification on CIFAR-10

In [1]:
# Enable the autoreload extension in mode 2 so that edited project modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Print the GPU status and the path of the `python` executable found by the shell.
!nvidia-smi
!which python

Fri Oct  4 19:02:43 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.86.10              Driver Version: 535.86.10    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  | 00000004:04:00.0 Off |                    0 |
| N/A   41C    P0              41W / 300W |      0MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
# Prefer the package-qualified imports and fall back to the flat module names
# when the `ViT` package cannot be imported (presumably when the notebook is
# started from inside the ViT directory -- TODO confirm).
try:
    from ViT.train import train
    from ViT.utils import cifar_train_set, cifar_val_set
    from ViT.model import *
except ImportError:
    # Only import failures trigger the fallback. A bare `except:` would also
    # swallow KeyboardInterrupt and genuine errors raised inside the modules,
    # hiding the real cause behind a second, unrelated import error.
    from train import train
    from utils import cifar_train_set, cifar_val_set
    from model import *

from torch.utils.data import DataLoader

import torch
import torch.nn as nn
import torch.nn.functional as F

import datetime

# Batch the training and validation datasets. Training batches are reshuffled
# every epoch. The validation loader keeps a fixed order: shuffling it adds
# nothing to metrics computed over the whole split and only makes per-batch
# results harder to reproduce.
train_loader = DataLoader(cifar_train_set, batch_size=256, shuffle=True, drop_last=False, pin_memory=True)
val_loader = DataLoader(cifar_val_set, batch_size=500, shuffle=False, drop_last=False, pin_memory=True)

import os

# Create every output directory the notebook writes to before any training
# starts: "ViT/log" receives the training logs and "ViT/models" receives the
# checkpoints written by torch.save in the training cells. Creating both here
# prevents a missing directory from failing the save after a full training run.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
for output_dir in ("ViT/log", "ViT/models"):
    os.makedirs(output_dir, exist_ok=True)

def timestr():
    """Return the current local time formatted as ``YYYYMMDD_HHMMSS``."""
    stamp_format = "%Y%m%d_%H%M%S"
    return datetime.datetime.now().strftime(stamp_format)

def get_outdir(time_str):
    """Build the log file path for a run.

    Args:
        time_str: Identifier of the run, used as the file stem.

    Returns:
        The string ``ViT/log/<time_str>.out``.
    """
    return f"ViT/log/{time_str}.out"

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Hyperparameters for the model and for this training run.

epochs = 50
patch_size = 8
embed_dim = 256
n_layers = 6
heads = 8
attn_dim = 512
mlp_dim = None # default to 4*embed_dim
pool = 'cls'
dropout = 0.1

model = ViT(image_size=32, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, pool=pool, dropout=dropout)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, betas=(0.9, 0.999), weight_decay=5e-4)

# One timestamp names both the training log and the checkpoint of this run.
time_str = timestr()

print(f"Time string: {time_str}")

# Report the number of trainable parameters.
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

# The call is already wrapped in parentheses, so no trailing line continuation
# is needed (a stray backslash here would silently join the next line).
train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# Save only the model weights; the optimizer state is not checkpointed.
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")

print(f"models saved to ViT/models/{time_str}.pth")

Time string: 20241004_133312
The model has 6,359,562 trainable parameters


Epoch 2/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 1/50, Train Loss: 1.9669, Train Accuracy: 26.23%, Val Loss: 1.7118, Val Accuracy: 37.17%


Epoch 3/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 2/50, Train Loss: 1.6896, Train Accuracy: 37.97%, Val Loss: 1.5440, Val Accuracy: 44.06%


Epoch 4/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 3/50, Train Loss: 1.6042, Train Accuracy: 41.44%, Val Loss: 1.4898, Val Accuracy: 47.19%


Epoch 5/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 4/50, Train Loss: 1.5500, Train Accuracy: 43.60%, Val Loss: 1.4661, Val Accuracy: 46.92%


Epoch 6/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 5/50, Train Loss: 1.5178, Train Accuracy: 44.67%, Val Loss: 1.4180, Val Accuracy: 48.83%


Epoch 7/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 6/50, Train Loss: 1.4886, Train Accuracy: 46.10%, Val Loss: 1.3669, Val Accuracy: 51.26%


Epoch 8/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 7/50, Train Loss: 1.4547, Train Accuracy: 46.95%, Val Loss: 1.3577, Val Accuracy: 51.31%


Epoch 9/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.33]

Epoch 8/50, Train Loss: 1.4381, Train Accuracy: 47.78%, Val Loss: 1.3719, Val Accuracy: 50.86%


Epoch 10/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 9/50, Train Loss: 1.4172, Train Accuracy: 48.61%, Val Loss: 1.3351, Val Accuracy: 51.55%


Epoch 11/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.13it/s, Train Loss=1.41]

Epoch 10/50, Train Loss: 1.3918, Train Accuracy: 49.85%, Val Loss: 1.3652, Val Accuracy: 51.80%


Epoch 12/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 11/50, Train Loss: 1.3840, Train Accuracy: 49.91%, Val Loss: 1.3084, Val Accuracy: 52.82%


Epoch 13/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 12/50, Train Loss: 1.3704, Train Accuracy: 50.40%, Val Loss: 1.2775, Val Accuracy: 53.90%


Epoch 14/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 13/50, Train Loss: 1.3542, Train Accuracy: 51.16%, Val Loss: 1.2541, Val Accuracy: 55.00%


Epoch 15/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 14/50, Train Loss: 1.3304, Train Accuracy: 52.06%, Val Loss: 1.2384, Val Accuracy: 55.77%


Epoch 16/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 15/50, Train Loss: 1.3251, Train Accuracy: 52.35%, Val Loss: 1.2379, Val Accuracy: 55.36%


Epoch 17/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.3]

Epoch 16/50, Train Loss: 1.3095, Train Accuracy: 52.42%, Val Loss: 1.2401, Val Accuracy: 55.10%


Epoch 18/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 17/50, Train Loss: 1.2963, Train Accuracy: 53.14%, Val Loss: 1.2234, Val Accuracy: 55.93%


Epoch 19/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.2]

Epoch 18/50, Train Loss: 1.2865, Train Accuracy: 53.74%, Val Loss: 1.2258, Val Accuracy: 56.06%


Epoch 20/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 19/50, Train Loss: 1.2736, Train Accuracy: 54.31%, Val Loss: 1.1872, Val Accuracy: 57.14%


Epoch 21/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.15it/s, Train Loss=1.34]

Epoch 20/50, Train Loss: 1.2591, Train Accuracy: 54.54%, Val Loss: 1.1940, Val Accuracy: 56.88%


Epoch 22/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.13it/s, Train Loss=1.41]

Epoch 21/50, Train Loss: 1.2496, Train Accuracy: 54.81%, Val Loss: 1.2242, Val Accuracy: 56.41%


Epoch 23/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.08it/s, Train Loss=1.15]

Epoch 22/50, Train Loss: 1.2370, Train Accuracy: 55.45%, Val Loss: 1.1940, Val Accuracy: 57.42%


Epoch 24/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 23/50, Train Loss: 1.2343, Train Accuracy: 55.57%, Val Loss: 1.1976, Val Accuracy: 56.76%


Epoch 25/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 24/50, Train Loss: 1.2133, Train Accuracy: 56.51%, Val Loss: 1.1350, Val Accuracy: 59.70%


Epoch 26/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.14it/s, Train Loss=1.19]

Epoch 25/50, Train Loss: 1.2108, Train Accuracy: 56.56%, Val Loss: 1.1437, Val Accuracy: 59.18%


Epoch 27/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.21]

Epoch 26/50, Train Loss: 1.2083, Train Accuracy: 56.61%, Val Loss: 1.1670, Val Accuracy: 57.68%


Epoch 28/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.13it/s, Train Loss=1.21]

Epoch 27/50, Train Loss: 1.1885, Train Accuracy: 57.24%, Val Loss: 1.1580, Val Accuracy: 58.33%


Epoch 29/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 28/50, Train Loss: 1.1833, Train Accuracy: 57.38%, Val Loss: 1.1123, Val Accuracy: 59.54%


Epoch 30/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.15it/s, Train Loss=1.24]

Epoch 29/50, Train Loss: 1.1789, Train Accuracy: 57.63%, Val Loss: 1.1496, Val Accuracy: 58.99%


Epoch 31/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.13it/s, Train Loss=1.14]

Epoch 30/50, Train Loss: 1.1718, Train Accuracy: 57.87%, Val Loss: 1.1618, Val Accuracy: 59.15%


Epoch 32/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 31/50, Train Loss: 1.1708, Train Accuracy: 57.95%, Val Loss: 1.1034, Val Accuracy: 60.85%


Epoch 33/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 32/50, Train Loss: 1.1559, Train Accuracy: 58.50%, Val Loss: 1.0916, Val Accuracy: 60.69%


Epoch 34/50:   0%|                                                             | 0/196 [00:00<?, ?it/s, Train Loss=1.03]

Epoch 33/50, Train Loss: 1.1532, Train Accuracy: 58.68%, Val Loss: 1.1199, Val Accuracy: 59.84%


Epoch 35/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 34/50, Train Loss: 1.1382, Train Accuracy: 59.26%, Val Loss: 1.0802, Val Accuracy: 61.54%


Epoch 36/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.11it/s, Train Loss=1.25]

Epoch 35/50, Train Loss: 1.1345, Train Accuracy: 59.31%, Val Loss: 1.1131, Val Accuracy: 60.47%


Epoch 37/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.14it/s, Train Loss=1.05]

Epoch 36/50, Train Loss: 1.1343, Train Accuracy: 59.49%, Val Loss: 1.0811, Val Accuracy: 61.49%


Epoch 38/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 37/50, Train Loss: 1.1301, Train Accuracy: 59.63%, Val Loss: 1.0684, Val Accuracy: 61.89%


Epoch 39/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.13it/s, Train Loss=1.11]

Epoch 38/50, Train Loss: 1.1211, Train Accuracy: 59.73%, Val Loss: 1.1246, Val Accuracy: 60.24%


Epoch 40/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 39/50, Train Loss: 1.1164, Train Accuracy: 60.16%, Val Loss: 1.0518, Val Accuracy: 62.17%


Epoch 41/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 40/50, Train Loss: 1.1102, Train Accuracy: 60.27%, Val Loss: 1.0503, Val Accuracy: 62.70%


Epoch 42/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 41/50, Train Loss: 1.1061, Train Accuracy: 60.32%, Val Loss: 1.0140, Val Accuracy: 64.33%


Epoch 43/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.10it/s, Train Loss=1.04]

Epoch 42/50, Train Loss: 1.1017, Train Accuracy: 60.69%, Val Loss: 1.0326, Val Accuracy: 63.54%


Epoch 44/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.1]

Epoch 43/50, Train Loss: 1.0962, Train Accuracy: 60.97%, Val Loss: 1.0429, Val Accuracy: 62.45%


Epoch 45/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.15]

Epoch 44/50, Train Loss: 1.0932, Train Accuracy: 60.88%, Val Loss: 1.0419, Val Accuracy: 62.99%


Epoch 46/50:   0%|                                                                | 0/196 [00:00<?, ?it/s, Train Loss=1]

Epoch 45/50, Train Loss: 1.0879, Train Accuracy: 61.10%, Val Loss: 1.0425, Val Accuracy: 62.53%


Epoch 47/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.06]

Epoch 46/50, Train Loss: 1.0823, Train Accuracy: 61.38%, Val Loss: 1.0473, Val Accuracy: 62.32%


Epoch 48/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.13it/s, Train Loss=1.04]

Epoch 47/50, Train Loss: 1.0784, Train Accuracy: 61.52%, Val Loss: 1.0472, Val Accuracy: 62.54%


Epoch 49/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.04]

Epoch 48/50, Train Loss: 1.0690, Train Accuracy: 61.86%, Val Loss: 1.0343, Val Accuracy: 63.00%


Epoch 50/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.14it/s, Train Loss=1.05]

Epoch 49/50, Train Loss: 1.0728, Train Accuracy: 61.53%, Val Loss: 1.0479, Val Accuracy: 62.79%


                                                                                                                        

Epoch 50/50, Train Loss: 1.0618, Train Accuracy: 62.00%, Val Loss: 0.9984, Val Accuracy: 63.92%


In [3]:
# Continue training (overall epochs 50-100) from the checkpoint of the previous run.
# Hyperparameters: these must match the run that produced the checkpoint so the
# state dict fits the model.

epochs = 50
patch_size = 8
embed_dim = 256
n_layers = 6
heads = 8
attn_dim = 512
mlp_dim = None # default to 4*embed_dim
pool = 'cls'
dropout = 0.1

model = ViT(image_size=32, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, pool=pool, dropout=dropout)

last_time_str = "20241004_133312"

# Load the weights of the previous run.
# map_location="cpu" makes loading independent of the device the checkpoint was
# saved from; load_state_dict then copies the values into the model's parameters.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs.
model.load_state_dict(torch.load(f"ViT/models/{last_time_str}.pth", map_location="cpu", weights_only=True))

print(f"models loaded from ViT/models/{last_time_str}.pth")

# NOTE: only the model weights were checkpointed, so this new Adam instance
# starts without the optimizer state of the previous run.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, betas=(0.9, 0.999), weight_decay=5e-4)

# One timestamp names both the training log and the checkpoint of this run.
time_str = timestr()

print(f"Time string: {time_str}")

# Report the number of trainable parameters.
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# Save only the model weights under this run's timestamp.
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")

print(f"models saved to ViT/models/{time_str}.pth")

Epoch 1/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

models loaded from ViT/models/20241004_133312.pth
Time string: 20241004_144451
The model has 6,359,562 trainable parameters


Epoch 2/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 1/50, Train Loss: 1.0639, Train Accuracy: 61.96%, Val Loss: 1.0799, Val Accuracy: 61.26%


Epoch 3/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 2/50, Train Loss: 1.0559, Train Accuracy: 62.45%, Val Loss: 1.0296, Val Accuracy: 63.72%


Epoch 4/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.19]

Epoch 3/50, Train Loss: 1.0530, Train Accuracy: 62.34%, Val Loss: 1.0361, Val Accuracy: 63.36%


Epoch 5/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 4/50, Train Loss: 1.0491, Train Accuracy: 62.52%, Val Loss: 0.9905, Val Accuracy: 64.49%


Epoch 6/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.07]

Epoch 5/50, Train Loss: 1.0454, Train Accuracy: 62.80%, Val Loss: 1.0388, Val Accuracy: 63.36%


Epoch 7/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.963]

Epoch 6/50, Train Loss: 1.0421, Train Accuracy: 63.03%, Val Loss: 1.0210, Val Accuracy: 63.49%


Epoch 8/50:   1%|▎                                                        | 1/196 [00:00<00:37,  5.14it/s, Train Loss=1]

Epoch 7/50, Train Loss: 1.0447, Train Accuracy: 62.54%, Val Loss: 0.9971, Val Accuracy: 63.85%


Epoch 9/50:   0%|                                                                               | 0/196 [00:00<?, ?it/s]

Epoch 8/50, Train Loss: 1.0336, Train Accuracy: 62.81%, Val Loss: 0.9845, Val Accuracy: 64.56%


Epoch 10/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 9/50, Train Loss: 1.0322, Train Accuracy: 63.02%, Val Loss: 0.9600, Val Accuracy: 65.40%


Epoch 11/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.09it/s, Train Loss=1.02]

Epoch 10/50, Train Loss: 1.0303, Train Accuracy: 63.19%, Val Loss: 0.9956, Val Accuracy: 64.39%


Epoch 12/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.938]

Epoch 11/50, Train Loss: 1.0248, Train Accuracy: 63.37%, Val Loss: 0.9949, Val Accuracy: 64.72%


Epoch 13/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.96]

Epoch 12/50, Train Loss: 1.0204, Train Accuracy: 63.64%, Val Loss: 0.9726, Val Accuracy: 65.10%


Epoch 14/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 13/50, Train Loss: 1.0217, Train Accuracy: 63.41%, Val Loss: 0.9563, Val Accuracy: 65.84%


Epoch 15/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 14/50, Train Loss: 1.0181, Train Accuracy: 63.69%, Val Loss: 0.9506, Val Accuracy: 65.92%


Epoch 16/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.983]

Epoch 15/50, Train Loss: 1.0093, Train Accuracy: 64.06%, Val Loss: 1.0187, Val Accuracy: 63.25%


Epoch 17/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.10it/s, Train Loss=1.08]

Epoch 16/50, Train Loss: 1.0118, Train Accuracy: 63.89%, Val Loss: 1.0337, Val Accuracy: 62.68%


Epoch 18/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.11it/s, Train Loss=1.1]

Epoch 17/50, Train Loss: 1.0066, Train Accuracy: 64.24%, Val Loss: 0.9954, Val Accuracy: 64.12%


Epoch 19/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.96]

Epoch 18/50, Train Loss: 1.0006, Train Accuracy: 64.49%, Val Loss: 0.9841, Val Accuracy: 64.45%


Epoch 20/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.09it/s, Train Loss=0.937]

Epoch 19/50, Train Loss: 1.0007, Train Accuracy: 64.34%, Val Loss: 0.9766, Val Accuracy: 65.39%


Epoch 21/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 20/50, Train Loss: 1.0022, Train Accuracy: 64.02%, Val Loss: 0.9353, Val Accuracy: 66.44%


Epoch 22/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.13it/s, Train Loss=1.06]

Epoch 21/50, Train Loss: 0.9926, Train Accuracy: 64.44%, Val Loss: 0.9565, Val Accuracy: 66.13%


Epoch 23/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.946]

Epoch 22/50, Train Loss: 0.9932, Train Accuracy: 64.80%, Val Loss: 1.0387, Val Accuracy: 63.37%


Epoch 24/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.02]

Epoch 23/50, Train Loss: 0.9926, Train Accuracy: 64.54%, Val Loss: 0.9406, Val Accuracy: 66.11%


Epoch 25/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.978]

Epoch 24/50, Train Loss: 0.9922, Train Accuracy: 64.64%, Val Loss: 0.9581, Val Accuracy: 66.10%


Epoch 26/50:   1%|▎                                                   | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.935]

Epoch 25/50, Train Loss: 0.9905, Train Accuracy: 64.63%, Val Loss: 0.9710, Val Accuracy: 65.01%


Epoch 27/50:   1%|▎                                                    | 1/196 [00:00<00:37,  5.14it/s, Train Loss=1.01]

Epoch 26/50, Train Loss: 0.9897, Train Accuracy: 64.70%, Val Loss: 0.9950, Val Accuracy: 64.59%


Epoch 28/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.11it/s, Train Loss=1.03]

Epoch 27/50, Train Loss: 0.9824, Train Accuracy: 65.00%, Val Loss: 0.9699, Val Accuracy: 65.66%


Epoch 29/50:   1%|▎                                                    | 1/196 [00:00<00:38,  5.12it/s, Train Loss=1.01]

Epoch 28/50, Train Loss: 0.9774, Train Accuracy: 65.08%, Val Loss: 0.9421, Val Accuracy: 66.40%


Epoch 30/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 29/50, Train Loss: 0.9759, Train Accuracy: 65.16%, Val Loss: 0.9730, Val Accuracy: 65.41%


Epoch 31/50:   1%|▎                                                       | 1/196 [00:00<00:38,  5.13it/s, Train Loss=1]

Epoch 30/50, Train Loss: 0.9824, Train Accuracy: 65.01%, Val Loss: 1.0110, Val Accuracy: 64.44%


Epoch 32/50:   1%|▎                                                       | 1/196 [00:00<00:37,  5.13it/s, Train Loss=1]

Epoch 31/50, Train Loss: 0.9757, Train Accuracy: 65.33%, Val Loss: 0.9627, Val Accuracy: 65.87%


Epoch 33/50:   0%|                                                            | 0/196 [00:00<?, ?it/s, Train Loss=0.929]

Epoch 32/50, Train Loss: 0.9709, Train Accuracy: 65.33%, Val Loss: 0.9389, Val Accuracy: 66.14%


Epoch 34/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.863]

Epoch 33/50, Train Loss: 0.9703, Train Accuracy: 65.30%, Val Loss: 0.9572, Val Accuracy: 66.33%


Epoch 35/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.09it/s, Train Loss=0.944]

Epoch 34/50, Train Loss: 0.9660, Train Accuracy: 65.50%, Val Loss: 0.9375, Val Accuracy: 66.43%


Epoch 36/50:   1%|▎                                                     | 1/196 [00:00<00:38,  5.11it/s, Train Loss=1.1]

Epoch 35/50, Train Loss: 0.9643, Train Accuracy: 65.66%, Val Loss: 0.9726, Val Accuracy: 65.22%


Epoch 37/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 36/50, Train Loss: 0.9665, Train Accuracy: 65.32%, Val Loss: 0.9204, Val Accuracy: 67.29%


Epoch 38/50:   1%|▎                                                   | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.867]

Epoch 37/50, Train Loss: 0.9595, Train Accuracy: 65.72%, Val Loss: 0.9208, Val Accuracy: 67.32%


Epoch 39/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.938]

Epoch 38/50, Train Loss: 0.9628, Train Accuracy: 65.87%, Val Loss: 0.9743, Val Accuracy: 65.68%


Epoch 40/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.887]

Epoch 39/50, Train Loss: 0.9610, Train Accuracy: 65.73%, Val Loss: 0.9338, Val Accuracy: 67.06%


Epoch 41/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.923]

Epoch 40/50, Train Loss: 0.9474, Train Accuracy: 66.17%, Val Loss: 0.9430, Val Accuracy: 66.98%


Epoch 42/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.917]

Epoch 41/50, Train Loss: 0.9540, Train Accuracy: 66.01%, Val Loss: 0.9429, Val Accuracy: 66.47%


Epoch 43/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.837]

Epoch 42/50, Train Loss: 0.9525, Train Accuracy: 65.96%, Val Loss: 0.9286, Val Accuracy: 66.94%


Epoch 44/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.963]

Epoch 43/50, Train Loss: 0.9483, Train Accuracy: 66.62%, Val Loss: 0.9564, Val Accuracy: 66.42%


Epoch 45/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 44/50, Train Loss: 0.9450, Train Accuracy: 66.22%, Val Loss: 0.9357, Val Accuracy: 66.75%


Epoch 46/50:   0%|                                                                              | 0/196 [00:00<?, ?it/s]

Epoch 45/50, Train Loss: 0.9440, Train Accuracy: 66.36%, Val Loss: 0.9039, Val Accuracy: 68.27%


Epoch 47/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.08it/s, Train Loss=0.904]

Epoch 46/50, Train Loss: 0.9432, Train Accuracy: 66.33%, Val Loss: 0.9304, Val Accuracy: 67.32%


Epoch 48/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.927]

Epoch 47/50, Train Loss: 0.9492, Train Accuracy: 66.31%, Val Loss: 0.9180, Val Accuracy: 67.01%


Epoch 49/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.968]

Epoch 48/50, Train Loss: 0.9410, Train Accuracy: 66.43%, Val Loss: 0.9224, Val Accuracy: 67.19%


Epoch 50/50:   1%|▎                                                   | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.957]

Epoch 49/50, Train Loss: 0.9414, Train Accuracy: 66.54%, Val Loss: 0.9193, Val Accuracy: 67.14%


                                                                                                                        

Epoch 50/50, Train Loss: 0.9337, Train Accuracy: 66.81%, Val Loss: 0.9378, Val Accuracy: 67.03%
models saved to ViT/models/20241004_144451.pth


In [3]:
# Continue training (overall epochs 100-150) from the checkpoint of the previous run.
# Hyperparameters: these must match the run that produced the checkpoint so the
# state dict fits the model.

epochs = 50
patch_size = 8
embed_dim = 256
n_layers = 6
heads = 8
attn_dim = 512
mlp_dim = None # default to 4*embed_dim
pool = 'cls'
dropout = 0.1

model = ViT(image_size=32, patch_size=patch_size, num_classes=10, embed_dim=embed_dim, n_layers=n_layers, heads=heads, attn_dim=attn_dim, mlp_dim=mlp_dim, pool=pool, dropout=dropout)

last_time_str = "20241004_144451"

# Load the weights of the previous run.
# map_location="cpu" makes loading independent of the device the checkpoint was
# saved from; load_state_dict then copies the values into the model's parameters.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs.
model.load_state_dict(torch.load(f"ViT/models/{last_time_str}.pth", map_location="cpu", weights_only=True))

print(f"models loaded from ViT/models/{last_time_str}.pth")

# This stage uses a lower learning rate (1e-4) than the 5e-4 of the earlier cells.
# NOTE: only the model weights were checkpointed, so this new Adam instance
# starts without the optimizer state of the previous run.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=5e-4)

# One timestamp names both the training log and the checkpoint of this run.
time_str = timestr()

print(f"Time string: {time_str}")

# Report the number of trainable parameters.
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(),
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# Save only the model weights under this run's timestamp.
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")

print(f"models saved to ViT/models/{time_str}.pth")

Epoch 1/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

models loaded from ViT/models/20241004_144451.pth
Time string: 20241004_174316
The model has 6,359,562 trainable parameters


Epoch 2/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 1/50, Train Loss: 0.8605, Train Accuracy: 69.26%, Val Loss: 0.8951, Val Accuracy: 68.22%


Epoch 3/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 2/50, Train Loss: 0.8349, Train Accuracy: 70.30%, Val Loss: 0.8695, Val Accuracy: 68.96%


Epoch 4/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 3/50, Train Loss: 0.8236, Train Accuracy: 70.95%, Val Loss: 0.8571, Val Accuracy: 69.23%


Epoch 5/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.886]

Epoch 4/50, Train Loss: 0.8200, Train Accuracy: 70.88%, Val Loss: 0.8775, Val Accuracy: 69.08%


Epoch 6/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.887]

Epoch 5/50, Train Loss: 0.8182, Train Accuracy: 70.92%, Val Loss: 0.8691, Val Accuracy: 69.54%


Epoch 7/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 6/50, Train Loss: 0.8121, Train Accuracy: 71.13%, Val Loss: 0.8711, Val Accuracy: 69.36%


Epoch 8/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.732]

Epoch 7/50, Train Loss: 0.8102, Train Accuracy: 71.23%, Val Loss: 0.8667, Val Accuracy: 69.36%


Epoch 9/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.703]

Epoch 8/50, Train Loss: 0.8024, Train Accuracy: 71.30%, Val Loss: 0.8691, Val Accuracy: 69.64%


Epoch 10/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.911]

Epoch 9/50, Train Loss: 0.7993, Train Accuracy: 71.62%, Val Loss: 0.8743, Val Accuracy: 69.50%


Epoch 11/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 10/50, Train Loss: 0.7996, Train Accuracy: 71.48%, Val Loss: 0.8513, Val Accuracy: 69.62%


Epoch 12/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 11/50, Train Loss: 0.7928, Train Accuracy: 71.95%, Val Loss: 0.8418, Val Accuracy: 70.40%


Epoch 13/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.74]

Epoch 12/50, Train Loss: 0.7901, Train Accuracy: 71.86%, Val Loss: 0.8465, Val Accuracy: 70.55%


Epoch 14/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 13/50, Train Loss: 0.7896, Train Accuracy: 71.82%, Val Loss: 0.8240, Val Accuracy: 70.98%


Epoch 15/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.725]

Epoch 14/50, Train Loss: 0.7888, Train Accuracy: 72.01%, Val Loss: 0.8621, Val Accuracy: 69.99%


Epoch 16/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.799]

Epoch 15/50, Train Loss: 0.7857, Train Accuracy: 72.10%, Val Loss: 0.8380, Val Accuracy: 70.58%


Epoch 17/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.921]

Epoch 16/50, Train Loss: 0.7841, Train Accuracy: 72.06%, Val Loss: 0.8613, Val Accuracy: 69.95%


Epoch 18/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 17/50, Train Loss: 0.7778, Train Accuracy: 72.47%, Val Loss: 0.8219, Val Accuracy: 70.95%


Epoch 19/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.758]

Epoch 18/50, Train Loss: 0.7827, Train Accuracy: 72.07%, Val Loss: 0.8370, Val Accuracy: 70.57%


Epoch 20/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.07it/s, Train Loss=0.707]

Epoch 19/50, Train Loss: 0.7756, Train Accuracy: 72.38%, Val Loss: 0.8452, Val Accuracy: 70.60%


Epoch 21/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.821]

Epoch 20/50, Train Loss: 0.7788, Train Accuracy: 72.09%, Val Loss: 0.8441, Val Accuracy: 70.28%


Epoch 22/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.07it/s, Train Loss=0.85]

Epoch 21/50, Train Loss: 0.7714, Train Accuracy: 72.42%, Val Loss: 0.8607, Val Accuracy: 69.97%


Epoch 23/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.758]

Epoch 22/50, Train Loss: 0.7757, Train Accuracy: 72.45%, Val Loss: 0.8814, Val Accuracy: 69.36%


Epoch 24/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.774]

Epoch 23/50, Train Loss: 0.7700, Train Accuracy: 72.56%, Val Loss: 0.8563, Val Accuracy: 70.19%


Epoch 25/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.698]

Epoch 24/50, Train Loss: 0.7669, Train Accuracy: 72.66%, Val Loss: 0.8281, Val Accuracy: 70.93%


Epoch 26/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.791]

Epoch 25/50, Train Loss: 0.7680, Train Accuracy: 72.63%, Val Loss: 0.8445, Val Accuracy: 70.49%


Epoch 27/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.16it/s, Train Loss=0.83]

Epoch 26/50, Train Loss: 0.7669, Train Accuracy: 72.64%, Val Loss: 0.8423, Val Accuracy: 70.74%


Epoch 28/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.768]

Epoch 27/50, Train Loss: 0.7668, Train Accuracy: 72.81%, Val Loss: 0.8616, Val Accuracy: 69.73%


Epoch 29/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.745]

Epoch 28/50, Train Loss: 0.7672, Train Accuracy: 72.71%, Val Loss: 0.8556, Val Accuracy: 70.16%


Epoch 30/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.771]

Epoch 29/50, Train Loss: 0.7575, Train Accuracy: 73.05%, Val Loss: 0.8465, Val Accuracy: 70.04%


Epoch 31/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.16it/s, Train Loss=0.785]

Epoch 30/50, Train Loss: 0.7587, Train Accuracy: 72.90%, Val Loss: 0.8371, Val Accuracy: 70.45%


Epoch 32/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.771]

Epoch 31/50, Train Loss: 0.7600, Train Accuracy: 72.74%, Val Loss: 0.8397, Val Accuracy: 70.73%


Epoch 33/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.794]

Epoch 32/50, Train Loss: 0.7601, Train Accuracy: 72.91%, Val Loss: 0.8519, Val Accuracy: 70.04%


Epoch 34/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.09it/s, Train Loss=0.698]

Epoch 33/50, Train Loss: 0.7589, Train Accuracy: 72.76%, Val Loss: 0.8314, Val Accuracy: 71.01%


Epoch 35/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.793]

Epoch 34/50, Train Loss: 0.7582, Train Accuracy: 73.07%, Val Loss: 0.8460, Val Accuracy: 70.39%


Epoch 36/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.902]

Epoch 35/50, Train Loss: 0.7541, Train Accuracy: 72.79%, Val Loss: 0.8229, Val Accuracy: 71.19%


Epoch 37/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.771]

Epoch 36/50, Train Loss: 0.7535, Train Accuracy: 73.18%, Val Loss: 0.8371, Val Accuracy: 70.87%


Epoch 38/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.736]

Epoch 37/50, Train Loss: 0.7507, Train Accuracy: 73.25%, Val Loss: 0.8451, Val Accuracy: 70.48%


Epoch 39/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.10it/s, Train Loss=0.843]

Epoch 38/50, Train Loss: 0.7472, Train Accuracy: 73.35%, Val Loss: 0.8295, Val Accuracy: 71.22%


Epoch 40/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.71]

Epoch 39/50, Train Loss: 0.7493, Train Accuracy: 73.26%, Val Loss: 0.8354, Val Accuracy: 70.13%


Epoch 41/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 40/50, Train Loss: 0.7419, Train Accuracy: 73.42%, Val Loss: 0.8033, Val Accuracy: 71.96%


Epoch 42/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.688]

Epoch 41/50, Train Loss: 0.7445, Train Accuracy: 73.18%, Val Loss: 0.8096, Val Accuracy: 71.81%


Epoch 43/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.763]

Epoch 42/50, Train Loss: 0.7463, Train Accuracy: 73.49%, Val Loss: 0.8212, Val Accuracy: 71.12%


Epoch 44/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.76]

Epoch 43/50, Train Loss: 0.7412, Train Accuracy: 73.50%, Val Loss: 0.8283, Val Accuracy: 71.04%


Epoch 45/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.801]

Epoch 44/50, Train Loss: 0.7426, Train Accuracy: 73.45%, Val Loss: 0.8413, Val Accuracy: 70.55%


Epoch 46/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.16it/s, Train Loss=0.64]

Epoch 45/50, Train Loss: 0.7402, Train Accuracy: 73.54%, Val Loss: 0.8341, Val Accuracy: 70.99%


Epoch 47/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.662]

Epoch 46/50, Train Loss: 0.7399, Train Accuracy: 73.41%, Val Loss: 0.8182, Val Accuracy: 71.55%


Epoch 48/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 47/50, Train Loss: 0.7373, Train Accuracy: 73.91%, Val Loss: 0.8229, Val Accuracy: 71.44%


Epoch 49/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.755]

Epoch 48/50, Train Loss: 0.7370, Train Accuracy: 73.65%, Val Loss: 0.8591, Val Accuracy: 70.25%


Epoch 50/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.759]

Epoch 49/50, Train Loss: 0.7296, Train Accuracy: 74.25%, Val Loss: 0.8585, Val Accuracy: 70.34%


                                                                                                                      

Epoch 50/50, Train Loss: 0.7340, Train Accuracy: 73.94%, Val Loss: 0.8455, Val Accuracy: 70.55%
models saved to ViT/models/20241004_174316.pth


In [4]:
# 150-200
# Resume training: rebuild the ViT, restore the weights saved by the previous
# run, train for `epochs` more epochs and save a new timestamped checkpoint.
# NOTE(review): "150-200" presumably denotes the cumulative epoch range
# covered by this cell -- confirm against the earlier runs.

# Hyperparameters. They have to describe the same architecture as the
# checkpoint being loaded, otherwise load_state_dict() (strict by default)
# raises on missing/unexpected keys or mismatched shapes.
epochs = 50
patch_size = 8
embed_dim = 256
n_layers = 6
heads = 8
attn_dim = 512
mlp_dim = None # default to 4*embed_dim
pool = 'cls'
dropout = 0.1

model = ViT(
    image_size=32,
    patch_size=patch_size,
    num_classes=10,
    embed_dim=embed_dim,
    n_layers=n_layers,
    heads=heads,
    attn_dim=attn_dim,
    mlp_dim=mlp_dim,
    pool=pool,
    dropout=dropout,
)

# Timestamp of the run whose weights are used as the starting point.
last_time_str = "20241004_174316"

# load model
# map_location="cpu" makes the restore independent of the device the
# checkpoint was saved from (e.g. it also works on a CPU-only machine);
# load_state_dict() copies the values into the model's own parameters.
model.load_state_dict(torch.load(f"ViT/models/{last_time_str}.pth", map_location="cpu"))

print(f"models loaded from ViT/models/{last_time_str}.pth")

# NOTE(review): only the model weights are restored. The optimizer is created
# from scratch here, so Adam's running moment estimates start again from zero.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=5e-4)

# Timestamp identifying this run; used for both the log file and the checkpoint.
time_str = timestr()

print(f"Time string: {time_str}")

# print the model and the number of parameters
# print(model.transformer)
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(), 
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# save model
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")

print(f"models saved to ViT/models/{time_str}.pth")

Epoch 1/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

models loaded from ViT/models/20241004_174316.pth
Time string: 20241004_182219
The model has 6,359,562 trainable parameters


Epoch 2/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 1/50, Train Loss: 0.7364, Train Accuracy: 73.70%, Val Loss: 0.8635, Val Accuracy: 70.24%


Epoch 3/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 2/50, Train Loss: 0.7324, Train Accuracy: 73.97%, Val Loss: 0.8493, Val Accuracy: 70.58%


Epoch 4/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.799]

Epoch 3/50, Train Loss: 0.7281, Train Accuracy: 74.09%, Val Loss: 0.8581, Val Accuracy: 70.08%


Epoch 5/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.10it/s, Train Loss=0.754]

Epoch 4/50, Train Loss: 0.7337, Train Accuracy: 73.68%, Val Loss: 0.8612, Val Accuracy: 70.34%


Epoch 6/50:   0%|                                                                             | 0/196 [00:00<?, ?it/s]

Epoch 5/50, Train Loss: 0.7298, Train Accuracy: 73.88%, Val Loss: 0.8138, Val Accuracy: 71.62%


Epoch 7/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.09it/s, Train Loss=0.675]

Epoch 6/50, Train Loss: 0.7269, Train Accuracy: 74.07%, Val Loss: 0.8690, Val Accuracy: 70.30%


Epoch 8/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.683]

Epoch 7/50, Train Loss: 0.7250, Train Accuracy: 74.13%, Val Loss: 0.8300, Val Accuracy: 70.81%


Epoch 9/50:   1%|▎                                                  | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.635]

Epoch 8/50, Train Loss: 0.7227, Train Accuracy: 74.30%, Val Loss: 0.8447, Val Accuracy: 70.78%


Epoch 10/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.07it/s, Train Loss=0.761]

Epoch 9/50, Train Loss: 0.7283, Train Accuracy: 74.11%, Val Loss: 0.8540, Val Accuracy: 70.37%


Epoch 11/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.13it/s, Train Loss=0.754]

Epoch 10/50, Train Loss: 0.7164, Train Accuracy: 74.37%, Val Loss: 0.8421, Val Accuracy: 70.72%


Epoch 12/50:   1%|▎                                                   | 1/196 [00:00<00:38,  5.10it/s, Train Loss=0.7]

Epoch 11/50, Train Loss: 0.7214, Train Accuracy: 74.18%, Val Loss: 0.8540, Val Accuracy: 70.27%


Epoch 13/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.629]

Epoch 12/50, Train Loss: 0.7209, Train Accuracy: 74.21%, Val Loss: 0.8204, Val Accuracy: 71.50%


Epoch 14/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.826]

Epoch 13/50, Train Loss: 0.7149, Train Accuracy: 74.44%, Val Loss: 0.8191, Val Accuracy: 71.16%


Epoch 15/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.09it/s, Train Loss=0.707]

Epoch 14/50, Train Loss: 0.7193, Train Accuracy: 74.30%, Val Loss: 0.8363, Val Accuracy: 71.35%


Epoch 16/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.745]

Epoch 15/50, Train Loss: 0.7168, Train Accuracy: 74.33%, Val Loss: 0.8185, Val Accuracy: 71.48%


Epoch 17/50:   1%|▎                                                  | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.77]

Epoch 16/50, Train Loss: 0.7081, Train Accuracy: 74.93%, Val Loss: 0.8201, Val Accuracy: 71.64%


Epoch 18/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.685]

Epoch 17/50, Train Loss: 0.7099, Train Accuracy: 74.64%, Val Loss: 0.8210, Val Accuracy: 71.69%


Epoch 19/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.07it/s, Train Loss=0.786]

Epoch 18/50, Train Loss: 0.7177, Train Accuracy: 74.52%, Val Loss: 0.8271, Val Accuracy: 71.53%


Epoch 20/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.622]

Epoch 19/50, Train Loss: 0.7112, Train Accuracy: 74.39%, Val Loss: 0.8328, Val Accuracy: 71.05%


Epoch 21/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.16it/s, Train Loss=0.804]

Epoch 20/50, Train Loss: 0.7118, Train Accuracy: 74.42%, Val Loss: 0.8459, Val Accuracy: 70.60%


Epoch 22/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.827]

Epoch 21/50, Train Loss: 0.7107, Train Accuracy: 74.64%, Val Loss: 0.8745, Val Accuracy: 70.06%


Epoch 23/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.704]

Epoch 22/50, Train Loss: 0.7107, Train Accuracy: 74.53%, Val Loss: 0.8491, Val Accuracy: 70.68%


Epoch 24/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.683]

Epoch 23/50, Train Loss: 0.7070, Train Accuracy: 74.74%, Val Loss: 0.8794, Val Accuracy: 69.76%


Epoch 25/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.665]

Epoch 24/50, Train Loss: 0.7033, Train Accuracy: 74.90%, Val Loss: 0.8407, Val Accuracy: 71.12%


Epoch 26/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 25/50, Train Loss: 0.7000, Train Accuracy: 74.90%, Val Loss: 0.8102, Val Accuracy: 71.59%


Epoch 27/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.757]

Epoch 26/50, Train Loss: 0.7036, Train Accuracy: 74.83%, Val Loss: 0.8457, Val Accuracy: 71.20%


Epoch 28/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.721]

Epoch 27/50, Train Loss: 0.7009, Train Accuracy: 74.90%, Val Loss: 0.8654, Val Accuracy: 69.93%


Epoch 29/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.667]

Epoch 28/50, Train Loss: 0.6983, Train Accuracy: 75.15%, Val Loss: 0.8324, Val Accuracy: 71.41%


Epoch 30/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.722]

Epoch 29/50, Train Loss: 0.6978, Train Accuracy: 75.12%, Val Loss: 0.8421, Val Accuracy: 71.03%


Epoch 31/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.737]

Epoch 30/50, Train Loss: 0.6989, Train Accuracy: 75.05%, Val Loss: 0.8321, Val Accuracy: 71.48%


Epoch 32/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.10it/s, Train Loss=0.787]

Epoch 31/50, Train Loss: 0.6978, Train Accuracy: 74.84%, Val Loss: 0.8362, Val Accuracy: 71.13%


Epoch 33/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.746]

Epoch 32/50, Train Loss: 0.7025, Train Accuracy: 74.98%, Val Loss: 0.8127, Val Accuracy: 72.16%


Epoch 34/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.637]

Epoch 33/50, Train Loss: 0.6960, Train Accuracy: 75.11%, Val Loss: 0.8302, Val Accuracy: 71.42%


Epoch 35/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.693]

Epoch 34/50, Train Loss: 0.6921, Train Accuracy: 75.36%, Val Loss: 0.8387, Val Accuracy: 70.96%


Epoch 36/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.799]

Epoch 35/50, Train Loss: 0.6906, Train Accuracy: 75.32%, Val Loss: 0.8290, Val Accuracy: 71.53%


Epoch 37/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.625]

Epoch 36/50, Train Loss: 0.6905, Train Accuracy: 75.34%, Val Loss: 0.8251, Val Accuracy: 71.33%


Epoch 38/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.11it/s, Train Loss=0.673]

Epoch 37/50, Train Loss: 0.6899, Train Accuracy: 75.41%, Val Loss: 0.8159, Val Accuracy: 71.75%


Epoch 39/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.745]

Epoch 38/50, Train Loss: 0.6933, Train Accuracy: 75.19%, Val Loss: 0.8208, Val Accuracy: 71.70%


Epoch 40/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.622]

Epoch 39/50, Train Loss: 0.6884, Train Accuracy: 75.54%, Val Loss: 0.8193, Val Accuracy: 71.57%


Epoch 41/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.742]

Epoch 40/50, Train Loss: 0.6853, Train Accuracy: 75.52%, Val Loss: 0.8106, Val Accuracy: 71.98%


Epoch 42/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 41/50, Train Loss: 0.6857, Train Accuracy: 75.22%, Val Loss: 0.8079, Val Accuracy: 72.32%


Epoch 43/50:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 42/50, Train Loss: 0.6872, Train Accuracy: 75.44%, Val Loss: 0.8058, Val Accuracy: 72.13%


Epoch 44/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.12it/s, Train Loss=0.713]

Epoch 43/50, Train Loss: 0.6877, Train Accuracy: 75.48%, Val Loss: 0.8119, Val Accuracy: 71.79%


Epoch 45/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.722]

Epoch 44/50, Train Loss: 0.6831, Train Accuracy: 75.63%, Val Loss: 0.8330, Val Accuracy: 71.13%


Epoch 46/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.662]

Epoch 45/50, Train Loss: 0.6784, Train Accuracy: 75.88%, Val Loss: 0.8311, Val Accuracy: 71.68%


Epoch 47/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.14it/s, Train Loss=0.636]

Epoch 46/50, Train Loss: 0.6837, Train Accuracy: 75.45%, Val Loss: 0.8186, Val Accuracy: 72.21%


Epoch 48/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.13it/s, Train Loss=0.662]

Epoch 47/50, Train Loss: 0.6827, Train Accuracy: 75.55%, Val Loss: 0.8258, Val Accuracy: 71.82%


Epoch 49/50:   1%|▎                                                 | 1/196 [00:00<00:38,  5.10it/s, Train Loss=0.675]

Epoch 48/50, Train Loss: 0.6779, Train Accuracy: 75.69%, Val Loss: 0.8794, Val Accuracy: 69.96%


Epoch 50/50:   1%|▎                                                 | 1/196 [00:00<00:37,  5.15it/s, Train Loss=0.666]

Epoch 49/50, Train Loss: 0.6817, Train Accuracy: 75.68%, Val Loss: 0.8323, Val Accuracy: 71.15%


                                                                                                                      

Epoch 50/50, Train Loss: 0.6719, Train Accuracy: 76.03%, Val Loss: 0.8108, Val Accuracy: 72.18%
models saved to ViT/models/20241004_182219.pth


In [3]:
# 200-300
# Resume training: rebuild the ViT, restore the weights saved by the previous
# run, train for `epochs` more epochs and save a new timestamped checkpoint.
# NOTE(review): "200-300" presumably denotes the cumulative epoch range
# covered by this cell -- confirm against the earlier runs.

# Hyperparameters. They have to describe the same architecture as the
# checkpoint being loaded, otherwise load_state_dict() (strict by default)
# raises on missing/unexpected keys or mismatched shapes.
epochs = 100
patch_size = 8
embed_dim = 256
n_layers = 6
heads = 8
attn_dim = 512
mlp_dim = None # default to 4*embed_dim
pool = 'cls'
dropout = 0.1

model = ViT(
    image_size=32,
    patch_size=patch_size,
    num_classes=10,
    embed_dim=embed_dim,
    n_layers=n_layers,
    heads=heads,
    attn_dim=attn_dim,
    mlp_dim=mlp_dim,
    pool=pool,
    dropout=dropout,
)

# Timestamp of the run whose weights are used as the starting point.
last_time_str = "20241004_182219"

# load model
# map_location="cpu" makes the restore independent of the device the
# checkpoint was saved from (e.g. it also works on a CPU-only machine);
# load_state_dict() copies the values into the model's own parameters.
model.load_state_dict(torch.load(f"ViT/models/{last_time_str}.pth", map_location="cpu"))

print(f"models loaded from ViT/models/{last_time_str}.pth")

# NOTE(review): only the model weights are restored. The optimizer is created
# from scratch here (with a lower learning rate of 5e-5), so Adam's running
# moment estimates start again from zero.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5, betas=(0.9, 0.999), weight_decay=5e-4)

# Timestamp identifying this run; used for both the log file and the checkpoint.
time_str = timestr()

print(f"Time string: {time_str}")

# print the model and the number of parameters
# print(model.transformer)
print(f'The model has {sum(p.numel() for p in model.parameters() if p.requires_grad):,} trainable parameters')

train(epochs=epochs, model=model, optimizer=optimizer, criterion=nn.CrossEntropyLoss(), 
      train_loader=train_loader, val_loader=val_loader, outdir=get_outdir(time_str))

# save model
torch.save(model.state_dict(), f"ViT/models/{time_str}.pth")

print(f"models saved to ViT/models/{time_str}.pth")

Epoch 1/100:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

models loaded from ViT/models/20241004_182219.pth
Time string: 20241004_190300
The model has 6,359,562 trainable parameters


Epoch 2/100:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 1/100, Train Loss: 0.6596, Train Accuracy: 76.51%, Val Loss: 0.8304, Val Accuracy: 71.82%


Epoch 3/100:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 2/100, Train Loss: 0.6493, Train Accuracy: 76.84%, Val Loss: 0.8198, Val Accuracy: 72.00%


Epoch 4/100:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 3/100, Train Loss: 0.6410, Train Accuracy: 77.14%, Val Loss: 0.8168, Val Accuracy: 72.19%


Epoch 5/100:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 4/100, Train Loss: 0.6470, Train Accuracy: 76.93%, Val Loss: 0.8082, Val Accuracy: 72.43%


Epoch 6/100:   1%|▎                                                 | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.644]

Epoch 5/100, Train Loss: 0.6457, Train Accuracy: 77.00%, Val Loss: 0.8288, Val Accuracy: 71.73%


Epoch 7/100:   0%|                                                                            | 0/196 [00:00<?, ?it/s]

Epoch 6/100, Train Loss: 0.6363, Train Accuracy: 77.08%, Val Loss: 0.8080, Val Accuracy: 72.43%


Epoch 8/100:   1%|▎                                                 | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.661]

Epoch 7/100, Train Loss: 0.6368, Train Accuracy: 77.16%, Val Loss: 0.8222, Val Accuracy: 72.36%


Epoch 9/100:   1%|▎                                                 | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.564]

Epoch 8/100, Train Loss: 0.6405, Train Accuracy: 76.89%, Val Loss: 0.8212, Val Accuracy: 72.01%


Epoch 10/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.711]

Epoch 9/100, Train Loss: 0.6345, Train Accuracy: 77.20%, Val Loss: 0.8392, Val Accuracy: 71.74%


Epoch 11/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.638]

Epoch 10/100, Train Loss: 0.6373, Train Accuracy: 77.21%, Val Loss: 0.8113, Val Accuracy: 72.58%


Epoch 12/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.91it/s, Train Loss=0.639]

Epoch 11/100, Train Loss: 0.6349, Train Accuracy: 77.16%, Val Loss: 0.8354, Val Accuracy: 71.53%


Epoch 13/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.88it/s, Train Loss=0.563]

Epoch 12/100, Train Loss: 0.6338, Train Accuracy: 77.48%, Val Loss: 0.8290, Val Accuracy: 71.89%


Epoch 14/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.651]

Epoch 13/100, Train Loss: 0.6336, Train Accuracy: 77.31%, Val Loss: 0.8284, Val Accuracy: 71.84%


Epoch 15/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.563]

Epoch 14/100, Train Loss: 0.6343, Train Accuracy: 77.31%, Val Loss: 0.8265, Val Accuracy: 71.97%


Epoch 16/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.631]

Epoch 15/100, Train Loss: 0.6298, Train Accuracy: 77.39%, Val Loss: 0.8139, Val Accuracy: 72.38%


Epoch 17/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.92it/s, Train Loss=0.669]

Epoch 16/100, Train Loss: 0.6256, Train Accuracy: 77.61%, Val Loss: 0.8227, Val Accuracy: 72.13%


Epoch 18/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.628]

Epoch 17/100, Train Loss: 0.6287, Train Accuracy: 77.65%, Val Loss: 0.8188, Val Accuracy: 72.43%


Epoch 19/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.90it/s, Train Loss=0.603]

Epoch 18/100, Train Loss: 0.6297, Train Accuracy: 77.57%, Val Loss: 0.8413, Val Accuracy: 71.88%


Epoch 20/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.623]

Epoch 19/100, Train Loss: 0.6316, Train Accuracy: 77.31%, Val Loss: 0.8440, Val Accuracy: 71.44%


Epoch 21/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.632]

Epoch 20/100, Train Loss: 0.6291, Train Accuracy: 77.43%, Val Loss: 0.8235, Val Accuracy: 72.18%


Epoch 22/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.707]

Epoch 21/100, Train Loss: 0.6199, Train Accuracy: 77.68%, Val Loss: 0.8387, Val Accuracy: 71.89%


Epoch 23/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.673]

Epoch 22/100, Train Loss: 0.6232, Train Accuracy: 77.61%, Val Loss: 0.8274, Val Accuracy: 71.91%


Epoch 24/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.584]

Epoch 23/100, Train Loss: 0.6253, Train Accuracy: 77.44%, Val Loss: 0.8252, Val Accuracy: 72.11%


Epoch 25/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.621]

Epoch 24/100, Train Loss: 0.6190, Train Accuracy: 77.93%, Val Loss: 0.8106, Val Accuracy: 72.61%


Epoch 26/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.575]

Epoch 25/100, Train Loss: 0.6194, Train Accuracy: 77.71%, Val Loss: 0.8162, Val Accuracy: 72.55%


Epoch 27/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.96it/s, Train Loss=0.619]

Epoch 26/100, Train Loss: 0.6239, Train Accuracy: 77.75%, Val Loss: 0.8269, Val Accuracy: 72.14%


Epoch 28/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.657]

Epoch 27/100, Train Loss: 0.6139, Train Accuracy: 78.15%, Val Loss: 0.8349, Val Accuracy: 71.81%


Epoch 29/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.584]

Epoch 28/100, Train Loss: 0.6194, Train Accuracy: 77.84%, Val Loss: 0.8137, Val Accuracy: 72.64%


Epoch 30/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.615]

Epoch 29/100, Train Loss: 0.6185, Train Accuracy: 77.73%, Val Loss: 0.8303, Val Accuracy: 71.98%


Epoch 31/100:   1%|▎                                                 | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.62]

Epoch 30/100, Train Loss: 0.6141, Train Accuracy: 78.04%, Val Loss: 0.8478, Val Accuracy: 71.68%


Epoch 32/100:   1%|▎                                                 | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.62]

Epoch 31/100, Train Loss: 0.6147, Train Accuracy: 77.92%, Val Loss: 0.8380, Val Accuracy: 71.88%


Epoch 33/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.656]

Epoch 32/100, Train Loss: 0.6223, Train Accuracy: 77.51%, Val Loss: 0.8375, Val Accuracy: 71.81%


Epoch 34/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.615]

Epoch 33/100, Train Loss: 0.6195, Train Accuracy: 77.79%, Val Loss: 0.8306, Val Accuracy: 71.79%


Epoch 35/100:   1%|▎                                                 | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.64]

Epoch 34/100, Train Loss: 0.6156, Train Accuracy: 77.94%, Val Loss: 0.8240, Val Accuracy: 72.23%


Epoch 36/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.03it/s, Train Loss=0.689]

Epoch 35/100, Train Loss: 0.6133, Train Accuracy: 77.91%, Val Loss: 0.8358, Val Accuracy: 72.20%


Epoch 37/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.518]

Epoch 36/100, Train Loss: 0.6069, Train Accuracy: 78.28%, Val Loss: 0.8398, Val Accuracy: 71.88%


Epoch 38/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.595]

Epoch 37/100, Train Loss: 0.6110, Train Accuracy: 78.05%, Val Loss: 0.8333, Val Accuracy: 72.07%


Epoch 39/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.579]

Epoch 38/100, Train Loss: 0.6112, Train Accuracy: 77.92%, Val Loss: 0.8423, Val Accuracy: 71.98%


Epoch 40/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.583]

Epoch 39/100, Train Loss: 0.6102, Train Accuracy: 78.13%, Val Loss: 0.8325, Val Accuracy: 71.93%


Epoch 41/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.591]

Epoch 40/100, Train Loss: 0.6045, Train Accuracy: 78.34%, Val Loss: 0.8214, Val Accuracy: 72.47%


Epoch 42/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.493]

Epoch 41/100, Train Loss: 0.6082, Train Accuracy: 78.17%, Val Loss: 0.8223, Val Accuracy: 72.57%


Epoch 43/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.618]

Epoch 42/100, Train Loss: 0.6070, Train Accuracy: 78.35%, Val Loss: 0.8142, Val Accuracy: 72.75%


Epoch 44/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.599]

Epoch 43/100, Train Loss: 0.6037, Train Accuracy: 78.40%, Val Loss: 0.8318, Val Accuracy: 71.98%


Epoch 45/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.587]

Epoch 44/100, Train Loss: 0.6105, Train Accuracy: 78.00%, Val Loss: 0.8333, Val Accuracy: 72.16%


Epoch 46/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.89it/s, Train Loss=0.595]

Epoch 45/100, Train Loss: 0.6057, Train Accuracy: 78.49%, Val Loss: 0.8471, Val Accuracy: 72.00%


Epoch 47/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.585]

Epoch 46/100, Train Loss: 0.6033, Train Accuracy: 78.27%, Val Loss: 0.8355, Val Accuracy: 72.31%


Epoch 48/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.516]

Epoch 47/100, Train Loss: 0.5957, Train Accuracy: 78.68%, Val Loss: 0.8337, Val Accuracy: 72.24%


Epoch 49/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.558]

Epoch 48/100, Train Loss: 0.6072, Train Accuracy: 78.12%, Val Loss: 0.8689, Val Accuracy: 71.04%


Epoch 50/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.575]

Epoch 49/100, Train Loss: 0.6049, Train Accuracy: 78.48%, Val Loss: 0.8443, Val Accuracy: 71.89%


Epoch 51/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.94it/s, Train Loss=0.555]

Epoch 50/100, Train Loss: 0.5995, Train Accuracy: 78.55%, Val Loss: 0.8315, Val Accuracy: 72.03%


Epoch 52/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.626]

Epoch 51/100, Train Loss: 0.6038, Train Accuracy: 78.34%, Val Loss: 0.8506, Val Accuracy: 72.14%


Epoch 53/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.661]

Epoch 52/100, Train Loss: 0.6069, Train Accuracy: 78.18%, Val Loss: 0.8877, Val Accuracy: 70.60%


Epoch 54/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.96it/s, Train Loss=0.639]

Epoch 53/100, Train Loss: 0.6050, Train Accuracy: 78.50%, Val Loss: 0.8589, Val Accuracy: 71.51%


Epoch 55/100:   1%|▎                                                 | 1/196 [00:00<00:32,  5.96it/s, Train Loss=0.57]

Epoch 54/100, Train Loss: 0.6068, Train Accuracy: 78.40%, Val Loss: 0.8591, Val Accuracy: 71.61%


Epoch 56/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.517]

Epoch 55/100, Train Loss: 0.6046, Train Accuracy: 78.34%, Val Loss: 0.8469, Val Accuracy: 72.10%


Epoch 57/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.584]

Epoch 56/100, Train Loss: 0.6019, Train Accuracy: 78.52%, Val Loss: 0.8264, Val Accuracy: 72.33%


Epoch 58/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.496]

Epoch 57/100, Train Loss: 0.5939, Train Accuracy: 78.45%, Val Loss: 0.8424, Val Accuracy: 72.06%


Epoch 59/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.606]

Epoch 58/100, Train Loss: 0.6010, Train Accuracy: 78.49%, Val Loss: 0.8282, Val Accuracy: 72.53%


Epoch 60/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.02it/s, Train Loss=0.647]

Epoch 59/100, Train Loss: 0.6041, Train Accuracy: 78.25%, Val Loss: 0.8360, Val Accuracy: 72.32%


Epoch 61/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.638]

Epoch 60/100, Train Loss: 0.6000, Train Accuracy: 78.30%, Val Loss: 0.8454, Val Accuracy: 72.05%


Epoch 62/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.587]

Epoch 61/100, Train Loss: 0.5996, Train Accuracy: 78.43%, Val Loss: 0.8204, Val Accuracy: 72.83%


Epoch 63/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.86it/s, Train Loss=0.544]

Epoch 62/100, Train Loss: 0.5957, Train Accuracy: 78.62%, Val Loss: 0.8224, Val Accuracy: 72.87%


Epoch 64/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.514]

Epoch 63/100, Train Loss: 0.5983, Train Accuracy: 78.62%, Val Loss: 0.8248, Val Accuracy: 72.51%


Epoch 65/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.94it/s, Train Loss=0.508]

Epoch 64/100, Train Loss: 0.5966, Train Accuracy: 78.48%, Val Loss: 0.8238, Val Accuracy: 72.45%


Epoch 66/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.617]

Epoch 65/100, Train Loss: 0.5965, Train Accuracy: 78.44%, Val Loss: 0.8408, Val Accuracy: 71.80%


Epoch 67/100:   1%|▎                                                 | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.62]

Epoch 66/100, Train Loss: 0.5973, Train Accuracy: 78.58%, Val Loss: 0.8255, Val Accuracy: 72.42%


Epoch 68/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.88it/s, Train Loss=0.698]

Epoch 67/100, Train Loss: 0.5920, Train Accuracy: 78.70%, Val Loss: 0.8312, Val Accuracy: 72.22%


Epoch 69/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.583]

Epoch 68/100, Train Loss: 0.5963, Train Accuracy: 78.58%, Val Loss: 0.8210, Val Accuracy: 72.56%


Epoch 70/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.97it/s, Train Loss=0.592]

Epoch 69/100, Train Loss: 0.5887, Train Accuracy: 78.96%, Val Loss: 0.8082, Val Accuracy: 73.14%


Epoch 71/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.621]

Epoch 70/100, Train Loss: 0.5927, Train Accuracy: 78.73%, Val Loss: 0.8392, Val Accuracy: 72.00%


Epoch 72/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.584]

Epoch 71/100, Train Loss: 0.5908, Train Accuracy: 78.71%, Val Loss: 0.8206, Val Accuracy: 72.43%


Epoch 73/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.568]

Epoch 72/100, Train Loss: 0.5879, Train Accuracy: 78.83%, Val Loss: 0.8249, Val Accuracy: 72.79%


Epoch 74/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.578]

Epoch 73/100, Train Loss: 0.5896, Train Accuracy: 78.74%, Val Loss: 0.8198, Val Accuracy: 72.81%


Epoch 75/100:   1%|▏                                                | 1/196 [00:00<00:33,  5.81it/s, Train Loss=0.581]

Epoch 74/100, Train Loss: 0.5909, Train Accuracy: 78.95%, Val Loss: 0.8384, Val Accuracy: 72.18%


Epoch 76/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.535]

Epoch 75/100, Train Loss: 0.5867, Train Accuracy: 78.70%, Val Loss: 0.8282, Val Accuracy: 72.29%


Epoch 77/100:   1%|▎                                                 | 1/196 [00:00<00:32,  5.91it/s, Train Loss=0.54]

Epoch 76/100, Train Loss: 0.5903, Train Accuracy: 78.81%, Val Loss: 0.8260, Val Accuracy: 72.62%


Epoch 78/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.569]

Epoch 77/100, Train Loss: 0.5869, Train Accuracy: 78.90%, Val Loss: 0.8302, Val Accuracy: 72.52%


Epoch 79/100:   1%|▎                                                 | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.54]

Epoch 78/100, Train Loss: 0.5892, Train Accuracy: 78.85%, Val Loss: 0.8485, Val Accuracy: 71.68%


Epoch 80/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.553]

Epoch 79/100, Train Loss: 0.5833, Train Accuracy: 78.98%, Val Loss: 0.8423, Val Accuracy: 71.96%


Epoch 81/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.95it/s, Train Loss=0.622]

Epoch 80/100, Train Loss: 0.5836, Train Accuracy: 78.85%, Val Loss: 0.8678, Val Accuracy: 71.45%


Epoch 82/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.553]

Epoch 81/100, Train Loss: 0.5804, Train Accuracy: 79.18%, Val Loss: 0.8437, Val Accuracy: 72.07%


Epoch 83/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.555]

Epoch 82/100, Train Loss: 0.5881, Train Accuracy: 78.93%, Val Loss: 0.8231, Val Accuracy: 72.41%


Epoch 84/100:   1%|▎                                                 | 1/196 [00:00<00:33,  5.85it/s, Train Loss=0.63]

Epoch 83/100, Train Loss: 0.5840, Train Accuracy: 79.09%, Val Loss: 0.8432, Val Accuracy: 72.09%


Epoch 85/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.596]

Epoch 84/100, Train Loss: 0.5790, Train Accuracy: 79.24%, Val Loss: 0.8324, Val Accuracy: 72.50%


Epoch 86/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.586]

Epoch 85/100, Train Loss: 0.5769, Train Accuracy: 79.31%, Val Loss: 0.8352, Val Accuracy: 72.49%


Epoch 87/100:   1%|▎                                                 | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.61]

Epoch 86/100, Train Loss: 0.5801, Train Accuracy: 79.19%, Val Loss: 0.8260, Val Accuracy: 72.55%


Epoch 88/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.94it/s, Train Loss=0.545]

Epoch 87/100, Train Loss: 0.5779, Train Accuracy: 79.11%, Val Loss: 0.8451, Val Accuracy: 72.40%


Epoch 89/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.535]

Epoch 88/100, Train Loss: 0.5761, Train Accuracy: 79.26%, Val Loss: 0.8353, Val Accuracy: 72.06%


Epoch 90/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.508]

Epoch 89/100, Train Loss: 0.5723, Train Accuracy: 79.43%, Val Loss: 0.8322, Val Accuracy: 72.27%


Epoch 91/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.93it/s, Train Loss=0.551]

Epoch 90/100, Train Loss: 0.5783, Train Accuracy: 79.36%, Val Loss: 0.8188, Val Accuracy: 72.62%


Epoch 92/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.00it/s, Train Loss=0.568]

Epoch 91/100, Train Loss: 0.5797, Train Accuracy: 79.12%, Val Loss: 0.8432, Val Accuracy: 72.12%


Epoch 93/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.02it/s, Train Loss=0.454]

Epoch 92/100, Train Loss: 0.5714, Train Accuracy: 79.33%, Val Loss: 0.8442, Val Accuracy: 72.40%


Epoch 94/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.557]

Epoch 93/100, Train Loss: 0.5766, Train Accuracy: 79.24%, Val Loss: 0.8465, Val Accuracy: 72.24%


Epoch 95/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.99it/s, Train Loss=0.469]

Epoch 94/100, Train Loss: 0.5766, Train Accuracy: 79.47%, Val Loss: 0.8338, Val Accuracy: 72.48%


Epoch 96/100:   1%|▏                                                | 1/196 [00:00<00:32,  5.98it/s, Train Loss=0.547]

Epoch 95/100, Train Loss: 0.5706, Train Accuracy: 79.57%, Val Loss: 0.8438, Val Accuracy: 72.08%


Epoch 97/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.512]

Epoch 96/100, Train Loss: 0.5699, Train Accuracy: 79.60%, Val Loss: 0.8484, Val Accuracy: 71.91%


Epoch 98/100:   1%|▎                                                  | 1/196 [00:00<00:33,  5.82it/s, Train Loss=0.5]

Epoch 97/100, Train Loss: 0.5679, Train Accuracy: 79.70%, Val Loss: 0.8330, Val Accuracy: 72.52%


Epoch 99/100:   1%|▏                                                | 1/196 [00:00<00:32,  6.01it/s, Train Loss=0.543]

Epoch 98/100, Train Loss: 0.5775, Train Accuracy: 79.17%, Val Loss: 0.8705, Val Accuracy: 71.33%


Epoch 100/100:   1%|▏                                               | 1/196 [00:00<00:32,  5.96it/s, Train Loss=0.597]

Epoch 99/100, Train Loss: 0.5662, Train Accuracy: 79.66%, Val Loss: 0.8318, Val Accuracy: 72.43%


                                                                                                                      

Epoch 100/100, Train Loss: 0.5690, Train Accuracy: 79.69%, Val Loss: 0.8407, Val Accuracy: 72.25%
models saved to ViT/models/20241004_190300.pth


Here is the model architecture (the printed `AttentionLayers` module):

```text
AttentionLayers(
  (layers): ModuleList(
    (0): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): Attention(
        (to_q): Linear(in_features=256, out_features=512, bias=False)
        (to_k): Linear(in_features=256, out_features=512, bias=False)
        (to_v): Linear(in_features=256, out_features=512, bias=False)
        (attend): Attend(
          (attn_dropout): Dropout(p=0.0, inplace=False)
        )
        (to_out): Linear(in_features=512, out_features=256, bias=False)
      )
      (2): Residual()
    )
    (1): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): FeedForward(
        (ff): Sequential(
          (0): Sequential(
            (0): Linear(in_features=256, out_features=1024, bias=True)
            (1): MyGELU()
          )
          (1): Dropout(p=0.0, inplace=False)
          (2): Linear(in_features=1024, out_features=256, bias=True)
        )
      )
      (2): Residual()
    )
    (2): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): Attention(
        (to_q): Linear(in_features=256, out_features=512, bias=False)
        (to_k): Linear(in_features=256, out_features=512, bias=False)
        (to_v): Linear(in_features=256, out_features=512, bias=False)
        (attend): Attend(
          (attn_dropout): Dropout(p=0.0, inplace=False)
        )
        (to_out): Linear(in_features=512, out_features=256, bias=False)
      )
      (2): Residual()
    )
    (3): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): FeedForward(
        (ff): Sequential(
          (0): Sequential(
            (0): Linear(in_features=256, out_features=1024, bias=True)
            (1): MyGELU()
          )
          (1): Dropout(p=0.0, inplace=False)
          (2): Linear(in_features=1024, out_features=256, bias=True)
        )
      )
      (2): Residual()
    )
    (4): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): Attention(
        (to_q): Linear(in_features=256, out_features=512, bias=False)
        (to_k): Linear(in_features=256, out_features=512, bias=False)
        (to_v): Linear(in_features=256, out_features=512, bias=False)
        (attend): Attend(
          (attn_dropout): Dropout(p=0.0, inplace=False)
        )
        (to_out): Linear(in_features=512, out_features=256, bias=False)
      )
      (2): Residual()
    )
    (5): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): FeedForward(
        (ff): Sequential(
          (0): Sequential(
            (0): Linear(in_features=256, out_features=1024, bias=True)
            (1): MyGELU()
          )
          (1): Dropout(p=0.0, inplace=False)
          (2): Linear(in_features=1024, out_features=256, bias=True)
        )
      )
      (2): Residual()
    )
    (6): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): Attention(
        (to_q): Linear(in_features=256, out_features=512, bias=False)
        (to_k): Linear(in_features=256, out_features=512, bias=False)
        (to_v): Linear(in_features=256, out_features=512, bias=False)
        (attend): Attend(
          (attn_dropout): Dropout(p=0.0, inplace=False)
        )
        (to_out): Linear(in_features=512, out_features=256, bias=False)
      )
      (2): Residual()
    )
    (7): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): FeedForward(
        (ff): Sequential(
          (0): Sequential(
            (0): Linear(in_features=256, out_features=1024, bias=True)
            (1): MyGELU()
          )
          (1): Dropout(p=0.0, inplace=False)
          (2): Linear(in_features=1024, out_features=256, bias=True)
        )
      )
      (2): Residual()
    )
    (8): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): Attention(
        (to_q): Linear(in_features=256, out_features=512, bias=False)
        (to_k): Linear(in_features=256, out_features=512, bias=False)
        (to_v): Linear(in_features=256, out_features=512, bias=False)
        (attend): Attend(
          (attn_dropout): Dropout(p=0.0, inplace=False)
        )
        (to_out): Linear(in_features=512, out_features=256, bias=False)
      )
      (2): Residual()
    )
    (9): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): FeedForward(
        (ff): Sequential(
          (0): Sequential(
            (0): Linear(in_features=256, out_features=1024, bias=True)
            (1): MyGELU()
          )
          (1): Dropout(p=0.0, inplace=False)
          (2): Linear(in_features=1024, out_features=256, bias=True)
        )
      )
      (2): Residual()
    )
    (10): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): Attention(
        (to_q): Linear(in_features=256, out_features=512, bias=False)
        (to_k): Linear(in_features=256, out_features=512, bias=False)
        (to_v): Linear(in_features=256, out_features=512, bias=False)
        (attend): Attend(
          (attn_dropout): Dropout(p=0.0, inplace=False)
        )
        (to_out): Linear(in_features=512, out_features=256, bias=False)
      )
      (2): Residual()
    )
    (11): ModuleList(
      (0): ModuleList(
        (0): LayerNorm(
          (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
        )
        (1): None
        (2): None
      )
      (1): FeedForward(
        (ff): Sequential(
          (0): Sequential(
            (0): Linear(in_features=256, out_features=1024, bias=True)
            (1): MyGELU()
          )
          (1): Dropout(p=0.0, inplace=False)
          (2): Linear(in_features=1024, out_features=256, bias=True)
        )
      )
      (2): Residual()
    )
  )
  (adaptive_mlp): Identity()
  (final_norm): LayerNorm(
    (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=False)
  )
  (skip_combines): ModuleList(
    (0): None
    (1): None
    (2): None
    (3): None
    (4): None
    (5): None
    (6): None
    (7): None
    (8): None
    (9): None
    (10): None
    (11): None
  )
)

```