In [1]:
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torch
import torch.nn.init as init

import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import random_split, Dataset, DataLoader
from model import ViT
from torch.optim import lr_scheduler

from tqdm import tqdm
import numpy as np
from random import randint
import random
import warmup_scheduler


In [2]:
# IPython line magic: sets the CUDA_VISIBLE_DEVICES environment variable to "1" for this kernel process.
# NOTE(review): the GPU index is machine-specific, and the variable presumably has to be set before the
# first CUDA call in this kernel to have any effect — confirm when running on a different host.
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [3]:
# Run-wide settings: the DataLoader batch size / worker count and the RNG seed are read from here.
hp = dict(batch_size=256, num_workers=4, seed=42)

# Seed Python's `random`, PyTorch and NumPy with the same value.
# torch.manual_seed is deliberately not the last statement: it returns a value that the
# notebook would otherwise display as the cell output.
seed = hp['seed']
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

In [4]:
# Per-channel mean / std handed to transforms.Normalize for both the train and test pipelines.
cifar10_mean = [0.4914, 0.4822, 0.4465]
cifar10_std = [0.2470, 0.2435, 0.2616]

# Dataset root, relative to the notebook's working directory.
cifar10_path = '../data/vision/cifar10/'

# Training pipeline: random 32x32 crop after 4px padding and a random horizontal flip,
# then tensor conversion and normalization.
cifar10_train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cifar10_mean, cifar10_std),
    ]
)
# Evaluation pipeline: no augmentation, only tensor conversion and normalization.
cifar10_test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(cifar10_mean, cifar10_std),
    ]
)

# download=True lets the notebook run from a fresh checkout: the archive is fetched into
# `cifar10_path` only when it is not already there, instead of failing with "dataset not found".
cifar10_train = CIFAR10(cifar10_path, train=True, transform=cifar10_train_transform, download=True)
cifar10_test = CIFAR10(cifar10_path, train=False, transform=cifar10_test_transform, download=True)

batch_size = hp['batch_size']
num_workers = hp['num_workers']
# Training batches are shuffled and the incomplete last batch is dropped.
train_dataloader = DataLoader(
    cifar10_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    drop_last=True,
)
# Validation keeps every sample, in order, with a fixed batch size of 512.
val_dataloader = DataLoader(
    cifar10_test,
    batch_size=512,
    shuffle=False,
    num_workers=num_workers,
    drop_last=False,
)

In [5]:
def train(model, train_dataloader, val_dataloader, optimizer, scheduler, loss_fn, epochs):
    """Alternate one training pass and one validation pass for `epochs` epochs.

    Before each epoch the scheduler is stepped with the 1-based epoch number.
    Nothing is returned; metrics are printed by `train_epoch` and `val_epoch`.
    """
    # A single optimizer step is taken right after clearing gradients, before any batch is seen.
    # NOTE(review): presumably here so that the first scheduler.step() is preceded by an
    # optimizer.step() (PyTorch warns about the opposite order) — confirm.
    optimizer.zero_grad()
    optimizer.step()

    for epoch_number in range(1, epochs + 1):
        scheduler.step(epoch_number)
        train_epoch(model, train_dataloader, optimizer, loss_fn)
        val_epoch(model, val_dataloader, loss_fn)


In [6]:
def train_epoch(model, train_dataloader, optimizer, loss_fn, patch_num=64):
    """Train `model` for one pass over `train_dataloader` and print the epoch metrics.

    Args:
        model: module with a truthy/falsy `joint` attribute. When `joint` is truthy each
            batch is split into its even- and odd-indexed samples, which are passed to the
            model as two arguments; otherwise the whole batch is passed as one argument.
        train_dataloader: iterable yielding `(inputs, labels)` batches.
        optimizer: optimizer that is zeroed and stepped once per batch.
        loss_fn: callable `loss_fn(outputs, labels)` returning a scalar loss tensor.
        patch_num: number of times each label is repeated (`repeat_interleave`) in joint
            mode. Defaults to 64, the value that used to be hard-coded here.
            NOTE(review): presumably the number of patches per image — confirm against `ViT`.

    Returns:
        The number of batches processed.
    """
    # Follow the model's device instead of assuming CUDA; a model without parameters
    # falls back to CUDA, which is what this function always used before.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    # Running totals for the epoch.
    epoch_total_loss = 0.0
    epoch_steps = 0
    sample_size = 0
    correct = 0

    model.train()
    # tqdm wraps the loader itself rather than an enumerate object, so it can read the
    # loader's length and render a real progress bar instead of a bare iteration counter.
    for inputs, labels in tqdm(train_dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        if model.joint:
            outputs = model(inputs[::2], inputs[1::2])
            # Labels are expanded so they line up one-to-one with the rows of `outputs`.
            labels = labels.repeat_interleave(patch_num)
        else:
            outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accuracy is counted over the (possibly repeated) labels.
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        sample_size += labels.size(0)

        epoch_total_loss += loss.item()
        epoch_steps += 1

    # An empty loader would otherwise raise ZeroDivisionError; report NaN instead.
    epoch_loss = epoch_total_loss / epoch_steps if epoch_steps else float('nan')
    accuracy = correct / sample_size if sample_size else float('nan')

    print(f"learning_rate ${optimizer.param_groups[0]['lr']}")
    print(f"train_epoch_loss ${epoch_loss}")
    print(f"training_accuracy ${accuracy}")
    return epoch_steps

In [7]:
def val_epoch(model, val_dataloader, loss_fn):
    """Evaluate `model` on `val_dataloader` and print the validation loss and accuracy.

    Args:
        model: module with a truthy/falsy `joint` attribute. When `joint` is truthy the
            same batch is passed as both model arguments (labels are not repeated here,
            unlike in `train_epoch`); otherwise the batch is passed once.
        val_dataloader: iterable yielding `(inputs, labels)` batches.
        loss_fn: callable `loss_fn(outputs, labels)` returning a scalar loss tensor.

    Returns:
        None. The printed loss is the mean of the per-batch losses, so batches of
        unequal size are weighted equally.
    """
    # Follow the model's device instead of assuming CUDA; a model without parameters
    # falls back to CUDA, which is what this function always used before.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    # Running totals for the pass.
    epoch_total_loss = 0.0
    epoch_steps = 0
    sample_size = 0
    correct = 0

    model.eval()
    # Gradients are never needed during evaluation, so the whole loop runs under no_grad.
    with torch.no_grad():
        # tqdm wraps the loader itself rather than an enumerate object, so it can read the
        # loader's length and render a real progress bar.
        for inputs, labels in tqdm(val_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            if model.joint:
                outputs = model(inputs, inputs)
            else:
                outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            sample_size += labels.size(0)
            epoch_total_loss += loss.item()
            epoch_steps += 1

    # An empty loader would otherwise raise ZeroDivisionError; report NaN instead.
    epoch_loss = epoch_total_loss / epoch_steps if epoch_steps else float('nan')
    accuracy = correct / sample_size if sample_size else float('nan')
    print(f"validation_loss ${epoch_loss}")
    print(f"accuracy ${accuracy}")

In [8]:
# Encoder hyperparameters: embedding width, attention heads, feed-forward width, dropout rate.
d_model, head, d_ff, dropout = 128, 8, 1024, 0

# One encoder layer definition, stacked six times by TransformerEncoder.
encoder_layers = nn.TransformerEncoderLayer(
    d_model=d_model,
    nhead=head,
    dim_feedforward=d_ff,
    dropout=dropout,
    batch_first=True,
)
transformer = nn.TransformerEncoder(encoder_layers, num_layers=6)

# NOTE(review): the positional ViT arguments are presumably image size (32), patch size (4),
# embedding width, the encoder, and the number of classes (10) — confirm against model.ViT.
model = ViT(32, 4, d_model, transformer, 10, joint=True)
# Kept as the last expression so the cell still displays the module structure.
model.cuda()

ViT(
  (to_patch_embedding): Sequential(
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=4, p2=4)
    (1): Linear(in_features=48, out_features=128, bias=True)
  )
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=1024, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=1024, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(i

In [9]:
# Objective and optimizer.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Learning-rate schedule: a warm-up wrapper around cosine annealing. The log recorded below
# shows the rate rising from 2e-4 to 1e-3 over the first five epochs and decaying afterwards.
# NOTE(review): T_max is the full epoch count although the cosine phase presumably only gets
# `epochs - warm_up_epoch` epochs, so the schedule may stop just short of its minimum — confirm intent.
warm_up_epoch, epochs = 5, 200
base_scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
scheduler = warmup_scheduler.GradualWarmupScheduler(
    optimizer,
    multiplier=1.,
    total_epoch=warm_up_epoch,
    after_scheduler=base_scheduler,
)

# Kick off the full run; per-epoch metrics are printed by train_epoch / val_epoch.
train(
    model=model,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    optimizer=optimizer,
    scheduler=scheduler,
    loss_fn=loss_fn,
    epochs=epochs,
)

195it [00:08, 23.33it/s]

learning_rate $0.0002
train_epoch_loss $2.1525448041084485
training_accuracy $0.1882114508213141



20it [00:01, 18.83it/s]

validation_loss $1.900147420167923
accuracy $0.2964



195it [00:07, 25.03it/s]

learning_rate $0.0004
train_epoch_loss $2.0079096659635884
training_accuracy $0.2535797901642628



20it [00:01, 18.18it/s]

validation_loss $1.6539156556129455
accuracy $0.4032



195it [00:07, 25.02it/s]

learning_rate $0.0006
train_epoch_loss $1.8778447365149473
training_accuracy $0.3151580028044872



20it [00:01, 18.86it/s]

validation_loss $1.5032204568386078
accuracy $0.4564



195it [00:07, 25.43it/s]

learning_rate $0.0008
train_epoch_loss $1.7841795841852823
training_accuracy $0.3523953951322115



20it [00:01, 18.53it/s]

validation_loss $1.4066085636615753
accuracy $0.4974



195it [00:07, 24.97it/s]

learning_rate $0.001
train_epoch_loss $1.7033429255852333
training_accuracy $0.38549241286057695



20it [00:01, 18.92it/s]

validation_loss $1.3298874497413635
accuracy $0.5277



195it [00:07, 24.79it/s]

learning_rate $0.001
train_epoch_loss $1.6290871393986237
training_accuracy $0.4156193659855769



20it [00:01, 18.71it/s]

validation_loss $1.2514374613761903
accuracy $0.5571



195it [00:07, 24.86it/s]

learning_rate $0.0009997532801828658
train_epoch_loss $1.5611856368871835
training_accuracy $0.4394712790464744



20it [00:01, 18.66it/s]

validation_loss $1.2430391907691956
accuracy $0.5502



195it [00:07, 24.86it/s]

learning_rate $0.0009994449374809851
train_epoch_loss $1.497345712246039
training_accuracy $0.4638696915064103



20it [00:01, 18.66it/s]

validation_loss $1.1891478657722474
accuracy $0.5693



195it [00:07, 25.20it/s]

learning_rate $0.0009990133642141358
train_epoch_loss $1.4304868661440335
training_accuracy $0.4887297801482372



20it [00:01, 18.68it/s]

validation_loss $1.095260626077652
accuracy $0.6121



195it [00:07, 25.13it/s]

learning_rate $0.000998458666866564
train_epoch_loss $1.3651418019563724
training_accuracy $0.5144935021033654



20it [00:01, 17.89it/s]

validation_loss $1.043459850549698
accuracy $0.635



195it [00:07, 24.79it/s]

learning_rate $0.00099778098230154
train_epoch_loss $1.3003122922701713
training_accuracy $0.5385494916866987



20it [00:01, 18.34it/s]

validation_loss $1.0019808083772659
accuracy $0.6453



195it [00:07, 24.86it/s]

learning_rate $0.0009969804777275899
train_epoch_loss $1.2547806990452302
training_accuracy $0.5554696890024039



20it [00:01, 18.67it/s]

validation_loss $0.979158130288124
accuracy $0.6518



195it [00:07, 25.25it/s]

learning_rate $0.000996057350657239
train_epoch_loss $1.1946204191599137
training_accuracy $0.5762391701722757



20it [00:01, 18.85it/s]

validation_loss $0.9700902730226517
accuracy $0.656



195it [00:07, 24.72it/s]

learning_rate $0.0009950118288582787
train_epoch_loss $1.1648724271700932
training_accuracy $0.5874496068709936



20it [00:01, 18.53it/s]

validation_loss $0.916499200463295
accuracy $0.679



195it [00:07, 24.77it/s]

learning_rate $0.0009938441702975688
train_epoch_loss $1.1247608890900245
training_accuracy $0.6008216271033654



20it [00:01, 18.81it/s]

validation_loss $0.9053264290094376
accuracy $0.6818



195it [00:07, 24.72it/s]

learning_rate $0.000992554663077387
train_epoch_loss $1.0850454666675666
training_accuracy $0.6157830654046474



20it [00:01, 18.66it/s]

validation_loss $0.8564016819000244
accuracy $0.7003



195it [00:07, 24.84it/s]

learning_rate $0.0009911436253643444
train_epoch_loss $1.059719108312558
training_accuracy $0.6276914938902244



20it [00:01, 17.91it/s]

validation_loss $0.8656766444444657
accuracy $0.6969



195it [00:07, 24.82it/s]

learning_rate $0.000989611405310883
train_epoch_loss $1.0222633126454475
training_accuracy $0.641928961338141



20it [00:01, 17.77it/s]

validation_loss $0.8077051639556885
accuracy $0.7115



195it [00:07, 25.00it/s]

learning_rate $0.0009879583809693738
train_epoch_loss $0.9995534878510696
training_accuracy $0.6474371494391026



20it [00:01, 18.13it/s]

validation_loss $0.7848932027816773
accuracy $0.7183



195it [00:07, 25.11it/s]

learning_rate $0.0009861849601988384
train_epoch_loss $0.9722754738269708
training_accuracy $0.6575912084334936



20it [00:01, 18.56it/s]

validation_loss $0.7670422285795212
accuracy $0.7321



195it [00:07, 24.80it/s]

learning_rate $0.0009842915805643156
train_epoch_loss $0.9536897867153853
training_accuracy $0.6665899814703525



20it [00:01, 18.28it/s]

validation_loss $0.7828573495149612
accuracy $0.7255



195it [00:07, 24.73it/s]

learning_rate $0.000982278709228899
train_epoch_loss $0.9256336065439078
training_accuracy $0.6746694711538461



20it [00:01, 18.75it/s]

validation_loss $0.7505470544099808
accuracy $0.7392



195it [00:07, 24.84it/s]

learning_rate $0.0009801468428384716
train_epoch_loss $0.9056741549418523
training_accuracy $0.6823429987980769



20it [00:01, 18.77it/s]

validation_loss $0.7628728449344635
accuracy $0.7321



195it [00:07, 24.76it/s]

learning_rate $0.000977896507399165
train_epoch_loss $0.8861494119350727
training_accuracy $0.6881332006209936



20it [00:01, 18.33it/s]

validation_loss $0.7335612565279007
accuracy $0.7472



195it [00:07, 24.78it/s]

learning_rate $0.0009755282581475768
train_epoch_loss $0.8630534159831512
training_accuracy $0.6962705954527244



20it [00:01, 18.73it/s]

validation_loss $0.7185164213180542
accuracy $0.7483



195it [00:07, 24.89it/s]

learning_rate $0.0009730426794137727
train_epoch_loss $0.8523476346945151
training_accuracy $0.7015139848758013



20it [00:01, 18.83it/s]

validation_loss $0.7023708820343018
accuracy $0.7583



195it [00:07, 24.76it/s]

learning_rate $0.0009704403844771128
train_epoch_loss $0.8338240522604722
training_accuracy $0.7074347080328526



20it [00:01, 18.37it/s]

validation_loss $0.7115458577871323
accuracy $0.7501



195it [00:07, 24.88it/s]

learning_rate $0.0009677220154149337
train_epoch_loss $0.817293322391999
training_accuracy $0.7119970077123398



20it [00:01, 18.45it/s]

validation_loss $0.7483649641275406
accuracy $0.7386



195it [00:07, 24.87it/s]

learning_rate $0.0009648882429441257
train_epoch_loss $0.8069098215836745
training_accuracy $0.7174156775841346



20it [00:01, 18.59it/s]

validation_loss $0.6793388664722443
accuracy $0.7602



195it [00:07, 24.87it/s]

learning_rate $0.0009619397662556434
train_epoch_loss $0.7852089133018102
training_accuracy $0.7242215670072115



20it [00:01, 18.48it/s]

validation_loss $0.680464568734169
accuracy $0.7639



195it [00:07, 24.76it/s]

learning_rate $0.0009588773128419905
train_epoch_loss $0.7739388563694098
training_accuracy $0.7286990434695513



20it [00:01, 18.24it/s]

validation_loss $0.6952609628438949
accuracy $0.7521



195it [00:07, 24.74it/s]

learning_rate $0.0009557016383177226
train_epoch_loss $0.7567717014214932
training_accuracy $0.7352169721554487



20it [00:01, 18.62it/s]

validation_loss $0.6778911888599396
accuracy $0.7643



195it [00:07, 25.15it/s]

learning_rate $0.0009524135262330098
train_epoch_loss $0.7466777936006204
training_accuracy $0.7379845252403846



20it [00:01, 18.16it/s]

validation_loss $0.6570190697908401
accuracy $0.7726



195it [00:07, 25.09it/s]

learning_rate $0.0009490137878803078
train_epoch_loss $0.7377187493519906
training_accuracy $0.7417596279046474



20it [00:01, 18.63it/s]

validation_loss $0.6591443657875061
accuracy $0.7691



195it [00:07, 24.82it/s]

learning_rate $0.0009455032620941839
train_epoch_loss $0.7147708088923723
training_accuracy $0.7479388897235577



20it [00:01, 18.62it/s]

validation_loss $0.6491044521331787
accuracy $0.7742



195it [00:07, 24.95it/s]

learning_rate $0.0009418828150443468
train_epoch_loss $0.7070148394658016
training_accuracy $0.7512056790865385



20it [00:01, 18.76it/s]

validation_loss $0.6719990313053131
accuracy $0.7601



195it [00:07, 24.91it/s]

learning_rate $0.0009381533400219318
train_epoch_loss $0.6979235389293769
training_accuracy $0.7560590695112179



20it [00:01, 18.53it/s]

validation_loss $0.6522950142621994
accuracy $0.7748



195it [00:07, 24.82it/s]

learning_rate $0.0009343157572190957
train_epoch_loss $0.6784699687590966
training_accuracy $0.7624489808693911



20it [00:01, 18.72it/s]

validation_loss $0.6440980076789856
accuracy $0.7785



195it [00:07, 24.85it/s]

learning_rate $0.0009303710135019718
train_epoch_loss $0.6749998780397268
training_accuracy $0.7634170923477565



20it [00:01, 18.46it/s]

validation_loss $0.6386095464229584
accuracy $0.7785



195it [00:07, 24.94it/s]

learning_rate $0.0009263200821770461
train_epoch_loss $0.6629488819684738
training_accuracy $0.7663808969350961



20it [00:01, 18.94it/s]

validation_loss $0.5938820153474808
accuracy $0.7916



195it [00:07, 24.95it/s]

learning_rate $0.0009221639627510075
train_epoch_loss $0.6448254565397898
training_accuracy $0.7725995968549679



20it [00:01, 18.34it/s]

validation_loss $0.6043926835060119
accuracy $0.7907



195it [00:07, 24.82it/s]

learning_rate $0.0009179036806841352
train_epoch_loss $0.6421619430566445
training_accuracy $0.7757743639823718



20it [00:01, 17.97it/s]

validation_loss $0.5988600820302963
accuracy $0.7922



195it [00:07, 24.77it/s]

learning_rate $0.0009135402871372809
train_epoch_loss $0.630479348011506
training_accuracy $0.7784232897636217



20it [00:01, 18.58it/s]

validation_loss $0.5725470155477523
accuracy $0.8012



195it [00:07, 24.79it/s]

learning_rate $0.0009090748587125117
train_epoch_loss $0.6218714604010949
training_accuracy $0.7821310972556089



20it [00:01, 18.68it/s]

validation_loss $0.5903392910957337
accuracy $0.7938



195it [00:07, 24.77it/s]

learning_rate $0.0009045084971874737
train_epoch_loss $0.6043912889101566
training_accuracy $0.7875550881410256



20it [00:01, 18.76it/s]

validation_loss $0.6029755935072899
accuracy $0.7874



195it [00:07, 24.74it/s]

learning_rate $0.0008998423292435454
train_epoch_loss $0.5892959551933484
training_accuracy $0.791748046875



20it [00:01, 18.47it/s]

validation_loss $0.58031897097826
accuracy $0.7994



195it [00:07, 24.76it/s]

learning_rate $0.0008950775061878451
train_epoch_loss $0.5843390016983717
training_accuracy $0.7954984224759616



20it [00:01, 18.11it/s]

validation_loss $0.5982825085520744
accuracy $0.7901



195it [00:07, 24.82it/s]

learning_rate $0.0008902152036691648
train_epoch_loss $0.5675200060392037
training_accuracy $0.8014554537259615



20it [00:01, 18.73it/s]

validation_loss $0.5944332152605056
accuracy $0.7945



195it [00:07, 24.91it/s]

learning_rate $0.0008852566213878947
train_epoch_loss $0.567035837815358
training_accuracy $0.8009318033854167



20it [00:01, 18.52it/s]

validation_loss $0.5680241569876671
accuracy $0.8076



195it [00:07, 24.76it/s]

learning_rate $0.0008802029828000156
train_epoch_loss $0.5474141136193886
training_accuracy $0.8087508764022436



20it [00:01, 18.70it/s]

validation_loss $0.5750898867845535
accuracy $0.8063



195it [00:07, 24.79it/s]

learning_rate $0.0008750555348152298
train_epoch_loss $0.5431211615220094
training_accuracy $0.8083023462540064



20it [00:01, 18.56it/s]

validation_loss $0.5738379582762718
accuracy $0.8007



195it [00:07, 24.78it/s]

learning_rate $0.0008698155474893048
train_epoch_loss $0.5282512912383446
training_accuracy $0.8147917918669871



20it [00:01, 18.47it/s]

validation_loss $0.5786044418811798
accuracy $0.8041



195it [00:07, 24.80it/s]

learning_rate $0.0008644843137107057
train_epoch_loss $0.5155094779454745
training_accuracy $0.8181139823717949



20it [00:01, 18.35it/s]

validation_loss $0.5639088720083236
accuracy $0.8124



195it [00:07, 24.64it/s]

learning_rate $0.0008590631488815944
train_epoch_loss $0.5067515938709943
training_accuracy $0.8228205754206731



20it [00:01, 18.57it/s]

validation_loss $0.562530343234539
accuracy $0.8067



195it [00:07, 24.73it/s]

learning_rate $0.0008535533905932737
train_epoch_loss $0.502503886895302
training_accuracy $0.8230966421274039



20it [00:01, 18.63it/s]

validation_loss $0.561085744202137
accuracy $0.8069



195it [00:07, 24.88it/s]

learning_rate $0.0008479563982961571
train_epoch_loss $0.49592164219954077
training_accuracy $0.8258494841746795



20it [00:01, 18.53it/s]

validation_loss $0.5630039170384407
accuracy $0.8133



195it [00:07, 24.90it/s]

learning_rate $0.0008422735529643444
train_epoch_loss $0.47815137795912915
training_accuracy $0.8318925906450321



20it [00:01, 18.51it/s]

validation_loss $0.540921288728714
accuracy $0.8164



195it [00:07, 24.84it/s]

learning_rate $0.0008365062567548867
train_epoch_loss $0.47251400229258417
training_accuracy $0.8327001327123398



20it [00:01, 18.33it/s]

validation_loss $0.550747749209404
accuracy $0.8124



195it [00:07, 24.82it/s]

learning_rate $0.0008306559326618259
train_epoch_loss $0.4626085727642744
training_accuracy $0.8377491486378205



20it [00:01, 18.66it/s]

validation_loss $0.5460674881935119
accuracy $0.8142



195it [00:07, 25.01it/s]

learning_rate $0.0008247240241650919
train_epoch_loss $0.45468740738355196
training_accuracy $0.8407176482371795



20it [00:01, 18.60it/s]

validation_loss $0.5396438851952553
accuracy $0.8157



195it [00:07, 25.03it/s]

learning_rate $0.0008187119948743449
train_epoch_loss $0.4392261278934968
training_accuracy $0.8460840469751603



20it [00:01, 18.87it/s]

validation_loss $0.5483933925628662
accuracy $0.8217



195it [00:07, 24.78it/s]

learning_rate $0.0008126213281678527
train_epoch_loss $0.4332887615913
training_accuracy $0.8481119791666667



20it [00:01, 18.71it/s]

validation_loss $0.5278337702155114
accuracy $0.824



195it [00:07, 24.82it/s]

learning_rate $0.0008064535268264883
train_epoch_loss $0.4250205975312453
training_accuracy $0.8503142528044871



20it [00:01, 18.88it/s]

validation_loss $0.536894890666008
accuracy $0.8171



195it [00:07, 24.86it/s]

learning_rate $0.0008002101126629421
train_epoch_loss $0.41409076819053064
training_accuracy $0.8542142427884616



20it [00:01, 18.82it/s]

validation_loss $0.5688400968909264
accuracy $0.8105



195it [00:07, 24.87it/s]

learning_rate $0.0007938926261462367
train_epoch_loss $0.4067872365315755
training_accuracy $0.8579824594350961



20it [00:01, 18.06it/s]

validation_loss $0.5386698856949806
accuracy $0.8196



195it [00:07, 24.86it/s]

learning_rate $0.0007875026260216394
train_epoch_loss $0.39652282030154495
training_accuracy $0.860552509014423



20it [00:01, 19.00it/s]

validation_loss $0.5480857133865357
accuracy $0.8158



195it [00:07, 24.80it/s]

learning_rate $0.0007810416889260654
train_epoch_loss $0.38945388885644766
training_accuracy $0.8626148712940706



20it [00:01, 18.52it/s]

validation_loss $0.536759439110756
accuracy $0.819



195it [00:07, 24.94it/s]

learning_rate $0.0007745114089990659
train_epoch_loss $0.3846952410844656
training_accuracy $0.8631613706931089



20it [00:01, 18.42it/s]

validation_loss $0.5316693872213364
accuracy $0.8256



195it [00:07, 24.68it/s]

learning_rate $0.0007679133974894983
train_epoch_loss $0.3736083282874181
training_accuracy $0.8689778645833334



20it [00:01, 18.73it/s]

validation_loss $0.5231362178921699
accuracy $0.8267



195it [00:07, 24.82it/s]

learning_rate $0.0007612492823579744
train_epoch_loss $0.36568590914591764
training_accuracy $0.8704549153645833



20it [00:01, 18.36it/s]

validation_loss $0.5181216612458229
accuracy $0.8278



195it [00:07, 24.68it/s]

learning_rate $0.0007545207078751857
train_epoch_loss $0.3626781322253056
training_accuracy $0.8719091170873398



20it [00:01, 18.75it/s]

validation_loss $0.5281323015689849
accuracy $0.8269



195it [00:07, 24.75it/s]

learning_rate $0.0007477293342162038
train_epoch_loss $0.34814871083467436
training_accuracy $0.8766504532251602



20it [00:01, 18.70it/s]

validation_loss $0.535064996778965
accuracy $0.8213



195it [00:07, 24.67it/s]

learning_rate $0.0007408768370508576
train_epoch_loss $0.3444537967443466
training_accuracy $0.8781303210136218



20it [00:01, 18.58it/s]

validation_loss $0.5699243381619453
accuracy $0.8169



195it [00:07, 24.78it/s]

learning_rate $0.0007339649071302867
train_epoch_loss $0.3352703585074498
training_accuracy $0.8824519230769231



20it [00:01, 18.03it/s]

validation_loss $0.5478711128234863
accuracy $0.8237



195it [00:07, 24.74it/s]

learning_rate $0.0007269952498697733
train_epoch_loss $0.3347349681915381
training_accuracy $0.8825915214342949



20it [00:01, 18.66it/s]

validation_loss $0.5309621587395668
accuracy $0.8249



195it [00:07, 24.74it/s]

learning_rate $0.0007199695849279575
train_epoch_loss $0.3269434141042905
training_accuracy $0.8837148813100961



20it [00:01, 18.02it/s]

validation_loss $0.5315805539488793
accuracy $0.8264



195it [00:07, 24.85it/s]

learning_rate $0.0007128896457825364
train_epoch_loss $0.31416733708137123
training_accuracy $0.8894108698918269



20it [00:01, 19.14it/s]

validation_loss $0.5242506474256515
accuracy $0.8326



195it [00:07, 24.81it/s]

learning_rate $0.0007057571793025545
train_epoch_loss $0.31423335281702186
training_accuracy $0.889767064803686



20it [00:01, 18.69it/s]

validation_loss $0.5339800655841828
accuracy $0.8306



195it [00:07, 24.94it/s]

learning_rate $0.0006985739453173903
train_epoch_loss $0.30644803077746663
training_accuracy $0.8915605593950321



20it [00:01, 18.62it/s]

validation_loss $0.5357346862554551
accuracy $0.8264



195it [00:07, 24.73it/s]

learning_rate $0.000691341716182545
train_epoch_loss $0.29379259111025396
training_accuracy $0.8960896809895833



20it [00:01, 18.38it/s]

validation_loss $0.5397819012403489
accuracy $0.8301



195it [00:07, 24.75it/s]

learning_rate $0.0006840622763423391
train_epoch_loss $0.29057019895468
training_accuracy $0.8972944210737179



20it [00:01, 18.55it/s]

validation_loss $0.5314573660492897
accuracy $0.8297



195it [00:07, 24.86it/s]

learning_rate $0.0006767374218896287
train_epoch_loss $0.28343088634503194
training_accuracy $0.8995035807291667



20it [00:01, 18.02it/s]

validation_loss $0.5230433210730553
accuracy $0.8291



195it [00:07, 24.83it/s]

learning_rate $0.0006693689601226458
train_epoch_loss $0.2807757180470687
training_accuracy $0.9012861202924679



20it [00:01, 18.38it/s]

validation_loss $0.5143134966492653
accuracy $0.8329



195it [00:07, 24.80it/s]

learning_rate $0.0006619587090990747
train_epoch_loss $0.2702829304413918
training_accuracy $0.905392064803686



20it [00:01, 18.86it/s]

validation_loss $0.5068489402532578
accuracy $0.8357



195it [00:07, 24.74it/s]

learning_rate $0.0006545084971874737
train_epoch_loss $0.26522435821019685
training_accuracy $0.9069658328325321



20it [00:01, 19.04it/s]

validation_loss $0.5371127247810363
accuracy $0.8327



195it [00:07, 24.75it/s]

learning_rate $0.0006470201626161521
train_epoch_loss $0.2541874649432989
training_accuracy $0.9109750600961538



20it [00:01, 17.77it/s]

validation_loss $0.5399553447961807
accuracy $0.832



195it [00:07, 24.93it/s]

learning_rate $0.0006394955530196147
train_epoch_loss $0.2589192080192077
training_accuracy $0.9084300505809295



20it [00:01, 18.30it/s]

validation_loss $0.539144703745842
accuracy $0.8354



195it [00:07, 24.85it/s]

learning_rate $0.0006319365249826864
train_epoch_loss $0.25233398171571586
training_accuracy $0.911000726161859



20it [00:01, 18.56it/s]

validation_loss $0.5325369164347649
accuracy $0.8371



195it [00:07, 24.93it/s]

learning_rate $0.0006243449435824273
train_epoch_loss $0.24563273978539002
training_accuracy $0.9132950220352564



20it [00:01, 18.77it/s]

validation_loss $0.5321219280362129
accuracy $0.8356



195it [00:07, 24.86it/s]

learning_rate $0.0006167226819279528
train_epoch_loss $0.24155300718087416
training_accuracy $0.9157232822516026



20it [00:01, 18.47it/s]

validation_loss $0.5290231242775917
accuracy $0.8373



195it [00:07, 24.89it/s]

learning_rate $0.0006090716206982714
train_epoch_loss $0.23682776865286706
training_accuracy $0.9168920272435898



20it [00:01, 18.13it/s]

validation_loss $0.5163774132728577
accuracy $0.8402



195it [00:07, 24.86it/s]

learning_rate $0.0006013936476782563
train_epoch_loss $0.23010679949552584
training_accuracy $0.9187597030248398



20it [00:01, 18.27it/s]

validation_loss $0.5211132481694222
accuracy $0.8364



195it [00:07, 24.90it/s]

learning_rate $0.0005936906572928624
train_epoch_loss $0.22042561456179008
training_accuracy $0.9239129482171474



20it [00:01, 18.20it/s]

validation_loss $0.5485026344656945
accuracy $0.8336



195it [00:07, 24.84it/s]

learning_rate $0.0005859645501397047
train_epoch_loss $0.22012506880057164
training_accuracy $0.9238622420873397



20it [00:01, 18.13it/s]

validation_loss $0.5322855189442635
accuracy $0.8372



195it [00:07, 24.91it/s]

learning_rate $0.0005782172325201155
train_epoch_loss $0.21573995168392474
training_accuracy $0.9243092072315705



20it [00:01, 18.25it/s]

validation_loss $0.5304804772138596
accuracy $0.8363



195it [00:07, 24.65it/s]

learning_rate $0.0005704506159687914
train_epoch_loss $0.20503130635389916
training_accuracy $0.9280370467748398



20it [00:01, 18.76it/s]

validation_loss $0.5283537551760673
accuracy $0.8363



195it [00:07, 24.92it/s]

learning_rate $0.0005626666167821522
train_epoch_loss $0.20660702463908073
training_accuracy $0.9274141751802885



20it [00:01, 17.90it/s]

validation_loss $0.5548200264573098
accuracy $0.8409



195it [00:07, 24.99it/s]

learning_rate $0.0005548671555455227
train_epoch_loss $0.19770720723347787
training_accuracy $0.9308127378806089



20it [00:01, 18.20it/s]

validation_loss $0.5413061827421188
accuracy $0.8338



195it [00:07, 25.05it/s]

learning_rate $0.0005470541566592571
train_epoch_loss $0.19307931516415033
training_accuracy $0.9325354942908654



20it [00:01, 18.49it/s]

validation_loss $0.5210465207695961
accuracy $0.8449



195it [00:07, 24.86it/s]

learning_rate $0.0005392295478639225
train_epoch_loss $0.19264246240640298
training_accuracy $0.9334322415865385



20it [00:01, 18.12it/s]

validation_loss $0.546888642013073
accuracy $0.8375



195it [00:07, 24.89it/s]

learning_rate $0.0005313952597646568
train_epoch_loss $0.1897643221112398
training_accuracy $0.934104254306891



20it [00:01, 17.61it/s]

validation_loss $0.544250063598156
accuracy $0.8361



195it [00:07, 24.80it/s]

learning_rate $0.0005235532253548213
train_epoch_loss $0.1840990492166617
training_accuracy $0.9363193609775641



20it [00:01, 18.55it/s]

validation_loss $0.5263379812240601
accuracy $0.8425



195it [00:07, 24.97it/s]

learning_rate $0.0005157053795390641
train_epoch_loss $0.1766495819657277
training_accuracy $0.9379767002203525



20it [00:01, 18.48it/s]

validation_loss $0.5508908361196518
accuracy $0.8363



195it [00:07, 24.89it/s]

learning_rate $0.0005078536586559104
train_epoch_loss $0.18206254583902848
training_accuracy $0.9360467372796475



20it [00:01, 17.87it/s]

validation_loss $0.5519056737422943
accuracy $0.8327



195it [00:07, 24.85it/s]

learning_rate $0.0005
train_epoch_loss $0.17217918764322232
training_accuracy $0.9401044796674679



20it [00:01, 18.09it/s]

validation_loss $0.5507649257779121
accuracy $0.8394



195it [00:07, 25.04it/s]

learning_rate $0.0004921463413440898
train_epoch_loss $0.16776582648356755
training_accuracy $0.9419114332932692



20it [00:01, 18.47it/s]

validation_loss $0.549997727572918
accuracy $0.8389



195it [00:07, 24.79it/s]

learning_rate $0.00048429462046093585
train_epoch_loss $0.15964822176939403
training_accuracy $0.9442429762620193



20it [00:01, 18.63it/s]

validation_loss $0.5428793400526046
accuracy $0.8405



195it [00:07, 24.71it/s]

learning_rate $0.00047644677464517873
train_epoch_loss $0.15833893147034522
training_accuracy $0.9441187149439103



20it [00:01, 18.78it/s]

validation_loss $0.5402186200022697
accuracy $0.8392



195it [00:07, 24.94it/s]

learning_rate $0.0004686047402353433
train_epoch_loss $0.15738972968015916
training_accuracy $0.9448260967548077



20it [00:01, 17.18it/s]

validation_loss $0.5325552329421044
accuracy $0.842



195it [00:07, 25.12it/s]

learning_rate $0.0004607704521360776
train_epoch_loss $0.1519928868000324
training_accuracy $0.9473801832932692



20it [00:01, 18.60it/s]

validation_loss $0.5722909152507782
accuracy $0.8384



195it [00:07, 25.03it/s]

learning_rate $0.00045294584334074284
train_epoch_loss $0.14852984620210452
training_accuracy $0.9482146434294871



20it [00:01, 18.54it/s]

validation_loss $0.5506952911615371
accuracy $0.8395



195it [00:07, 24.85it/s]

learning_rate $0.00044513284445447737
train_epoch_loss $0.1459211536324941
training_accuracy $0.9495780749198718



20it [00:01, 17.81it/s]

validation_loss $0.5497851654887199
accuracy $0.8388



195it [00:07, 24.99it/s]

learning_rate $0.00043733338321784795
train_epoch_loss $0.14247676187600844
training_accuracy $0.950095465244391



20it [00:01, 18.74it/s]

validation_loss $0.5582919627428055
accuracy $0.8384



195it [00:07, 24.83it/s]

learning_rate $0.0004295493840312088
train_epoch_loss $0.13945728089564885
training_accuracy $0.9511045798277243



20it [00:01, 18.49it/s]

validation_loss $0.5533387660980225
accuracy $0.844



195it [00:07, 25.16it/s]

learning_rate $0.0004217827674798846
train_epoch_loss $0.1373318857871569
training_accuracy $0.9523684770633013



20it [00:01, 17.98it/s]

validation_loss $0.5643043965101242
accuracy $0.8388



195it [00:07, 24.94it/s]

learning_rate $0.00041403544986029516
train_epoch_loss $0.12981720368067423
training_accuracy $0.9551742162459936



20it [00:01, 18.61it/s]

validation_loss $0.5771053463220597
accuracy $0.8412



195it [00:07, 24.97it/s]

learning_rate $0.0004063093427071377
train_epoch_loss $0.12849356058316352
training_accuracy $0.9552139673477564



20it [00:01, 18.65it/s]

validation_loss $0.5514847159385681
accuracy $0.8433



195it [00:07, 24.99it/s]

learning_rate $0.0003986063523217438
train_epoch_loss $0.12719963496694198
training_accuracy $0.9562975761217949



20it [00:01, 18.79it/s]

validation_loss $0.5521463513374328
accuracy $0.8426



195it [00:07, 24.92it/s]

learning_rate $0.0003909283793017289
train_epoch_loss $0.12644971805887345
training_accuracy $0.9565611227964743



20it [00:01, 18.24it/s]

validation_loss $0.5520963534712792
accuracy $0.8471



195it [00:07, 24.92it/s]

learning_rate $0.0003832773180720472
train_epoch_loss $0.120601563843397
training_accuracy $0.9577261117788461



20it [00:01, 18.60it/s]

validation_loss $0.5450472429394722
accuracy $0.8444



195it [00:07, 24.95it/s]

learning_rate $0.00037565505641757257
train_epoch_loss $0.1196850340335797
training_accuracy $0.9589903220152244



20it [00:01, 18.54it/s]

validation_loss $0.5583207577466964
accuracy $0.8404



195it [00:07, 25.10it/s]

learning_rate $0.0003680634750173136
train_epoch_loss $0.11847683787345886
training_accuracy $0.9591114533253206



20it [00:01, 18.89it/s]

validation_loss $0.5541530564427376
accuracy $0.8428



195it [00:07, 25.10it/s]

learning_rate $0.0003605044469803854
train_epoch_loss $0.1109700867953973
training_accuracy $0.9619632036258012



20it [00:01, 18.77it/s]

validation_loss $0.5599488168954849
accuracy $0.8451



195it [00:07, 24.96it/s]

learning_rate $0.0003529798373838481
train_epoch_loss $0.11188296018502651
training_accuracy $0.9613165439703526



20it [00:01, 18.61it/s]

validation_loss $0.5534881025552749
accuracy $0.8435



195it [00:07, 25.11it/s]

learning_rate $0.00034549150281252644
train_epoch_loss $0.10819104806734965
training_accuracy $0.9620965419671474



20it [00:01, 18.66it/s]

validation_loss $0.5436671003699303
accuracy $0.8477



195it [00:07, 25.02it/s]

learning_rate $0.0003380412909009253
train_epoch_loss $0.1075144594678512
training_accuracy $0.9627917167467949



20it [00:01, 18.02it/s]

validation_loss $0.559036536514759
accuracy $0.8453



195it [00:07, 24.87it/s]

learning_rate $0.0003306310398773543
train_epoch_loss $0.10453458699660423
training_accuracy $0.9644111828926282



20it [00:01, 18.71it/s]

validation_loss $0.5461011812090873
accuracy $0.847



195it [00:07, 24.98it/s]

learning_rate $0.00032326257811037153
train_epoch_loss $0.09965943195498907
training_accuracy $0.9652569110576923



20it [00:01, 18.64it/s]

validation_loss $0.5479896172881127
accuracy $0.8446



195it [00:07, 24.93it/s]

learning_rate $0.00031593772365766116
train_epoch_loss $0.09952953832271771
training_accuracy $0.9657217172475961



20it [00:01, 18.51it/s]

validation_loss $0.5712470084428787
accuracy $0.8447



195it [00:07, 25.08it/s]

learning_rate $0.0003086582838174553
train_epoch_loss $0.09457704321696209
training_accuracy $0.9671909430088141



20it [00:01, 18.55it/s]

validation_loss $0.5636789709329605
accuracy $0.8493



195it [00:07, 24.97it/s]

learning_rate $0.00030142605468260966
train_epoch_loss $0.09612441796522875
training_accuracy $0.9672860952524038



20it [00:01, 18.62it/s]

validation_loss $0.545417933166027
accuracy $0.8453



195it [00:07, 24.99it/s]

learning_rate $0.0002942428206974456
train_epoch_loss $0.0925543134029095
training_accuracy $0.9682595277443911



20it [00:01, 18.61it/s]

validation_loss $0.5620426803827285
accuracy $0.8453



195it [00:07, 24.89it/s]

learning_rate $0.00028711035421746366
train_epoch_loss $0.08936251915800265
training_accuracy $0.9691562750400641



20it [00:01, 18.70it/s]

validation_loss $0.5679553568363189
accuracy $0.8469



195it [00:07, 25.03it/s]

learning_rate $0.0002800304150720424
train_epoch_loss $0.08743631159647917
training_accuracy $0.970454602363782



20it [00:01, 18.58it/s]

validation_loss $0.5714917063713074
accuracy $0.841



195it [00:07, 25.06it/s]

learning_rate $0.00027300475013022663
train_epoch_loss $0.0918720567264618
training_accuracy $0.9682999048477564



20it [00:01, 18.55it/s]

validation_loss $0.5507145956158638
accuracy $0.8483



195it [00:07, 25.14it/s]

learning_rate $0.0002660350928697134
train_epoch_loss $0.0825042901130823
training_accuracy $0.9713767027243589



20it [00:01, 18.76it/s]

validation_loss $0.5582362204790116
accuracy $0.8486



195it [00:07, 25.03it/s]

learning_rate $0.0002591231629491423
train_epoch_loss $0.07986616273530019
training_accuracy $0.9724797175480769



20it [00:01, 18.69it/s]

validation_loss $0.540212769806385
accuracy $0.8489



195it [00:07, 24.98it/s]

learning_rate $0.0002522706657837962
train_epoch_loss $0.08129031755603276
training_accuracy $0.9723313551682692



20it [00:01, 18.70it/s]

validation_loss $0.5446021959185601
accuracy $0.8453



195it [00:07, 25.04it/s]

learning_rate $0.00024547929212481435
train_epoch_loss $0.07805133100885611
training_accuracy $0.9731507912660257



20it [00:01, 18.57it/s]

validation_loss $0.5671647489070892
accuracy $0.8494



195it [00:07, 25.01it/s]

learning_rate $0.00023875071764202561
train_epoch_loss $0.07730016425634041
training_accuracy $0.9732603415464743



20it [00:01, 18.49it/s]

validation_loss $0.5553297683596611
accuracy $0.8482



195it [00:07, 25.07it/s]

learning_rate $0.00023208660251050156
train_epoch_loss $0.07255281422000665
training_accuracy $0.974946789863782



20it [00:01, 18.63it/s]

validation_loss $0.5658859416842461
accuracy $0.848



195it [00:07, 25.07it/s]

learning_rate $0.00022548859100093404
train_epoch_loss $0.07317647679876059
training_accuracy $0.9748882587139424



20it [00:01, 18.71it/s]

validation_loss $0.5637120828032494
accuracy $0.8499



195it [00:07, 25.05it/s]

learning_rate $0.00021895831107393465
train_epoch_loss $0.07448874419698348
training_accuracy $0.9739987104366987



20it [00:01, 18.66it/s]

validation_loss $0.5513921052217483
accuracy $0.8474



195it [00:07, 25.37it/s]

learning_rate $0.00021249737397836073
train_epoch_loss $0.06890896566403218
training_accuracy $0.9756798377403846



20it [00:01, 18.67it/s]

validation_loss $0.5748820617794991
accuracy $0.8468



195it [00:07, 24.98it/s]

learning_rate $0.00020610737385376348
train_epoch_loss $0.07136438884414159
training_accuracy $0.9755145733173077



20it [00:01, 18.66it/s]

validation_loss $0.5641492933034897
accuracy $0.8471



195it [00:07, 25.02it/s]

learning_rate $0.00019978988733705805
train_epoch_loss $0.06962966018189222
training_accuracy $0.9759292993790064



20it [00:01, 18.74it/s]

validation_loss $0.5627577692270279
accuracy $0.8462



195it [00:07, 25.07it/s]

learning_rate $0.00019354647317351172
train_epoch_loss $0.06704421103573763
training_accuracy $0.9769355969551282



20it [00:01, 18.46it/s]

validation_loss $0.5681279197335243
accuracy $0.8506



195it [00:07, 24.99it/s]

learning_rate $0.0001873786718321474
train_epoch_loss $0.06331504420974315
training_accuracy $0.9783888596754807



20it [00:01, 18.65it/s]

validation_loss $0.5810896262526513
accuracy $0.8464



195it [00:07, 24.88it/s]

learning_rate $0.00018128800512565513
train_epoch_loss $0.06438204833330252
training_accuracy $0.9779259314903846



20it [00:01, 18.63it/s]

validation_loss $0.5690432995557785
accuracy $0.8511



195it [00:07, 24.95it/s]

learning_rate $0.00017527597583490823
train_epoch_loss $0.06198780412475268
training_accuracy $0.9788192357772436



20it [00:01, 18.83it/s]

validation_loss $0.5501927509903908
accuracy $0.8503



195it [00:07, 24.99it/s]

learning_rate $0.00016934406733817414
train_epoch_loss $0.060027467946593575
training_accuracy $0.9793673001802885



20it [00:01, 18.69it/s]

validation_loss $0.558638471364975
accuracy $0.8474



195it [00:07, 24.92it/s]

learning_rate $0.00016349374324511328
train_epoch_loss $0.05859207894939643
training_accuracy $0.979575445713141



20it [00:01, 18.56it/s]

validation_loss $0.5551159799098968
accuracy $0.8502



195it [00:07, 25.02it/s]

learning_rate $0.00015772644703565563
train_epoch_loss $0.06092094849699583
training_accuracy $0.9789954552283654



20it [00:01, 18.64it/s]

validation_loss $0.5617594957351685
accuracy $0.8493



195it [00:07, 25.01it/s]

learning_rate $0.00015204360170384285
train_epoch_loss $0.0564741466098871
training_accuracy $0.9807210286458333



20it [00:01, 18.50it/s]

validation_loss $0.5632695332169533
accuracy $0.8501



195it [00:07, 24.84it/s]

learning_rate $0.00014644660940672628
train_epoch_loss $0.056747117953804826
training_accuracy $0.9805930113181089



20it [00:01, 17.86it/s]

validation_loss $0.5615518778562546
accuracy $0.849



195it [00:07, 24.95it/s]

learning_rate $0.00014093685111840566
train_epoch_loss $0.053501257692009974
training_accuracy $0.9818065154246794



20it [00:01, 18.76it/s]

validation_loss $0.5576271921396255
accuracy $0.8509



195it [00:07, 24.78it/s]

learning_rate $0.00013551568628929433
train_epoch_loss $0.05421009149688941
training_accuracy $0.9813820863381411



20it [00:01, 18.61it/s]

validation_loss $0.5669384464621544
accuracy $0.8486



195it [00:07, 24.88it/s]

learning_rate $0.0001301844525106951
train_epoch_loss $0.050526779861404345
training_accuracy $0.982731432792468



20it [00:01, 18.58it/s]

validation_loss $0.5577500522136688
accuracy $0.8495



195it [00:07, 25.02it/s]

learning_rate $0.0001249444651847702
train_epoch_loss $0.05089952185367926
training_accuracy $0.9825104542267629



20it [00:01, 18.70it/s]

validation_loss $0.5537775367498398
accuracy $0.8484



195it [00:07, 24.88it/s]

learning_rate $0.00011979701719998454
train_epoch_loss $0.049901326697988385
training_accuracy $0.9828804211738782



20it [00:01, 18.60it/s]

validation_loss $0.5579766005277633
accuracy $0.8457



195it [00:07, 24.84it/s]

learning_rate $0.00011474337861210544
train_epoch_loss $0.04922646779853564
training_accuracy $0.9827477088341346



20it [00:01, 18.70it/s]

validation_loss $0.5539738744497299
accuracy $0.8507



195it [00:07, 24.85it/s]

learning_rate $0.00010978479633083521
train_epoch_loss $0.04806709394623072
training_accuracy $0.9837640224358974



20it [00:01, 18.46it/s]

validation_loss $0.5674845486879349
accuracy $0.8481



195it [00:07, 25.04it/s]

learning_rate $0.00010492249381215479
train_epoch_loss $0.047045176313855706
training_accuracy $0.9836710611979167



20it [00:01, 18.39it/s]

validation_loss $0.550402557849884
accuracy $0.853



195it [00:07, 25.13it/s]

learning_rate $0.00010015767075645472
train_epoch_loss $0.04740457438314572
training_accuracy $0.9838438376402243



20it [00:01, 18.66it/s]

validation_loss $0.5469461873173713
accuracy $0.8512



195it [00:07, 24.88it/s]

learning_rate $9.549150281252633e-05
train_epoch_loss $0.045624779432247846
training_accuracy $0.9843546549479166



20it [00:01, 18.57it/s]

validation_loss $0.5434899136424065
accuracy $0.8533



195it [00:07, 24.98it/s]

learning_rate $9.092514128748836e-05
train_epoch_loss $0.04468734060915617
training_accuracy $0.984464831229968



20it [00:01, 18.67it/s]

validation_loss $0.5499576106667519
accuracy $0.8528



195it [00:07, 25.07it/s]

learning_rate $8.645971286271914e-05
train_epoch_loss $0.04387982317652458
training_accuracy $0.9846989558293269



20it [00:01, 18.53it/s]

validation_loss $0.5675731554627419
accuracy $0.853



195it [00:07, 24.86it/s]

learning_rate $8.209631931586498e-05
train_epoch_loss $0.04182726995876202
training_accuracy $0.9857290414663461



20it [00:01, 18.73it/s]

validation_loss $0.5592126548290253
accuracy $0.8512



195it [00:07, 24.90it/s]

learning_rate $7.783603724899258e-05
train_epoch_loss $0.041586225904906404
training_accuracy $0.9856038411458333



20it [00:01, 18.59it/s]

validation_loss $0.562182666361332
accuracy $0.8528



195it [00:07, 24.89it/s]

learning_rate $7.367991782295403e-05
train_epoch_loss $0.042657057243662005
training_accuracy $0.9851960011017629



20it [00:01, 18.58it/s]

validation_loss $0.5545125991106034
accuracy $0.8497



195it [00:07, 24.81it/s]

learning_rate $6.962898649802813e-05
train_epoch_loss $0.041823376906223786
training_accuracy $0.9852927183493589



20it [00:01, 17.73it/s]

validation_loss $0.5447754383087158
accuracy $0.8532



195it [00:07, 24.94it/s]

learning_rate $6.568424278090435e-05
train_epoch_loss $0.04017677891712922
training_accuracy $0.9859994741586539



20it [00:01, 18.62it/s]

validation_loss $0.5566635519266129
accuracy $0.8559



195it [00:07, 24.95it/s]

learning_rate $6.184665997806821e-05
train_epoch_loss $0.040021679918162335
training_accuracy $0.9859115209334935



20it [00:01, 18.66it/s]

validation_loss $0.5520653337240219
accuracy $0.8531



195it [00:07, 25.01it/s]

learning_rate $5.811718495565327e-05
train_epoch_loss $0.04034337904781867
training_accuracy $0.9856783353365385



20it [00:01, 18.61it/s]

validation_loss $0.5538110822439194
accuracy $0.855



195it [00:07, 24.99it/s]

learning_rate $5.449673790581611e-05
train_epoch_loss $0.03916007063518732
training_accuracy $0.9860054211738782



20it [00:01, 17.97it/s]

validation_loss $0.5570726111531258
accuracy $0.8536



195it [00:07, 24.97it/s]

learning_rate $5.098621211969223e-05
train_epoch_loss $0.03760539119442304
training_accuracy $0.9869328425480769



20it [00:01, 18.71it/s]

validation_loss $0.5504755437374115
accuracy $0.8552



195it [00:07, 24.93it/s]

learning_rate $4.758647376699032e-05
train_epoch_loss $0.039608579673446144
training_accuracy $0.9861368815104167



20it [00:01, 18.71it/s]

validation_loss $0.5455171659588813
accuracy $0.8547



195it [00:07, 24.91it/s]

learning_rate $4.4298361682277466e-05
train_epoch_loss $0.03751199605564277
training_accuracy $0.9867954351963141



20it [00:01, 18.01it/s]

validation_loss $0.5483792006969452
accuracy $0.8546



195it [00:07, 24.96it/s]

learning_rate $4.112268715800954e-05
train_epoch_loss $0.037756342612780054
training_accuracy $0.98701171875



20it [00:01, 18.62it/s]

validation_loss $0.537652425467968
accuracy $0.8544



195it [00:07, 25.01it/s]

learning_rate $3.806023374435674e-05
train_epoch_loss $0.035927239700387685
training_accuracy $0.9875068860176283



20it [00:01, 18.65it/s]

validation_loss $0.5502196177840233
accuracy $0.8536



195it [00:07, 24.99it/s]

learning_rate $3.5111757055874326e-05
train_epoch_loss $0.03613986101670143
training_accuracy $0.9871218950320513



20it [00:01, 18.74it/s]

validation_loss $0.5482762157917023
accuracy $0.8544



195it [00:07, 25.07it/s]

learning_rate $3.227798458506631e-05
train_epoch_loss $0.035557663440704344
training_accuracy $0.9876367813501603



20it [00:01, 18.70it/s]

validation_loss $0.5410638973116875
accuracy $0.8542



195it [00:07, 25.21it/s]

learning_rate $2.9559615522887274e-05
train_epoch_loss $0.035612549838156274
training_accuracy $0.9877015725160256



20it [00:01, 18.68it/s]

validation_loss $0.5482651889324188
accuracy $0.8545



195it [00:07, 24.95it/s]

learning_rate $2.695732058622735e-05
train_epoch_loss $0.03560339552469743
training_accuracy $0.9876633864182692



20it [00:01, 18.55it/s]

validation_loss $0.5452776864171028
accuracy $0.8533



195it [00:07, 24.72it/s]

learning_rate $2.4471741852423235e-05
train_epoch_loss $0.035163226728446975
training_accuracy $0.9876405373597756



20it [00:01, 18.50it/s]

validation_loss $0.5464756146073342
accuracy $0.8551



195it [00:07, 24.76it/s]

learning_rate $2.210349260083494e-05
train_epoch_loss $0.035060748639397135
training_accuracy $0.987775127704327



20it [00:01, 18.48it/s]

validation_loss $0.5387897774577141
accuracy $0.8557



195it [00:07, 24.87it/s]

learning_rate $1.9853157161528523e-05
train_epoch_loss $0.03303714653429313
training_accuracy $0.9886227338741987



20it [00:01, 18.56it/s]

validation_loss $0.5389807656407356
accuracy $0.8559



195it [00:07, 24.95it/s]

learning_rate $1.7721290771101016e-05
train_epoch_loss $0.03424986346791952
training_accuracy $0.9879763872195513



20it [00:01, 18.67it/s]

validation_loss $0.5318310707807541
accuracy $0.8582



195it [00:07, 24.88it/s]

learning_rate $1.5708419435684518e-05
train_epoch_loss $0.03339081102361282
training_accuracy $0.9884731194911859



20it [00:01, 18.61it/s]

validation_loss $0.5380954176187516
accuracy $0.8562



195it [00:07, 24.93it/s]

learning_rate $1.3815039801161721e-05
train_epoch_loss $0.032796888616986763
training_accuracy $0.988568584735577



20it [00:01, 18.54it/s]

validation_loss $0.5397576093673706
accuracy $0.8568



195it [00:07, 24.96it/s]

learning_rate $1.2041619030626338e-05
train_epoch_loss $0.033201056064512485
training_accuracy $0.98853759765625



20it [00:01, 18.53it/s]

validation_loss $0.5386934474110603
accuracy $0.8582



195it [00:07, 24.95it/s]

learning_rate $1.038859468911707e-05
train_epoch_loss $0.03259353287135943
training_accuracy $0.9885535606971154



20it [00:01, 18.52it/s]

validation_loss $0.5388247162103653
accuracy $0.8558



195it [00:07, 24.77it/s]

learning_rate $8.856374635655639e-06
train_epoch_loss $0.031891766195304885
training_accuracy $0.9886759440104167



20it [00:01, 18.01it/s]

validation_loss $0.542427396774292
accuracy $0.8561



195it [00:07, 24.96it/s]

learning_rate $7.445336922613066e-06
train_epoch_loss $0.03349320626316162
training_accuracy $0.9880674704527244



20it [00:01, 18.59it/s]

validation_loss $0.541293615102768
accuracy $0.8566



195it [00:07, 24.83it/s]

learning_rate $6.15582970243117e-06
train_epoch_loss $0.03147891837721452
training_accuracy $0.9889729817708334



20it [00:01, 18.25it/s]

validation_loss $0.5392356753349304
accuracy $0.8579



195it [00:07, 24.86it/s]

learning_rate $4.988171141721232e-06
train_epoch_loss $0.0327533270065219
training_accuracy $0.9883425981570513



20it [00:01, 18.27it/s]

validation_loss $0.5382685989141465
accuracy $0.8571



195it [00:07, 24.85it/s]

learning_rate $3.942649342761117e-06
train_epoch_loss $0.03223902311367102
training_accuracy $0.9884890825320513



20it [00:01, 18.65it/s]

validation_loss $0.5383084058761597
accuracy $0.8576



195it [00:07, 24.93it/s]

learning_rate $3.0195222724102023e-06
train_epoch_loss $0.031517639460089876
training_accuracy $0.9887951973157051



20it [00:01, 18.62it/s]

validation_loss $0.5401277497410775
accuracy $0.8575



195it [00:07, 24.93it/s]

learning_rate $2.219017698460002e-06
train_epoch_loss $0.030982422054960178
training_accuracy $0.9889316656650641



20it [00:01, 18.66it/s]

validation_loss $0.5398990288376808
accuracy $0.8573



195it [00:07, 24.89it/s]

learning_rate $1.541333133436018e-06
train_epoch_loss $0.03264300668946443
training_accuracy $0.9883936172876603



20it [00:01, 18.70it/s]

validation_loss $0.5399981454014778
accuracy $0.8573





In [10]:
model

ViT(
  (to_patch_embedding): Sequential(
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=4, p2=4)
    (1): Linear(in_features=48, out_features=128, bias=True)
  )
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=1024, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=1024, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(i