In [1]:
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torch
import torch.nn.init as init

import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import random_split, Dataset, DataLoader
from model import ViT
from torch.optim import lr_scheduler

from tqdm import tqdm
import numpy as np
from random import randint
import random
import warmup_scheduler


In [2]:
hp = {
    'batch_size': 128,
    'num_workers': 4,
    'seed': 42
}
seed = hp['seed']
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

In [3]:
cifar10_mean = [0.4914, 0.4822, 0.4465]
cifar10_std = [0.2470, 0.2435, 0.2616]

cifar10_path = '../data/vision/cifar10/'
cifar10_train_transform = transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(cifar10_mean, cifar10_std),
            ]
        )
cifar10_test_transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(cifar10_mean, cifar10_std),
            ]
        )
cifar10_train = CIFAR10(cifar10_path, train=True, transform=cifar10_train_transform)
cifar10_test = CIFAR10(cifar10_path, train=False, transform=cifar10_test_transform)

batch_size = hp['batch_size']
num_workers = hp['num_workers']
train_dataloader = DataLoader(cifar10_train, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
val_dataloader = DataLoader(cifar10_test, batch_size=512, shuffle=False, num_workers=num_workers, drop_last=False)

In [4]:
def train(model, train_dataloader, val_dataloader, optimizer, scheduler, loss_fn, epochs):
    optimizer.zero_grad()
    optimizer.step()
    for epoch in range(epochs):    
        scheduler.step(epoch + 1)
        train_epoch(model, train_dataloader, optimizer, loss_fn)
        val_epoch(model, val_dataloader, loss_fn)


In [5]:
def train_epoch(model, train_dataloader, optimizer, loss_fn):
    # step variable
    epoch_total_loss = 0
    epoch_steps = 0
    sample_size = 0
    correct = 0
    
    patch_num = 64
    #set train mode
    model.train()
    for batch_idx, (inputs, labels) in tqdm(enumerate(train_dataloader)):
        # backprop
        optimizer.zero_grad()
        inputs, labels = inputs.cuda(), labels.cuda()
        if model.joint:
            outputs = model(inputs[::2], inputs[1::2])
            labels = labels.repeat_interleave(patch_num)
        else:
            outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        sample_size += labels.size(0)
        
        # update step
        epoch_total_loss += loss.item()
        epoch_steps += 1
        # change learning rate


    epoch_loss = epoch_total_loss / epoch_steps
    accuracy = correct / sample_size

    print(f"learning_rate ${optimizer.param_groups[0]['lr']}")
    print(f"train_epoch_loss ${epoch_loss}")
    print(f"training_accuracy ${accuracy}")
    return epoch_steps

In [6]:
def val_epoch(model, val_dataloader, loss_fn):
    # step_variable
    epoch_total_loss = 0
    epoch_steps = 0
    sample_size = 0
    correct = 0
    
    # set eval mode
    model.eval()
    for batch_idx, (inputs, labels) in tqdm(enumerate(val_dataloader, 0)):
        with torch.no_grad():
            # inference
            inputs, labels = inputs.cuda(), labels.cuda()
            if model.joint:
                outputs = model(inputs, inputs)
            else:
                outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            sample_size += labels.size(0)
            epoch_total_loss += loss.item()
            epoch_steps += 1
    epoch_loss = epoch_total_loss / epoch_steps
    accuracy = correct / sample_size
    print(f"validation_loss ${epoch_loss}")
    print(f"accuracy ${accuracy}")

In [7]:
d_model = 384
head = 12
d_ff = 384 * 4
dropout = 0
encoder_layers = nn.TransformerEncoderLayer(d_model, head, d_ff, dropout, batch_first=True)
transformer = nn.TransformerEncoder(encoder_layers, 7)
model = ViT(32, 4, d_model, transformer, 10, joint=False)
model.cuda()

ViT(
  (to_patch_embedding): Sequential(
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=4, p2=4)
    (1): Linear(in_features=48, out_features=384, bias=True)
  )
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=384, out_features=384, bias=True)
        )
        (linear1): Linear(in_features=384, out_features=384, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=384, out_features=384, bias=True)
        (norm1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-5)
warm_up_epoch = 5
epochs = 200
base_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
scheduler = warmup_scheduler.GradualWarmupScheduler(optimizer, multiplier=1., total_epoch=warm_up_epoch, after_scheduler=base_scheduler)

train(model, train_dataloader, val_dataloader, optimizer, scheduler, loss_fn, epochs)

390it [00:14, 26.96it/s]

learning_rate $0.0002
train_epoch_loss $1.797537685663272
training_accuracy $0.33405448717948716



20it [00:00, 20.17it/s]

validation_loss $1.4869668781757355
accuracy $0.4599



390it [00:14, 27.75it/s]

learning_rate $0.0004
train_epoch_loss $1.5017630170553158
training_accuracy $0.4518028846153846



20it [00:00, 20.15it/s]

validation_loss $1.3118082106113433
accuracy $0.5207



390it [00:14, 27.71it/s]

learning_rate $0.0006
train_epoch_loss $1.409032218884199
training_accuracy $0.48549679487179487



20it [00:00, 20.36it/s]

validation_loss $1.2649121403694152
accuracy $0.5407



390it [00:13, 27.87it/s]


learning_rate $0.0008
train_epoch_loss $1.4049898743629456
training_accuracy $0.4901241987179487


20it [00:00, 20.65it/s]

validation_loss $1.299406772851944
accuracy $0.5356



390it [00:14, 27.78it/s]

learning_rate $0.001
train_epoch_loss $1.5394449885074908
training_accuracy $0.438661858974359



20it [00:00, 20.45it/s]

validation_loss $1.5914032280445098
accuracy $0.4223



390it [00:14, 27.84it/s]


learning_rate $0.001
train_epoch_loss $1.6580662806828816
training_accuracy $0.3939102564102564


20it [00:00, 20.23it/s]

validation_loss $1.5688964784145356
accuracy $0.4319



390it [00:14, 27.81it/s]

learning_rate $0.0009997532801828658
train_epoch_loss $1.6622084898826404
training_accuracy $0.3908653846153846



20it [00:00, 20.47it/s]

validation_loss $1.5725719153881073
accuracy $0.4231



390it [00:14, 27.83it/s]

learning_rate $0.0009994449374809851
train_epoch_loss $1.6489520198259597
training_accuracy $0.39435096153846155



20it [00:00, 20.53it/s]

validation_loss $1.667277067899704
accuracy $0.382



390it [00:14, 27.81it/s]

learning_rate $0.0009990133642141358
train_epoch_loss $1.6358621881558344
training_accuracy $0.39845753205128204



20it [00:00, 20.21it/s]

validation_loss $1.5826353311538697
accuracy $0.4341



390it [00:14, 27.79it/s]

learning_rate $0.000998458666866564
train_epoch_loss $1.620021873865372
training_accuracy $0.4041065705128205



20it [00:00, 20.41it/s]

validation_loss $1.5764516174793244
accuracy $0.4353



390it [00:14, 27.83it/s]

learning_rate $0.00099778098230154
train_epoch_loss $1.5853043898558006
training_accuracy $0.42375801282051284



20it [00:00, 20.47it/s]

validation_loss $1.4954209089279176
accuracy $0.4442



390it [00:14, 27.68it/s]

learning_rate $0.0009969804777275899
train_epoch_loss $1.5834494440983504
training_accuracy $0.4208533653846154



20it [00:00, 20.44it/s]

validation_loss $1.4954691231250763
accuracy $0.4514



390it [00:13, 27.88it/s]

learning_rate $0.000996057350657239
train_epoch_loss $1.5471307910405672
training_accuracy $0.43509615384615385



20it [00:00, 20.48it/s]

validation_loss $1.5706639647483827
accuracy $0.4223



390it [00:13, 27.90it/s]

learning_rate $0.0009950118288582787
train_epoch_loss $1.533008764951657
training_accuracy $0.43784054487179486



20it [00:00, 20.54it/s]

validation_loss $1.4956619918346405
accuracy $0.4542



390it [00:14, 27.84it/s]

learning_rate $0.0009938441702975688
train_epoch_loss $1.5271808000711293
training_accuracy $0.4430889423076923



20it [00:00, 20.47it/s]

validation_loss $1.4790595114231109
accuracy $0.4711



390it [00:14, 27.67it/s]

learning_rate $0.000992554663077387
train_epoch_loss $1.5068683101580693
training_accuracy $0.44977964743589743



20it [00:00, 20.50it/s]

validation_loss $1.4590905010700226
accuracy $0.4709



390it [00:14, 27.78it/s]


learning_rate $0.0009911436253643444
train_epoch_loss $1.4849342651856252
training_accuracy $0.45869391025641026


20it [00:00, 20.27it/s]

validation_loss $1.423859679698944
accuracy $0.4779



390it [00:14, 27.75it/s]

learning_rate $0.000989611405310883
train_epoch_loss $1.4767547748027703
training_accuracy $0.4627804487179487



20it [00:00, 20.51it/s]

validation_loss $1.4078517258167267
accuracy $0.4895



390it [00:14, 27.70it/s]

learning_rate $0.0009879583809693738
train_epoch_loss $1.4726611048747331
training_accuracy $0.4628605769230769



20it [00:00, 20.66it/s]

validation_loss $1.3801874756813048
accuracy $0.4965



390it [00:14, 27.66it/s]

learning_rate $0.0009861849601988384
train_epoch_loss $1.455356325247349
training_accuracy $0.46979166666666666



20it [00:00, 20.48it/s]

validation_loss $1.4538651287555695
accuracy $0.476



390it [00:14, 27.73it/s]

learning_rate $0.0009842915805643156
train_epoch_loss $1.4374077026660625
training_accuracy $0.47612179487179485



20it [00:00, 20.46it/s]

validation_loss $1.3758163511753083
accuracy $0.5



390it [00:13, 27.88it/s]

learning_rate $0.000982278709228899
train_epoch_loss $1.4283188951321137
training_accuracy $0.48004807692307694



20it [00:00, 20.58it/s]

validation_loss $1.3758163571357727
accuracy $0.5034



390it [00:14, 27.83it/s]

learning_rate $0.0009801468428384716
train_epoch_loss $1.4200213371179042
training_accuracy $0.4834735576923077



20it [00:00, 20.38it/s]

validation_loss $1.3845389664173127
accuracy $0.5024



390it [00:14, 27.68it/s]

learning_rate $0.000977896507399165
train_epoch_loss $1.4007349983239785
training_accuracy $0.49040464743589746



20it [00:00, 20.54it/s]

validation_loss $1.3675536334514617
accuracy $0.5041



390it [00:14, 27.68it/s]

learning_rate $0.0009755282581475768
train_epoch_loss $1.3892228618646278
training_accuracy $0.4949519230769231



20it [00:00, 20.42it/s]

validation_loss $1.3568364262580872
accuracy $0.5095



390it [00:14, 27.82it/s]

learning_rate $0.0009730426794137727
train_epoch_loss $1.3749532491732865
training_accuracy $0.4986778846153846



20it [00:00, 20.72it/s]

validation_loss $1.3944308280944824
accuracy $0.4976



390it [00:14, 27.84it/s]

learning_rate $0.0009704403844771128
train_epoch_loss $1.3678668447029896
training_accuracy $0.5043269230769231



20it [00:00, 20.19it/s]

validation_loss $1.3256908237934113
accuracy $0.5234



390it [00:14, 27.83it/s]

learning_rate $0.0009677220154149337
train_epoch_loss $1.3406592885653177
training_accuracy $0.5134214743589743



20it [00:00, 20.41it/s]

validation_loss $1.327906036376953
accuracy $0.5212



390it [00:13, 27.87it/s]

learning_rate $0.0009648882429441257
train_epoch_loss $1.3316639753488393
training_accuracy $0.5145432692307692



20it [00:00, 20.39it/s]

validation_loss $1.2787015676498412
accuracy $0.5315



390it [00:14, 27.73it/s]

learning_rate $0.0009619397662556434
train_epoch_loss $1.3114704174873157
training_accuracy $0.5221153846153846



20it [00:00, 20.65it/s]

validation_loss $1.2731449007987976
accuracy $0.5388



390it [00:14, 27.70it/s]

learning_rate $0.0009588773128419905
train_epoch_loss $1.2906822119003687
training_accuracy $0.5300080128205128



20it [00:00, 20.37it/s]

validation_loss $1.2782549858093262
accuracy $0.5354



390it [00:14, 27.69it/s]

learning_rate $0.0009557016383177226
train_epoch_loss $1.2844483632307786
training_accuracy $0.5338741987179487



20it [00:00, 20.57it/s]

validation_loss $1.2721297085285186
accuracy $0.5458



390it [00:14, 27.75it/s]

learning_rate $0.0009524135262330098
train_epoch_loss $1.256637506148754
training_accuracy $0.5461338141025641



20it [00:00, 20.40it/s]

validation_loss $1.2449683129787446
accuracy $0.5537



390it [00:14, 27.70it/s]

learning_rate $0.0009490137878803078
train_epoch_loss $1.246871007252962
training_accuracy $0.5493790064102564



20it [00:00, 20.40it/s]

validation_loss $1.2588550388813018
accuracy $0.5524



390it [00:14, 27.77it/s]

learning_rate $0.0009455032620941839
train_epoch_loss $1.2339947208380089
training_accuracy $0.5552283653846154



20it [00:00, 20.45it/s]

validation_loss $1.2359387159347535
accuracy $0.559



390it [00:14, 27.84it/s]

learning_rate $0.0009418828150443468
train_epoch_loss $1.2229629437128702
training_accuracy $0.5576121794871794



20it [00:00, 20.46it/s]

validation_loss $1.2134937405586244
accuracy $0.5621



390it [00:14, 27.78it/s]

learning_rate $0.0009381533400219318
train_epoch_loss $1.2051159527057256
training_accuracy $0.5659855769230769



20it [00:00, 20.43it/s]

validation_loss $1.2134954988956452
accuracy $0.5658



390it [00:14, 27.71it/s]

learning_rate $0.0009343157572190957
train_epoch_loss $1.1848947561704195
training_accuracy $0.5700320512820513



20it [00:00, 20.44it/s]

validation_loss $1.1637111008167267
accuracy $0.5862



390it [00:14, 27.75it/s]

learning_rate $0.0009303710135019718
train_epoch_loss $1.1569220234186222
training_accuracy $0.5824719551282052



20it [00:00, 20.45it/s]

validation_loss $1.1500881612300873
accuracy $0.5882



390it [00:14, 27.65it/s]


learning_rate $0.0009263200821770461
train_epoch_loss $1.1502203880212245
training_accuracy $0.5860376602564102


20it [00:00, 20.51it/s]

validation_loss $1.1366567075252534
accuracy $0.5918



390it [00:14, 27.76it/s]

learning_rate $0.0009221639627510075
train_epoch_loss $1.14741554076855
training_accuracy $0.5874198717948718



20it [00:00, 20.46it/s]

validation_loss $1.127378386259079
accuracy $0.5948



390it [00:14, 27.64it/s]

learning_rate $0.0009179036806841352
train_epoch_loss $1.1307885434383
training_accuracy $0.5934094551282051



20it [00:00, 20.45it/s]

validation_loss $1.0952897369861603
accuracy $0.6044



390it [00:14, 27.77it/s]

learning_rate $0.0009135402871372809
train_epoch_loss $1.102837399030343
training_accuracy $0.6036258012820512



20it [00:00, 20.43it/s]

validation_loss $1.1706620573997497
accuracy $0.5882



390it [00:13, 27.86it/s]

learning_rate $0.0009090748587125117
train_epoch_loss $1.0761734127998352
training_accuracy $0.6155048076923076



20it [00:00, 20.55it/s]

validation_loss $1.0926093697547912
accuracy $0.6117



390it [00:14, 27.79it/s]

learning_rate $0.0009045084971874737
train_epoch_loss $1.0562708888298427
training_accuracy $0.6216546474358975



20it [00:00, 20.42it/s]

validation_loss $1.0803573727607727
accuracy $0.618



390it [00:14, 27.65it/s]

learning_rate $0.0008998423292435454
train_epoch_loss $1.0280085646189177
training_accuracy $0.6324519230769231



20it [00:00, 20.58it/s]

validation_loss $1.0410316973924636
accuracy $0.6321



390it [00:13, 27.86it/s]

learning_rate $0.0008950775061878451
train_epoch_loss $1.0091287637368227
training_accuracy $0.6360576923076923



20it [00:00, 20.43it/s]

validation_loss $1.0465686738491058
accuracy $0.6278



390it [00:14, 27.56it/s]

learning_rate $0.0008902152036691648
train_epoch_loss $0.985934654260293
training_accuracy $0.6449719551282052



20it [00:00, 20.42it/s]

validation_loss $0.9964129000902175
accuracy $0.6439



390it [00:13, 27.98it/s]

learning_rate $0.0008852566213878947
train_epoch_loss $0.9586502000307425
training_accuracy $0.6591746794871794



20it [00:01, 19.64it/s]

validation_loss $0.98975368142128
accuracy $0.6473



390it [00:14, 27.70it/s]

learning_rate $0.0008802029828000156
train_epoch_loss $0.9379285080310625
training_accuracy $0.6657451923076924



20it [00:00, 20.40it/s]

validation_loss $0.9540944367647171
accuracy $0.6617



390it [00:13, 27.86it/s]

learning_rate $0.0008750555348152298
train_epoch_loss $0.912512641839492
training_accuracy $0.6732972756410256



20it [00:00, 20.36it/s]

validation_loss $0.9487131506204605
accuracy $0.6646



390it [00:14, 27.85it/s]

learning_rate $0.0008698155474893048
train_epoch_loss $0.8980848396435762
training_accuracy $0.6814703525641026



20it [00:00, 20.41it/s]

validation_loss $0.933381587266922
accuracy $0.6688



390it [00:14, 27.81it/s]

learning_rate $0.0008644843137107057
train_epoch_loss $0.8748197709902739
training_accuracy $0.6876001602564102



20it [00:00, 20.47it/s]

validation_loss $0.921262127161026
accuracy $0.6785



390it [00:14, 27.73it/s]

learning_rate $0.0008590631488815944
train_epoch_loss $0.871652768055598
training_accuracy $0.689082532051282



20it [00:00, 20.07it/s]

validation_loss $0.8853641718626022
accuracy $0.6814



390it [00:13, 27.87it/s]


learning_rate $0.0008535533905932737
train_epoch_loss $0.8439184376826653
training_accuracy $0.6989783653846153


20it [00:00, 20.44it/s]

validation_loss $0.9011364161968232
accuracy $0.6857



390it [00:13, 27.91it/s]

learning_rate $0.0008479563982961571
train_epoch_loss $0.8233155774764526
training_accuracy $0.7071915064102564



20it [00:00, 20.43it/s]

validation_loss $0.883617952466011
accuracy $0.6893



390it [00:14, 27.82it/s]

learning_rate $0.0008422735529643444
train_epoch_loss $0.8142549054744916
training_accuracy $0.7085737179487179



20it [00:00, 20.47it/s]

validation_loss $0.8703748136758804
accuracy $0.6899



390it [00:13, 27.87it/s]


learning_rate $0.0008365062567548867
train_epoch_loss $0.7915561633232312
training_accuracy $0.7189703525641026


20it [00:00, 20.50it/s]

validation_loss $0.876249960064888
accuracy $0.6958



390it [00:14, 27.81it/s]

learning_rate $0.0008306559326618259
train_epoch_loss $0.7761059906238165
training_accuracy $0.7240785256410256



20it [00:00, 20.17it/s]

validation_loss $0.8689835608005524
accuracy $0.6983



390it [00:14, 27.80it/s]

learning_rate $0.0008247240241650919
train_epoch_loss $0.7626921397753251
training_accuracy $0.7292067307692308



20it [00:00, 20.23it/s]

validation_loss $0.8327942997217178
accuracy $0.7088



390it [00:14, 27.74it/s]

learning_rate $0.0008187119948743449
train_epoch_loss $0.7508070999231093
training_accuracy $0.7333133012820513



20it [00:00, 20.39it/s]

validation_loss $0.8440054357051849
accuracy $0.7075



390it [00:14, 27.82it/s]

learning_rate $0.0008126213281678527
train_epoch_loss $0.7326525713388736
training_accuracy $0.737139423076923



20it [00:00, 20.45it/s]

validation_loss $0.8400799214839936
accuracy $0.711



390it [00:14, 27.60it/s]

learning_rate $0.0008064535268264883
train_epoch_loss $0.7255843883905655
training_accuracy $0.7430689102564103



20it [00:00, 20.48it/s]

validation_loss $0.8154899120330811
accuracy $0.7131



390it [00:13, 27.99it/s]

learning_rate $0.0008002101126629421
train_epoch_loss $0.7080123661420284
training_accuracy $0.74765625



20it [00:00, 20.46it/s]

validation_loss $0.8037836581468583
accuracy $0.7199



390it [00:14, 27.75it/s]

learning_rate $0.0007938926261462367
train_epoch_loss $0.6930714343602841
training_accuracy $0.7552083333333334



20it [00:00, 20.44it/s]

validation_loss $0.794294011592865
accuracy $0.722



390it [00:14, 27.70it/s]

learning_rate $0.0007875026260216394
train_epoch_loss $0.6845162778328626
training_accuracy $0.7568910256410256



20it [00:00, 20.46it/s]

validation_loss $0.8081653237342834
accuracy $0.7216



390it [00:14, 27.64it/s]

learning_rate $0.0007810416889260654
train_epoch_loss $0.665192704093762
training_accuracy $0.7644230769230769



20it [00:00, 20.49it/s]

validation_loss $0.79520583152771
accuracy $0.7244



390it [00:14, 27.69it/s]

learning_rate $0.0007745114089990659
train_epoch_loss $0.6564163904923659
training_accuracy $0.7646834935897436



20it [00:00, 20.13it/s]

validation_loss $0.7907937377691269
accuracy $0.7272



390it [00:14, 27.81it/s]

learning_rate $0.0007679133974894983
train_epoch_loss $0.6456121773291856
training_accuracy $0.7700120192307692



20it [00:00, 20.29it/s]

validation_loss $0.8198938548564911
accuracy $0.7226



390it [00:14, 27.69it/s]

learning_rate $0.0007612492823579744
train_epoch_loss $0.6349927443724412
training_accuracy $0.7740384615384616



20it [00:00, 20.26it/s]

validation_loss $0.7843757718801498
accuracy $0.729



390it [00:13, 27.99it/s]

learning_rate $0.0007545207078751857
train_epoch_loss $0.6212086295470213
training_accuracy $0.7796274038461538



20it [00:00, 20.57it/s]

validation_loss $0.7886507958173752
accuracy $0.7275



390it [00:14, 27.74it/s]

learning_rate $0.0007477293342162038
train_epoch_loss $0.609708932806284
training_accuracy $0.7831330128205128



20it [00:00, 20.31it/s]

validation_loss $0.7867638170719147
accuracy $0.7316



390it [00:14, 27.76it/s]

learning_rate $0.0007408768370508576
train_epoch_loss $0.6018931495837676
training_accuracy $0.7878205128205128



20it [00:00, 20.50it/s]

validation_loss $0.7675913035869598
accuracy $0.7351



390it [00:14, 27.72it/s]

learning_rate $0.0007339649071302867
train_epoch_loss $0.580047000447909
training_accuracy $0.7922676282051282



20it [00:00, 20.31it/s]

validation_loss $0.8001179248094559
accuracy $0.7225



390it [00:14, 27.80it/s]

learning_rate $0.0007269952498697733
train_epoch_loss $0.5781194718220295
training_accuracy $0.7940104166666667



20it [00:00, 20.37it/s]

validation_loss $0.7596411168575287
accuracy $0.7393



390it [00:14, 27.77it/s]

learning_rate $0.0007199695849279575
train_epoch_loss $0.5629179168970156
training_accuracy $0.8001201923076923



20it [00:00, 20.32it/s]

validation_loss $0.778126671910286
accuracy $0.7337



390it [00:14, 27.62it/s]

learning_rate $0.0007128896457825364
train_epoch_loss $0.5505512801500467
training_accuracy $0.8047676282051283



20it [00:00, 20.36it/s]

validation_loss $0.7569810509681701
accuracy $0.7458



390it [00:14, 27.65it/s]


learning_rate $0.0007057571793025545
train_epoch_loss $0.538122983620717
training_accuracy $0.8079527243589744


20it [00:00, 20.41it/s]

validation_loss $0.787592938542366
accuracy $0.7353



390it [00:14, 27.69it/s]

learning_rate $0.0006985739453173903
train_epoch_loss $0.5318352367633429
training_accuracy $0.8127003205128205



20it [00:00, 20.41it/s]

validation_loss $0.7737095475196838
accuracy $0.7426



390it [00:13, 27.90it/s]

learning_rate $0.000691341716182545
train_epoch_loss $0.5176372563991791
training_accuracy $0.8167668269230769



20it [00:00, 20.57it/s]

validation_loss $0.8110078871250153
accuracy $0.7309



390it [00:14, 27.83it/s]


learning_rate $0.0006840622763423391
train_epoch_loss $0.5031613638767829
training_accuracy $0.8215144230769231


20it [00:00, 20.37it/s]

validation_loss $0.7629443407058716
accuracy $0.7458



390it [00:14, 27.75it/s]

learning_rate $0.0006767374218896287
train_epoch_loss $0.5006369802432182
training_accuracy $0.8210336538461539



20it [00:00, 20.51it/s]

validation_loss $0.7874647319316864
accuracy $0.7426



390it [00:13, 27.90it/s]

learning_rate $0.0006693689601226458
train_epoch_loss $0.4860086670288673
training_accuracy $0.8279847756410257



20it [00:00, 20.19it/s]

validation_loss $0.7715457320213318
accuracy $0.7452



390it [00:14, 27.64it/s]

learning_rate $0.0006619587090990747
train_epoch_loss $0.480144604964134
training_accuracy $0.8281850961538462



20it [00:00, 20.44it/s]

validation_loss $0.7742850035429001
accuracy $0.7425



390it [00:13, 27.95it/s]

learning_rate $0.0006545084971874737
train_epoch_loss $0.4711155712604523
training_accuracy $0.8318309294871795



20it [00:00, 20.31it/s]

validation_loss $0.7978027284145355
accuracy $0.7401



390it [00:14, 27.81it/s]

learning_rate $0.0006470201626161521
train_epoch_loss $0.46047918552007433
training_accuracy $0.8372395833333334



20it [00:00, 20.38it/s]

validation_loss $0.778309777379036
accuracy $0.7465



390it [00:14, 27.78it/s]

learning_rate $0.0006394955530196147
train_epoch_loss $0.44901072948406906
training_accuracy $0.8399038461538462



20it [00:00, 20.51it/s]

validation_loss $0.8206451296806335
accuracy $0.7396



390it [00:14, 27.80it/s]

learning_rate $0.0006319365249826864
train_epoch_loss $0.4434984875795169
training_accuracy $0.8404447115384616



20it [00:00, 20.46it/s]

validation_loss $0.793365079164505
accuracy $0.7429



390it [00:13, 27.92it/s]

learning_rate $0.0006243449435824273
train_epoch_loss $0.4322186900637089
training_accuracy $0.845713141025641



20it [00:00, 20.51it/s]

validation_loss $0.7970230847597122
accuracy $0.7411



390it [00:14, 27.59it/s]

learning_rate $0.0006167226819279528
train_epoch_loss $0.4250356565683316
training_accuracy $0.8464342948717949



20it [00:00, 20.03it/s]

validation_loss $0.8088563412427903
accuracy $0.7518



390it [00:14, 27.64it/s]

learning_rate $0.0006090716206982714
train_epoch_loss $0.4110876163228964
training_accuracy $0.8518229166666667



20it [00:00, 20.36it/s]

validation_loss $0.8325299441814422
accuracy $0.7457



390it [00:14, 27.75it/s]

learning_rate $0.0006013936476782563
train_epoch_loss $0.4097838138540586
training_accuracy $0.8540064102564102



20it [00:00, 20.37it/s]

validation_loss $0.7890234619379044
accuracy $0.7487



390it [00:14, 27.74it/s]

learning_rate $0.0005936906572928624
train_epoch_loss $0.3981086174264932
training_accuracy $0.8582131410256411



20it [00:00, 20.26it/s]

validation_loss $0.8262514621019363
accuracy $0.7489



390it [00:14, 27.68it/s]

learning_rate $0.0005859645501397047
train_epoch_loss $0.38242955387402805
training_accuracy $0.8645432692307692



20it [00:00, 20.40it/s]

validation_loss $0.8413332819938659
accuracy $0.7396



390it [00:14, 27.74it/s]

learning_rate $0.0005782172325201155
train_epoch_loss $0.38024619825375383
training_accuracy $0.8639423076923077



20it [00:01, 19.76it/s]

validation_loss $0.8334177672863007
accuracy $0.7453



390it [00:14, 27.78it/s]

learning_rate $0.0005704506159687914
train_epoch_loss $0.3694338714083036
training_accuracy $0.8667267628205129



20it [00:00, 20.62it/s]

validation_loss $0.8421260684728622
accuracy $0.7488



390it [00:14, 27.77it/s]

learning_rate $0.0005626666167821522
train_epoch_loss $0.3633411502226805
training_accuracy $0.8686698717948718



20it [00:00, 20.30it/s]

validation_loss $0.8150672614574432
accuracy $0.7506



390it [00:14, 27.71it/s]

learning_rate $0.0005548671555455227
train_epoch_loss $0.35262608314171817
training_accuracy $0.8728165064102564



20it [00:00, 20.44it/s]

validation_loss $0.8595090866088867
accuracy $0.7468



390it [00:14, 27.74it/s]

learning_rate $0.0005470541566592571
train_epoch_loss $0.3505734892991873
training_accuracy $0.8747195512820513



20it [00:01, 19.38it/s]

validation_loss $0.8553014308214187
accuracy $0.7407



390it [00:14, 27.57it/s]

learning_rate $0.0005392295478639225
train_epoch_loss $0.3393365965821804
training_accuracy $0.8784455128205129



20it [00:01, 19.53it/s]

validation_loss $0.8667489171028138
accuracy $0.7452



390it [00:14, 27.72it/s]

learning_rate $0.0005313952597646568
train_epoch_loss $0.33087391455968224
training_accuracy $0.8818309294871794



20it [00:00, 20.37it/s]

validation_loss $0.8589667767286301
accuracy $0.7434



390it [00:14, 27.69it/s]

learning_rate $0.0005235532253548213
train_epoch_loss $0.3172956337531408
training_accuracy $0.8877203525641025



20it [00:00, 20.29it/s]

validation_loss $0.8949192762374878
accuracy $0.744



390it [00:14, 27.62it/s]


learning_rate $0.0005157053795390641
train_epoch_loss $0.3174146089798365
training_accuracy $0.8872395833333333


20it [00:00, 20.34it/s]

validation_loss $0.8729402452707291
accuracy $0.7491



390it [00:14, 27.76it/s]


learning_rate $0.0005078536586559104
train_epoch_loss $0.3067512129743894
training_accuracy $0.8897636217948718


20it [00:00, 20.27it/s]

validation_loss $0.8748610258102417
accuracy $0.7545



390it [00:14, 27.69it/s]

learning_rate $0.0005
train_epoch_loss $0.3040734867636974
training_accuracy $0.8900440705128205



20it [00:00, 20.29it/s]

validation_loss $0.8764206647872925
accuracy $0.7441



390it [00:14, 27.76it/s]

learning_rate $0.0004921463413440898
train_epoch_loss $0.2910692730393165
training_accuracy $0.8963341346153846



20it [00:01, 19.84it/s]

validation_loss $0.9131333589553833
accuracy $0.7402



390it [00:14, 27.62it/s]

learning_rate $0.00048429462046093585
train_epoch_loss $0.27859058040074813
training_accuracy $0.9010416666666666



20it [00:00, 20.07it/s]

validation_loss $0.9241817593574524
accuracy $0.7436



390it [00:14, 27.76it/s]

learning_rate $0.00047644677464517873
train_epoch_loss $0.27388849296630957
training_accuracy $0.9019831730769231



20it [00:00, 20.25it/s]

validation_loss $0.9297260791063309
accuracy $0.7465



390it [00:14, 27.81it/s]

learning_rate $0.0004686047402353433
train_epoch_loss $0.2731588557171516
training_accuracy $0.9017027243589744



20it [00:00, 20.42it/s]

validation_loss $0.9026932865381241
accuracy $0.7477



390it [00:14, 27.83it/s]

learning_rate $0.0004607704521360776
train_epoch_loss $0.26066469026681705
training_accuracy $0.9071514423076923



20it [00:00, 20.33it/s]

validation_loss $0.9194805085659027
accuracy $0.7479



390it [00:14, 27.84it/s]

learning_rate $0.00045294584334074284
train_epoch_loss $0.25010168090080603
training_accuracy $0.909354967948718



20it [00:00, 20.20it/s]

validation_loss $0.9525965332984925
accuracy $0.7497



390it [00:14, 27.79it/s]

learning_rate $0.00044513284445447737
train_epoch_loss $0.24613081144216734
training_accuracy $0.9118389423076924



20it [00:01, 19.07it/s]

validation_loss $0.9617154151201248
accuracy $0.7499



390it [00:14, 27.84it/s]

learning_rate $0.00043733338321784795
train_epoch_loss $0.2475327358987087
training_accuracy $0.9127003205128205



20it [00:00, 20.35it/s]

validation_loss $0.9310107886791229
accuracy $0.7492



390it [00:14, 27.85it/s]

learning_rate $0.0004295493840312088
train_epoch_loss $0.2304821231808418
training_accuracy $0.91640625



20it [00:00, 20.69it/s]

validation_loss $1.009571298956871
accuracy $0.7395



390it [00:14, 27.74it/s]

learning_rate $0.0004217827674798846
train_epoch_loss $0.2258786318393854
training_accuracy $0.9177684294871795



20it [00:00, 20.42it/s]

validation_loss $0.9864678829908371
accuracy $0.7451



390it [00:14, 27.70it/s]

learning_rate $0.00041403544986029516
train_epoch_loss $0.2224900860625964
training_accuracy $0.9208934294871794



20it [00:00, 20.22it/s]

validation_loss $1.0122225999832153
accuracy $0.7449



390it [00:14, 27.61it/s]

learning_rate $0.0004063093427071377
train_epoch_loss $0.21553608463742793
training_accuracy $0.9229767628205128



20it [00:00, 20.36it/s]

validation_loss $1.0282833993434906
accuracy $0.7448



390it [00:14, 27.76it/s]

learning_rate $0.0003986063523217438
train_epoch_loss $0.20700006666473852
training_accuracy $0.9243389423076923



20it [00:00, 20.36it/s]

validation_loss $1.0224579125642776
accuracy $0.7455



390it [00:14, 27.74it/s]

learning_rate $0.0003909283793017289
train_epoch_loss $0.20422783893270371
training_accuracy $0.9252804487179487



20it [00:00, 20.02it/s]

validation_loss $1.0311570703983306
accuracy $0.7429



390it [00:14, 27.70it/s]

learning_rate $0.0003832773180720472
train_epoch_loss $0.19163549634126517
training_accuracy $0.930869391025641



20it [00:00, 20.12it/s]

validation_loss $1.0346927523612977
accuracy $0.7464



390it [00:14, 27.56it/s]

learning_rate $0.00037565505641757257
train_epoch_loss $0.1925438809566773
training_accuracy $0.9319310897435897



20it [00:00, 20.27it/s]

validation_loss $1.0794176012277603
accuracy $0.7428



390it [00:14, 27.74it/s]

learning_rate $0.0003680634750173136
train_epoch_loss $0.18163869170806346
training_accuracy $0.93515625



20it [00:00, 20.47it/s]


validation_loss $1.0694274574518203
accuracy $0.7447


390it [00:14, 27.73it/s]

learning_rate $0.00034549150281252644
train_epoch_loss $0.1745409454863805
training_accuracy $0.9373798076923077



20it [00:00, 20.40it/s]

validation_loss $1.067878058552742
accuracy $0.7453



390it [00:14, 27.76it/s]

learning_rate $0.0003380412909009253
train_epoch_loss $0.16364673534647012
training_accuracy $0.9411458333333333



20it [00:00, 20.22it/s]

validation_loss $1.1207835853099823
accuracy $0.7418



390it [00:13, 27.86it/s]

learning_rate $0.0003306310398773543
train_epoch_loss $0.16128419634814445
training_accuracy $0.9423878205128206



20it [00:00, 20.32it/s]

validation_loss $1.0950718462467193
accuracy $0.743



390it [00:14, 27.79it/s]


learning_rate $0.00032326257811037153
train_epoch_loss $0.15421243401674123
training_accuracy $0.9446113782051282


20it [00:00, 20.38it/s]

validation_loss $1.1154457569122314
accuracy $0.7468



390it [00:14, 27.69it/s]

learning_rate $0.00031593772365766116
train_epoch_loss $0.15099217449434293
training_accuracy $0.9454727564102564



20it [00:00, 20.40it/s]

validation_loss $1.1261526316404342
accuracy $0.747



390it [00:14, 27.78it/s]

learning_rate $0.0003086582838174553
train_epoch_loss $0.14109848000300237
training_accuracy $0.9487580128205129



20it [00:00, 20.37it/s]

validation_loss $1.1709278672933578
accuracy $0.7416



390it [00:14, 27.75it/s]

learning_rate $0.00030142605468260966
train_epoch_loss $0.14095956568534557
training_accuracy $0.9502403846153846



20it [00:00, 20.32it/s]

validation_loss $1.130906280875206
accuracy $0.7496



390it [00:14, 27.73it/s]

learning_rate $0.0002942428206974456
train_epoch_loss $0.12887398643562428
training_accuracy $0.9531450320512821



20it [00:00, 20.34it/s]

validation_loss $1.1731509536504745
accuracy $0.7486



390it [00:13, 27.91it/s]

learning_rate $0.00028711035421746366
train_epoch_loss $0.12774620943535597
training_accuracy $0.9540865384615385



20it [00:00, 20.38it/s]

validation_loss $1.1743193686008453
accuracy $0.7502



390it [00:14, 27.74it/s]

learning_rate $0.0002800304150720424
train_epoch_loss $0.12120531058082214
training_accuracy $0.9569511217948717



20it [00:00, 20.32it/s]

validation_loss $1.1681968867778778
accuracy $0.75



390it [00:14, 27.80it/s]

learning_rate $0.00027300475013022663
train_epoch_loss $0.12192462995075262
training_accuracy $0.955508814102564



20it [00:00, 20.00it/s]

validation_loss $1.1983146101236344
accuracy $0.7435



390it [00:14, 27.74it/s]

learning_rate $0.0002660350928697134
train_epoch_loss $0.11558869256136509
training_accuracy $0.9597355769230769



20it [00:01, 19.50it/s]

validation_loss $1.2312986522912979
accuracy $0.7462



390it [00:14, 27.86it/s]

learning_rate $0.0002591231629491423
train_epoch_loss $0.1131618963363461
training_accuracy $0.9596955128205128



20it [00:00, 20.45it/s]

validation_loss $1.2465851545333861
accuracy $0.7442



390it [00:14, 27.70it/s]

learning_rate $0.0002522706657837962
train_epoch_loss $0.10905567262894832
training_accuracy $0.9612980769230769



20it [00:00, 20.29it/s]

validation_loss $1.211368665099144
accuracy $0.7558



390it [00:14, 27.79it/s]

learning_rate $0.00024547929212481435
train_epoch_loss $0.10241907221289018
training_accuracy $0.9625400641025641



20it [00:00, 20.29it/s]

validation_loss $1.2527888715267181
accuracy $0.7515



390it [00:14, 27.79it/s]

learning_rate $0.00023875071764202561
train_epoch_loss $0.09455078791062801
training_accuracy $0.9662059294871795



20it [00:00, 20.34it/s]

validation_loss $1.2480239540338516
accuracy $0.7485



390it [00:14, 27.74it/s]

learning_rate $0.00023208660251050156
train_epoch_loss $0.09810422661308295
training_accuracy $0.9650040064102564



20it [00:00, 20.38it/s]

validation_loss $1.270172443985939
accuracy $0.7506



390it [00:14, 27.73it/s]

learning_rate $0.00022548859100093404
train_epoch_loss $0.09354723351410567
training_accuracy $0.967167467948718



20it [00:00, 20.39it/s]

validation_loss $1.285894250869751
accuracy $0.7474



390it [00:14, 27.76it/s]

learning_rate $0.00021895831107393465
train_epoch_loss $0.08818345706527814
training_accuracy $0.96875



20it [00:00, 20.32it/s]

validation_loss $1.3183585822582244
accuracy $0.7459



390it [00:14, 27.63it/s]

learning_rate $0.00021249737397836073
train_epoch_loss $0.08850263222956505
training_accuracy $0.9687299679487179



20it [00:00, 20.40it/s]

validation_loss $1.29307963848114
accuracy $0.7493



390it [00:14, 27.81it/s]

learning_rate $0.00020610737385376348
train_epoch_loss $0.0822907557233404
training_accuracy $0.9707331730769231



20it [00:00, 20.38it/s]

validation_loss $1.3555684983730316
accuracy $0.7467



390it [00:14, 27.79it/s]

learning_rate $0.00019978988733705805
train_epoch_loss $0.07940131172251243
training_accuracy $0.9721554487179487



20it [00:00, 20.43it/s]

validation_loss $1.3358876645565032
accuracy $0.7465



390it [00:14, 27.73it/s]

learning_rate $0.00019354647317351172
train_epoch_loss $0.07718543384510737
training_accuracy $0.9732371794871795



20it [00:00, 20.18it/s]

validation_loss $1.3269465446472168
accuracy $0.7508



390it [00:14, 27.76it/s]

learning_rate $0.0001873786718321474
train_epoch_loss $0.07101920917868997
training_accuracy $0.9743589743589743



20it [00:00, 20.34it/s]

validation_loss $1.3452618837356567
accuracy $0.7479



390it [00:14, 27.76it/s]

learning_rate $0.00018128800512565513
train_epoch_loss $0.06934105479277862
training_accuracy $0.9752003205128205



20it [00:00, 20.37it/s]

validation_loss $1.3652603089809419
accuracy $0.747



390it [00:13, 27.93it/s]

learning_rate $0.00017527597583490823
train_epoch_loss $0.06517972490774133
training_accuracy $0.9776041666666667



20it [00:00, 20.34it/s]

validation_loss $1.3918651521205903
accuracy $0.7471



390it [00:14, 27.75it/s]

learning_rate $0.00016934406733817414
train_epoch_loss $0.06973635116114449
training_accuracy $0.9755008012820513



20it [00:00, 20.52it/s]

validation_loss $1.376160305738449
accuracy $0.7474



390it [00:14, 27.79it/s]

learning_rate $0.00016349374324511328
train_epoch_loss $0.0597736845843685
training_accuracy $0.9791266025641026



20it [00:00, 20.43it/s]

validation_loss $1.3882879674434663
accuracy $0.7502



390it [00:14, 27.79it/s]

learning_rate $0.00015772644703565563
train_epoch_loss $0.06050457898288583
training_accuracy $0.978946314102564



20it [00:00, 20.38it/s]

validation_loss $1.395979654788971
accuracy $0.7441



390it [00:14, 27.81it/s]

learning_rate $0.00015204360170384285
train_epoch_loss $0.05472434553055045
training_accuracy $0.9808693910256411



20it [00:00, 20.40it/s]

validation_loss $1.4177612006664275
accuracy $0.7508



390it [00:14, 27.85it/s]

learning_rate $0.00014644660940672628
train_epoch_loss $0.053346805510899195
training_accuracy $0.9814102564102564



20it [00:00, 20.37it/s]

validation_loss $1.4497814118862151
accuracy $0.7514



390it [00:14, 27.82it/s]


learning_rate $0.00014093685111840566
train_epoch_loss $0.055183318978509845
training_accuracy $0.9803886217948717


20it [00:00, 20.49it/s]

validation_loss $1.431303286552429
accuracy $0.7506



390it [00:14, 27.65it/s]

learning_rate $0.00013551568628929433
train_epoch_loss $0.050395573064302784
training_accuracy $0.9822716346153846



20it [00:00, 20.38it/s]

validation_loss $1.4702468514442444
accuracy $0.7501



390it [00:14, 27.84it/s]

learning_rate $0.0001301844525106951
train_epoch_loss $0.04814940524192002
training_accuracy $0.9833333333333333



20it [00:00, 20.10it/s]

validation_loss $1.4492357850074769
accuracy $0.7485



390it [00:14, 27.79it/s]

learning_rate $0.0001249444651847702
train_epoch_loss $0.04711603321588766
training_accuracy $0.9837139423076923



20it [00:00, 20.42it/s]

validation_loss $1.4686648666858673
accuracy $0.7477



390it [00:13, 27.93it/s]

learning_rate $0.00011979701719998454
train_epoch_loss $0.04652196337492802
training_accuracy $0.9841546474358974



20it [00:00, 20.03it/s]

validation_loss $1.455789613723755
accuracy $0.7529



390it [00:14, 27.67it/s]

learning_rate $0.00011474337861210544
train_epoch_loss $0.04109969642729713
training_accuracy $0.9853365384615385



20it [00:00, 20.34it/s]

validation_loss $1.511158925294876
accuracy $0.7514



80it [00:02, 27.93it/s]

In [None]:
model