### Train optimizer
Notebook to train a model with different optimizers so that their results can be compared.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import sys
import time
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm.notebook import trange, tqdm

In [3]:
# Colab-only setup: mount Google Drive and make the project sources importable.
# Outside Colab the `google.colab` package is not installed, so this cell is a
# no-op there instead of breaking "Restart & Run All".
try:
    from google.colab import drive
except ImportError:
    print("Not running on Google Colab - skipping Drive mount.")
else:
    drive.mount('/content/drive')

    # The project modules imported in the next cell are looked up in this folder.
    project_dir = '/content/drive/My Drive/OptiForML2022'
    if project_dir not in sys.path:  # avoid duplicate entries when the cell is re-run
        sys.path.append(project_dir)

Mounted at /content/drive


In [4]:
from optimizer.AdaHessian import AdaHessian
from optimizer.Atmo import Atmo, MASScheduler
from optimizer.Padam_SGD import PadamSGDWeighted
from dataset import ImagesDataset
from model import ResNet18
from path import TRAIN_HISTORY_DIR, TRAIN_MODEL_DIR

In [5]:
# Experiment configuration: edit the values in this cell, then run the cells below.

# Dataset selection: "cifar" or "MNIST".
# (The model-setup cell uses 1 input channel for "MNIST" and 3 otherwise.)
# data_name = "MNIST"
data_name = "cifar"
# `full` and `tiny` are forwarded unchanged to ImagesDataset.
full = True
tiny = False

# Optimizer selection: keep exactly one line uncommented.
# The string must match, character for character, one of the names tested in
# the optimizer-setup cell.
# optimizer_name = "adam"
# optimizer_name = "sgd"
# optimizer_name = "atmo"
optimizer_name = "dynamicAtom"
# optimizer_name = "adaHessian"
# optimizer_name = "padam_sgd"
# optimizer_name = "padam_sgd_dyn"

# Learning-rate scheduler selection: keep exactly one line uncommented.
# None disables learning-rate scheduling.
# scheduler_name = None
# scheduler_name = "cosineAnnealinglr"
# scheduler_name = "steplr"
scheduler_name = "multilr"

# Number of training epochs (also used in the names of the saved files).
epochs = 200

In [6]:
# Build the train / test datasets from the values chosen in the configuration cell.
use_cifar = data_name == "cifar"
train_dataset = ImagesDataset(full=full, tiny=tiny, cifar=use_cifar)
test_dataset = ImagesDataset(full=full, tiny=tiny, cifar=use_cifar, test=True)

# Single source of truth for the mini-batch size of both loaders.
BATCH_SIZE = 128

# Training batches are reshuffled at every epoch.
trainDataLoader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation gains nothing from shuffling. A fixed order keeps the composition
# of every test batch (including the smaller last one) identical across epochs
# and runs, so the per-batch-averaged test metrics are reproducible.
testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

* Using CIFAR
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar10/cifar-10-python.tar.gz to ./data/cifar10/
** Use 50000 train samples
* Using CIFAR
Files already downloaded and verified
** Use 10000 test samples


In [7]:
# ---- Model ----
# MNIST images have a single channel; any other dataset gets 3 input channels.
model = ResNet18(in_channel=1 if data_name=="MNIST" else 3)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.train()
model.to(device)

# ---- Optimizer ----
# `dynamic` is the optional MASScheduler used by the "dynamic" optimizers.
# It is reset here so that a value left over from a previous run of this cell
# can never be picked up by accident.
# NOTE(review): MASScheduler(optimizer, 1, 0, 100) presumably moves the
# adam/sgd mixing weights over 100 steps - confirm against optimizer/Atmo.py.
dynamic = None
if optimizer_name == "adam":
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
elif optimizer_name == "sgd":
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
elif optimizer_name == "atmo":
    optimizer = Atmo(model.parameters())
elif optimizer_name == "dynamicAtom":
    optimizer = Atmo(model.parameters(), adam_w=1, sgd_w=0, lr=0.01, weight_decay=2.5e-2, momentum=0.9)
    dynamic = MASScheduler(optimizer, 1, 0, 100)
elif optimizer_name == "adaHessian":
    optimizer = AdaHessian(model.parameters())
elif optimizer_name == "padam_sgd":
    optimizer = PadamSGDWeighted(model.parameters(), adam_w=0.5, sgd_w=0.5, lr=0.01, weight_decay=2.5e-2, partial=0.125, momentum=0.9)
elif optimizer_name == "padam_sgd_dyn":
    optimizer = PadamSGDWeighted(model.parameters(), adam_w=1, sgd_w=0, lr=0.01, weight_decay=2.5e-2, partial=0.125, momentum=0.9)
    dynamic = MASScheduler(optimizer, 1, 0, 100)
else:
    # Fail fast: a misspelled name would otherwise leave `optimizer` undefined
    # (or silently reuse the one from a previous run of this cell).
    raise ValueError(f"Unknown optimizer_name: {optimizer_name!r}")

# ---- Learning-rate scheduler ----
if scheduler_name is None:
    scheduler = None
elif scheduler_name == "cosineAnnealinglr":
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
elif scheduler_name == "steplr":
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150)
elif scheduler_name == "multilr":
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1)
else:
    raise ValueError(f"Unknown scheduler_name: {scheduler_name!r}")


criterion = nn.CrossEntropyLoss()

cuda


In [8]:
# Per-epoch history: one entry is appended to each list at every epoch.
train_loss = []
train_acc = []
test_loss = []
test_acc = []
time_epoch = []

with trange(1, epochs + 1, desc='Training', unit='epoch') as t:
    for epoch in t:
        start_time = time.time()

        # ---------------- training pass ----------------
        # The evaluation pass below switches the model to eval mode, so
        # training mode has to be restored at the start of every epoch.
        model.train()
        losses = []
        acc = []
        with tqdm(trainDataLoader, desc=f'Train epoch {epoch}',
                  unit='batch', leave=False) as t1:
            for x_train, y_train in t1:
                x_train = x_train.to(device)
                y_train = y_train.to(device)

                optimizer.zero_grad()
                output = model(x_train)
                loss = criterion(output, y_train)
                # The autograd graph of the backward pass is only built for
                # AdaHessian (presumably because it differentiates the
                # gradients again - see optimizer/AdaHessian.py).
                loss.backward(create_graph=(optimizer_name=="adaHessian"))
                optimizer.step()

                # Detach before storing: keeping the attached loss would keep
                # the graph of every batch alive until the end of the epoch.
                # The stored value is still a 0-dim tensor, as before.
                losses.append(loss.detach())
                pred = torch.argmax(output, dim=1)
                # Vectorised count of correct predictions (one device sync
                # instead of a Python loop over the batch elements).
                acc.append((pred == y_train).sum().item()/pred.shape[0])

        # Step the MASScheduler once per epoch for the optimizers that own
        # one. The name must be spelled exactly as in the configuration and
        # optimizer-setup cells ("dynamicAtom").
        if optimizer_name in ["dynamicAtom", "padam_sgd_dyn"]:
            dynamic.step()

        # Epoch metrics are the unweighted mean of the per-batch values.
        train_loss.append(sum(losses)/len(losses))
        train_acc.append(sum(acc)/len(acc))

        # ---------------- evaluation pass ----------------
        # eval mode makes layers such as BatchNorm/Dropout use their inference
        # behaviour, so test batches do not alter the model's running
        # statistics and metrics do not depend on batch composition.
        model.eval()
        losses = []
        acc = []
        with torch.no_grad():
            with tqdm(testDataLoader, desc=f'Test epoch {epoch}',
                      unit='batch', leave=False) as t1:
                for x_test, y_test in t1:
                    x_test = x_test.to(device)
                    y_test = y_test.to(device)

                    output = model(x_test)
                    loss = criterion(output, y_test)
                    losses.append(loss)

                    pred = torch.argmax(output, dim=1)
                    acc.append((pred == y_test).sum().item()/pred.shape[0])

        test_loss.append(sum(losses)/len(losses))
        test_acc.append(sum(acc)/len(acc))

        # The recorded time covers both the training and the evaluation pass.
        end_time = time.time()
        time_epoch.append(end_time-start_time)

        # The learning-rate scheduler advances once per epoch.
        if scheduler_name is not None:
            scheduler.step()

# Collapse the per-epoch timings into the mean duration of one epoch;
# the history-saving cell stores this single number.
time_epoch = sum(time_epoch)/len(time_epoch)

Training:   0%|          | 0/200 [00:00<?, ?epoch/s]

Train epoch 1:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 1:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 2:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 2:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 3:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 3:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 4:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 4:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 5:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 5:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 6:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 6:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 7:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 7:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 8:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 8:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 9:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 9:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 10:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 10:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 11:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 11:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 12:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 12:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 13:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 13:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 14:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 14:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 15:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 15:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 16:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 16:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 17:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 17:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 18:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 18:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 19:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 19:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 20:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 20:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 21:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 21:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 22:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 22:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 23:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 23:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 24:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 24:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 25:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 25:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 26:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 26:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 27:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 27:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 28:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 28:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 29:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 29:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 30:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 30:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 31:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 31:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 32:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 32:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 33:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 33:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 34:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 34:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 35:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 35:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 36:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 36:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 37:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 37:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 38:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 38:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 39:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 39:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 40:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 40:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 41:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 41:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 42:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 42:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 43:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 43:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 44:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 44:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 45:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 45:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 46:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 46:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 47:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 47:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 48:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 48:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 49:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 49:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 50:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 50:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 51:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 51:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 52:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 52:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 53:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 53:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 54:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 54:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 55:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 55:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 56:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 56:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 57:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 57:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 58:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 58:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 59:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 59:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 60:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 60:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 61:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 61:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 62:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 62:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 63:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 63:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 64:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 64:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 65:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 65:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 66:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 66:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 67:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 67:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 68:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 68:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 69:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 69:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 70:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 70:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 71:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 71:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 72:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 72:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 73:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 73:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 74:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 74:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 75:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 75:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 76:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 76:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 77:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 77:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 78:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 78:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 79:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 79:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 80:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 80:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 81:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 81:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 82:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 82:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 83:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 83:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 84:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 84:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 85:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 85:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 86:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 86:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 87:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 87:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 88:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 88:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 89:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 89:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 90:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 90:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 91:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 91:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 92:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 92:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 93:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 93:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 94:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 94:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 95:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 95:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 96:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 96:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 97:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 97:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 98:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 98:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 99:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 99:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 100:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 100:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 101:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 101:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 102:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 102:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 103:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 103:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 104:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 104:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 105:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 105:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 106:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 106:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 107:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 107:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 108:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 108:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 109:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 109:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 110:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 110:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 111:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 111:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 112:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 112:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 113:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 113:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 114:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 114:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 115:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 115:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 116:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 116:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 117:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 117:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 118:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 118:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 119:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 119:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 120:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 120:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 121:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 121:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 122:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 122:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 123:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 123:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 124:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 124:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 125:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 125:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 126:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 126:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 127:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 127:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 128:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 128:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 129:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 129:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 130:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 130:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 131:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 131:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 132:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 132:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 133:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 133:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 134:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 134:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 135:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 135:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 136:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 136:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 137:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 137:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 138:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 138:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 139:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 139:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 140:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 140:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 141:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 141:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 142:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 142:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 143:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 143:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 144:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 144:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 145:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 145:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 146:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 146:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 147:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 147:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 148:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 148:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 149:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 149:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 150:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 150:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 151:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 151:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 152:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 152:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 153:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 153:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 154:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 154:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 155:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 155:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 156:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 156:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 157:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 157:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 158:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 158:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 159:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 159:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 160:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 160:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 161:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 161:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 162:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 162:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 163:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 163:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 164:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 164:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 165:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 165:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 166:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 166:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 167:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 167:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 168:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 168:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 169:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 169:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 170:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 170:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 171:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 171:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 172:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 172:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 173:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 173:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 174:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 174:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 175:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 175:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 176:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 176:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 177:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 177:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 178:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 178:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 179:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 179:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 180:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 180:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 181:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 181:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 182:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 182:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 183:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 183:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 184:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 184:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 185:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 185:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 186:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 186:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 187:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 187:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 188:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 188:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 189:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 189:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 190:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 190:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 191:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 191:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 192:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 192:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 193:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 193:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 194:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 194:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 195:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 195:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 196:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 196:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 197:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 197:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 198:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 198:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 199:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 199:   0%|          | 0/79 [00:00<?, ?batch/s]

Train epoch 200:   0%|          | 0/391 [00:00<?, ?batch/s]

Test epoch 200:   0%|          | 0/79 [00:00<?, ?batch/s]

In [9]:
# Run this cell to save the training history (metrics + run description).
# NOTE(review): the loss histories are stored exactly as the training cell
# produced them; if those entries are CUDA tensors, the pickle can only be
# loaded on a machine where CUDA is available - consider converting to floats.
history = {
    "train_loss": train_loss,
    "train_acc": train_acc,
    "test_loss": test_loss,
    "test_acc": test_acc,
    "time_epoch": time_epoch,  # mean duration of one epoch, in seconds
    "data": data_name,
    "model": "resnet18",
    "epochs": epochs,
    "optimizer": optimizer_name,
}

# Make sure the target directory exists so a long training run is not lost to
# a FileNotFoundError at the very last step.
os.makedirs(TRAIN_HISTORY_DIR, exist_ok=True)
history_path = os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{epochs}epochs_{scheduler_name}_{data_name}.pickle")
with open(history_path, 'wb') as f:
    pickle.dump(history, f)

In [10]:
# Run this cell to save the trained model weights (state_dict only).
weights = model.state_dict()

# Create the output directory if needed so the save cannot fail on a missing folder.
os.makedirs(TRAIN_MODEL_DIR, exist_ok=True)
model_path = os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{epochs}epochs_{scheduler_name}_{data_name}.pt")
torch.save(weights, model_path)