### Train optimizer
Notebook for training a model with different optimizers so that they can be compared.

In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import sys
import time
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm.notebook import trange, tqdm

In [3]:
# Optional Google Colab setup: mount Google Drive and make the project
# importable. Outside Colab the `google.colab` package is not installed, so
# this cell becomes a no-op instead of aborting a "Restart & Run All".
COLAB_PROJECT_DIR = '/content/drive/MyDrive/Colab_Notebooks/OptiML/OptiForML2022'

try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    drive.mount('/content/drive')
    # Avoid piling up duplicate entries when the cell is executed again.
    if COLAB_PROJECT_DIR not in sys.path:
        sys.path.append(COLAB_PROJECT_DIR)

Mounted at /content/drive


In [4]:
from optimizer.AdaHessian import AdaHessian
from optimizer.Atmo import Atmo, MASScheduler
from optimizer.AdaSGD import AdaSGD, AdaSGDscheduler
from dataset import ImagesDataset
from model import ResNet18
from path import TRAIN_HISTORY_DIR, TRAIN_MODEL_DIR

In [5]:
# Experiment configuration. Alternatives are kept as commented-out lines;
# leave exactly one assignment per option uncommented.

# choose dataset
#data_name = "MNIST"
data_name = "cifar"
# `full` and `tiny` are forwarded unchanged to ImagesDataset.
full = True
tiny = False

# choose optimizer
#optimizer_name = "adam"
#optimizer_name = "sgd"
#optimizer_name = "atmo"
#optimizer_name = "dynamicAtmo"
optimizer_name = "adaHessian"
#optimizer_name = "adaSGD"

# choose scheduler
#scheduler_name = None
scheduler_name = "cosineAnnealinglr"
#scheduler_name = "steplr"
#scheduler_name = "multiSteplr"

# choose nb of epochs
epochs = 160
#if tiny:
#  epochs = 50
#else:
#  epochs = 10

In [6]:
# load dataset
# Both splits come from the same ImagesDataset class; `test=True` selects the
# held-out split.
use_cifar = (data_name == "cifar")
batch_size = 256

train_dataset = ImagesDataset(full=full, tiny=tiny, cifar=use_cifar)
test_dataset = ImagesDataset(full=full, tiny=tiny, cifar=use_cifar, test=True)

# Only the training data is shuffled: evaluation does not benefit from a random
# order, and a fixed order keeps the test pass deterministic.
trainDataLoader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

* Using CIFAR
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar10/cifar-10-python.tar.gz to ./data/cifar10/
** Use 50000 train samples
* Using CIFAR
Files already downloaded and verified
** Use 10000 test samples


In [7]:
# load model
# MNIST gets a single input channel; any other dataset name gets three.
model = ResNet18(in_channel=1 if data_name=="MNIST" else 3)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.train()
model.to(device)
#model.load_state_dict(torch.load(os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}.pt")))

# Optional extra scheduler, stepped once per epoch by the training loop.
# Only the "dynamicAtmo" and "adaSGD" configurations create one.
dynamic = None

# load optimizer
# An elif chain with a final `else` makes a misspelled name fail loudly here
# instead of leaving `optimizer` undefined (or stale from a previous run).
if optimizer_name == "adam":
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
elif optimizer_name == "sgd":
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
elif optimizer_name == "atmo":
    optimizer = Atmo(model.parameters())
elif optimizer_name == "dynamicAtmo":
    optimizer = Atmo(model.parameters(), lr = 0.01, adam_w=1, sgd_w=0, momentum = 0.95, weight_decay = 0.0005)
    dynamic = MASScheduler(optimizer, epochs = epochs)
elif optimizer_name == "adaHessian":
    optimizer = AdaHessian(model.parameters(), lr = 0.15)
elif optimizer_name == "adaSGD":
    optimizer = AdaSGD(model.parameters(), lr = 0.1, ada_w = 1, sgd_w = 0)
    dynamic = AdaSGDscheduler(optimizer, epochs = 100) # do only 100 epochs with both optim then only sgd
else:
    raise ValueError(f"Unknown optimizer_name: {optimizer_name!r}")

# load scheduler
# `scheduler` is always bound (None when no scheduler is requested) so that a
# scheduler tied to an earlier optimizer cannot linger across re-runs.
# NOTE(review): the `verbose` argument is reported as deprecated in recent
# PyTorch releases -- confirm against the installed version.
scheduler = None
if scheduler_name == "cosineAnnealinglr":
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, verbose=True)
elif scheduler_name == "steplr":
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150, verbose=True)
elif scheduler_name == "multiSteplr":
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones = [80, 120], gamma = 0.1, verbose = False)
elif scheduler_name is not None:
    raise ValueError(f"Unknown scheduler_name: {scheduler_name!r}")



criterion = nn.CrossEntropyLoss()
#with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}.pickle"), 'rb') as f:
#    history = pickle.load(f)

cuda
Adjusting learning rate of group 0 to 1.5000e-01.


In [8]:
# Train for `epochs` epochs, evaluating on the test loader after every epoch.
# After each epoch the metric history and the model weights are written to
# disk, so an interrupted run keeps its latest results.
# The commented expressions show how to resume from a previously loaded
# `history` dict.
train_loss = [] #history['train_loss']
train_acc = [] #history['train_acc']
test_loss = [] #history['test_loss']
test_acc = [] #history['test_acc']
time_epoch = [] #history['time_epoch']
epochs_done = 1 #len(train_loss)

with trange(epochs_done, epochs + 1, desc='Training', unit='epoch') as t:
  for epoch in t:
    losses = []
    acc = []
    start_time = time.time()

    # ---- training pass ----
    # Re-enter training mode every epoch, because the test pass below switches
    # the model to eval mode.
    model.train()
    with tqdm(trainDataLoader, desc=f'Train epoch {epoch}',
              unit='batch', leave=False) as t1:
      for x_train, y_train in t1:
        x_train = x_train.to(device)
        y_train = y_train.to(device)

        optimizer.zero_grad()
        output = model(x_train)
        loss = criterion(output, y_train)
        # The graph is kept only for AdaHessian, presumably so the optimizer
        # can differentiate the gradients again -- confirm in AdaHessian.
        loss.backward(create_graph=(optimizer_name=="adaHessian"))
        optimizer.step()

        # Detach before storing: keeping the raw loss would keep its autograd
        # graph alive for the whole epoch (costly with create_graph=True) and
        # would put graph-attached tensors into the pickled history.
        losses.append(loss.detach())
        pred = torch.argmax(output, dim=1)
        # Tensor-level .sum() instead of Python's sum(), which would iterate
        # element by element over a device tensor.
        acc.append((pred == y_train).sum().item()/pred.shape[0])

    if dynamic is not None:
      dynamic.step()

    # Per-epoch metrics are the unweighted mean over batches.
    train_loss.append(sum(losses)/len(losses))
    train_acc.append(sum(acc)/len(acc))

    # ---- evaluation pass ----
    losses = []
    acc = []
    # Eval mode makes mode-dependent layers (e.g. batch norm or dropout, if
    # ResNet18 contains them) behave deterministically, and stops the test
    # data from influencing any running statistics.
    model.eval()
    with torch.no_grad():
      with tqdm(testDataLoader, desc=f'Test epoch {epoch}',
                unit='batch', leave=False) as t1:
        for x_test, y_test in t1:
          x_test = x_test.to(device)
          y_test = y_test.to(device)

          output = model(x_test)
          loss = criterion(output, y_test)
          losses.append(loss)

          pred = torch.argmax(output, dim=1)
          acc.append((pred == y_test).sum().item()/pred.shape[0])

      test_loss.append(sum(losses)/len(losses))
      test_acc.append(sum(acc)/len(acc))

      # The recorded epoch time covers both the training and the test pass.
      end_time = time.time()
      time_epoch.append(end_time-start_time)
    if scheduler_name is not None:
      scheduler.step()

    # ---- checkpoint: history and weights are overwritten every epoch ----
    # NOTE(review): the "_lr_0.01" suffix in both file names is hard-coded and
    # does not follow the learning rate actually given to the optimizer.
    history = {
      "train_loss": train_loss,
      "train_acc": train_acc,
      "test_loss": test_loss,
      "test_acc": test_acc,
      "time_epoch": time_epoch,
      "data": data_name,
      "model": "resnet18",
      "epochs": epochs,
      "optimizer": optimizer_name,
    }
    with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}_lr_0.01.pickle"), 'wb') as f:
      pickle.dump(history, f)
    weights = model.state_dict()
    torch.save(weights,os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}_lr_0.01.pt"))
#mean by epoch
# From here on `time_epoch` is a single float (mean seconds per epoch), no
# longer the per-epoch list.
time_epoch = sum(time_epoch)/len(time_epoch)

Training:   0%|          | 0/160 [00:00<?, ?epoch/s]

Train epoch 1:   0%|          | 0/196 [00:00<?, ?batch/s]

  allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass


Test epoch 1:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4999e-01.


Train epoch 2:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 2:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4994e-01.


Train epoch 3:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 3:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4987e-01.


Train epoch 4:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 4:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4977e-01.


Train epoch 5:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 5:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4964e-01.


Train epoch 6:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 6:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4948e-01.


Train epoch 7:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 7:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4929e-01.


Train epoch 8:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 8:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4908e-01.


Train epoch 9:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 9:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4883e-01.


Train epoch 10:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 10:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4856e-01.


Train epoch 11:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 11:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4826e-01.


Train epoch 12:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 12:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4793e-01.


Train epoch 13:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 13:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4757e-01.


Train epoch 14:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 14:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4718e-01.


Train epoch 15:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 15:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4677e-01.


Train epoch 16:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 16:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4633e-01.


Train epoch 17:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 17:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4586e-01.


Train epoch 18:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 18:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4536e-01.


Train epoch 19:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 19:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4484e-01.


Train epoch 20:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 20:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4429e-01.


Train epoch 21:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 21:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4371e-01.


Train epoch 22:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 22:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4311e-01.


Train epoch 23:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 23:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4248e-01.


Train epoch 24:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 24:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4183e-01.


Train epoch 25:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 25:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4114e-01.


Train epoch 26:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 26:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4044e-01.


Train epoch 27:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 27:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3971e-01.


Train epoch 28:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 28:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3895e-01.


Train epoch 29:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 29:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3817e-01.


Train epoch 30:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 30:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3736e-01.


Train epoch 31:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 31:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3653e-01.


Train epoch 32:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 32:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3568e-01.


Train epoch 33:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 33:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3480e-01.


Train epoch 34:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 34:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3390e-01.


Train epoch 35:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 35:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3298e-01.


Train epoch 36:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 36:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3203e-01.


Train epoch 37:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 37:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3106e-01.


Train epoch 38:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 38:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3007e-01.


Train epoch 39:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 39:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2906e-01.


Train epoch 40:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 40:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2803e-01.


Train epoch 41:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 41:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2698e-01.


Train epoch 42:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 42:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2591e-01.


Train epoch 43:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 43:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2482e-01.


Train epoch 44:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 44:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2371e-01.


Train epoch 45:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 45:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2258e-01.


Train epoch 46:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 46:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2143e-01.


Train epoch 47:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 47:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2027e-01.


Train epoch 48:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 48:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1908e-01.


Train epoch 49:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 49:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1788e-01.


Train epoch 50:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 50:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1667e-01.


Train epoch 51:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 51:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1544e-01.


Train epoch 52:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 52:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1419e-01.


Train epoch 53:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 53:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1292e-01.


Train epoch 54:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 54:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1165e-01.


Train epoch 55:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 55:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1035e-01.


Train epoch 56:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 56:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0905e-01.


Train epoch 57:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 57:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0773e-01.


Train epoch 58:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 58:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0640e-01.


Train epoch 59:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 59:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0506e-01.


Train epoch 60:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 60:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0370e-01.


Train epoch 61:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 61:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0234e-01.


Train epoch 62:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 62:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0096e-01.


Train epoch 63:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 63:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.9572e-02.


Train epoch 64:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 64:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.8176e-02.


Train epoch 65:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 65:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.6771e-02.


Train epoch 66:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 66:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.5358e-02.


Train epoch 67:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 67:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.3937e-02.


Train epoch 68:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 68:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.2508e-02.


Train epoch 69:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 69:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.1073e-02.


Train epoch 70:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 70:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.9632e-02.


Train epoch 71:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 71:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.8185e-02.


Train epoch 72:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 72:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.6733e-02.


Train epoch 73:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 73:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.5276e-02.


Train epoch 74:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 74:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.3815e-02.


Train epoch 75:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 75:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.2351e-02.


Train epoch 76:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 76:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.0884e-02.


Train epoch 77:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 77:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.9415e-02.


Train epoch 78:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 78:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.7944e-02.


Train epoch 79:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 79:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.6473e-02.


Train epoch 80:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 80:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.5000e-02.


Train epoch 81:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 81:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.3527e-02.


Train epoch 82:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 82:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.2056e-02.


Train epoch 83:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 83:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.0585e-02.


Train epoch 84:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 84:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.9116e-02.


Train epoch 85:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 85:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.7649e-02.


Train epoch 86:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 86:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.6185e-02.


Train epoch 87:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 87:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.4724e-02.


Train epoch 88:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 88:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.3267e-02.


Train epoch 89:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 89:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.1815e-02.


Train epoch 90:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 90:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.0368e-02.


Train epoch 91:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 91:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.8927e-02.


Train epoch 92:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 92:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.7492e-02.


Train epoch 93:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 93:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.6063e-02.


Train epoch 94:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 94:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.4642e-02.


Train epoch 95:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 95:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.3229e-02.


Train epoch 96:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 96:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.1824e-02.


Train epoch 97:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 97:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.0428e-02.


Train epoch 98:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 98:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.9041e-02.


Train epoch 99:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 99:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.7665e-02.


Train epoch 100:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 100:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.6299e-02.


Train epoch 101:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 101:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.4944e-02.


Train epoch 102:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 102:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.3601e-02.


Train epoch 103:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 103:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.2269e-02.


Train epoch 104:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 104:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.0951e-02.


Train epoch 105:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 105:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.9645e-02.


Train epoch 106:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 106:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.8353e-02.


Train epoch 107:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 107:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.7076e-02.


Train epoch 108:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 108:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.5813e-02.


Train epoch 109:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 109:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.4565e-02.


Train epoch 110:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 110:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.3332e-02.


Train epoch 111:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 111:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.2116e-02.


Train epoch 112:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 112:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.0916e-02.


Train epoch 113:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 113:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.9733e-02.


Train epoch 114:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 114:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.8568e-02.


Train epoch 115:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 115:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.7421e-02.


Train epoch 116:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 116:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.6291e-02.


Train epoch 117:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 117:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.5181e-02.


Train epoch 118:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 118:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.4090e-02.


Train epoch 119:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 119:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.3018e-02.


Train epoch 120:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 120:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.1967e-02.


Train epoch 121:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 121:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.0936e-02.


Train epoch 122:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 122:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.9926e-02.


Train epoch 123:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 123:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.8937e-02.


Train epoch 124:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 124:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.7970e-02.


Train epoch 125:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 125:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.7024e-02.


Train epoch 126:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 126:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.6101e-02.


Train epoch 127:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 127:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.5201e-02.


Train epoch 128:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 128:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4324e-02.


Train epoch 129:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 129:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3470e-02.


Train epoch 130:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 130:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.2640e-02.


Train epoch 131:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 131:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1834e-02.


Train epoch 132:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 132:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1052e-02.


Train epoch 133:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 133:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0295e-02.


Train epoch 134:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 134:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.5628e-03.


Train epoch 135:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 135:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.8559e-03.


Train epoch 136:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 136:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 8.1745e-03.


Train epoch 137:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 137:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.5189e-03.


Train epoch 138:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 138:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.8893e-03.


Train epoch 139:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 139:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 6.2859e-03.


Train epoch 140:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 140:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.7090e-03.


Train epoch 141:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 141:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.1589e-03.


Train epoch 142:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 142:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.6356e-03.


Train epoch 143:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 143:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 4.1395e-03.


Train epoch 144:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 144:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.6708e-03.


Train epoch 145:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 145:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.2295e-03.


Train epoch 146:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 146:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.8159e-03.


Train epoch 147:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 147:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.4301e-03.


Train epoch 148:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 148:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.0723e-03.


Train epoch 149:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 149:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.7426e-03.


Train epoch 150:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 150:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4411e-03.


Train epoch 151:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 151:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.1680e-03.


Train epoch 152:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 152:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 9.2337e-04.


Train epoch 153:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 153:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 7.0730e-04.


Train epoch 154:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 154:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.1987e-04.


Train epoch 155:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 155:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 3.6115e-04.


Train epoch 156:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 156:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 2.3120e-04.


Train epoch 157:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 157:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.3008e-04.


Train epoch 158:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 158:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 5.7822e-05.


Train epoch 159:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 159:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.4457e-05.


Train epoch 160:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 160:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 0.0000e+00.


In [9]:
# run this cell to save history
# Bundles the collected metrics and the run settings into one dict and pickles
# it into TRAIN_HISTORY_DIR.
history_path = os.path.join(
  TRAIN_HISTORY_DIR,
  f"log-{optimizer_name}_{epochs}epochs_{data_name}_lr_0.01.pickle",
)
history = {
  "train_loss": train_loss,
  "train_acc": train_acc,
  "test_loss": test_loss,
  "test_acc": test_acc,
  "time_epoch": time_epoch,
  "data": data_name,
  "model": "resnet18",
  "epochs": epochs,
  "optimizer": optimizer_name,
}
with open(history_path, 'wb') as f:
  pickle.dump(history, f)

In [10]:
# run this cell to save model
# Writes the model's current state_dict into TRAIN_MODEL_DIR.
model_path = os.path.join(
    TRAIN_MODEL_DIR,
    f"{optimizer_name}_{epochs}epochs_{data_name}_lr_0.01.pt",
)
weights = model.state_dict()
torch.save(weights, model_path)