### Train optimizer
Notebook to train different optimizer to compare

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import sys
import time
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm.notebook import trange, tqdm

In [3]:
# Run this cell only if you run this notebook on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

sys.path.append('/content/drive/MyDrive/Colab_Notebooks/OptiML/OptiForML2022')

Mounted at /content/drive


In [4]:
from optimizer.AdaHessian import AdaHessian
from optimizer.Atmo import Atmo, MASScheduler
from dataset import ImagesDataset
from model import ResNet18
from path import TRAIN_HISTORY_DIR, TRAIN_MODEL_DIR

In [5]:
# choose dataset
#data_name = "MNIST"
data_name = "cifar"
full = True
tiny = False

# choose optimizer
optimizer_name = "adam"
optimizer_name = "sgd"
optimizer_name = "atmo"
#optimizer_name = "dynamicAtom"
#optimizer_name = "adaHessian"

# choose scheduler
scheduler_name = None
scheduler_name = "cosineAnnealinglr"
scheduler_name = "steplr"

# choose nb of epochs
epochs = 350

In [6]:
# load dataset
train_dataset = ImagesDataset(full = full, tiny=tiny, cifar=(data_name=="cifar"))
test_dataset = ImagesDataset(full = full, tiny=tiny, cifar=(data_name=="cifar"), test=True)


trainDataLoader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)
testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=True)

* Using CIFAR
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar10/cifar-10-python.tar.gz to ./data/cifar10/
** Use 50000 train samples
* Using CIFAR
Files already downloaded and verified
** Use 10000 test samples


In [7]:
# load model
model = ResNet18(in_channel=1 if data_name=="MNIST" else 3)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)
model.train()
model.to(device)
model.load_state_dict(torch.load(os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}.pt")))
model.eval()

# load optimizer
if optimizer_name == "adam":
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
if optimizer_name == "sgd":
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
if optimizer_name == "atmo":
    optimizer = Atmo(model.parameters(), lr = 1e-5)
if optimizer_name == "dynamicAtom":
    optimizer = Atmo(model.parameters(), adam_w=1, sgd_w=0)
    dynamic = MASScheduler(optimizer, epochs = epochs)
if optimizer_name == "adaHessian":
    optimizer = AdaHessian(model.parameters())

# load scheduler
if scheduler_name is not None:
    if scheduler_name == "cosineAnnealinglr":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, verbose=True)
    if scheduler_name == "steplr":
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150, verbose=True)


criterion = nn.CrossEntropyLoss()

with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}.pickle"), 'rb') as f:
    history = pickle.load(f)

cuda
Adjusting learning rate of group 0 to 1.0000e-05.


In [8]:
train_loss = history['train_loss']
train_acc = history['train_acc']
test_loss = history['test_loss']
test_acc = history['test_acc']
time_epoch = history['time_epoch']
epochs_done = len(train_loss)

with trange(epochs_done, epochs + 1, desc='Training', unit='epoch') as t:
  for epoch in t:
    losses = []
    acc = []
    start_time = time.time()
    with tqdm(trainDataLoader, desc=f'Train epoch {epoch}',
              unit='batch', leave=False) as t1:
      for x_train, y_train in t1:
        x_train = x_train.to(device)
        y_train = y_train.to(device)

        optimizer.zero_grad()
        output = model(x_train)
        loss = criterion(output, y_train)
        loss.backward(create_graph=(optimizer_name=="adaHessian"))
        optimizer.step()

        losses.append(loss)
        pred = torch.argmax(output, axis = 1)
        acc.append(sum(pred == y_train).item()/pred.shape[0])

    if optimizer_name=="dynamicAtmo":
          dynamic.step()

    train_loss.append(sum(losses)/len(losses))
    train_acc.append(sum(acc)/len(acc))

    losses = []
    acc = []
    with torch.no_grad():
      with tqdm(testDataLoader, desc=f'Test epoch {epoch}',
                unit='batch', leave=False) as t1:
        for x_test, y_test in t1:
          x_test = x_test.to(device)
          y_test = y_test.to(device)

          output = model(x_test)
          loss = criterion(output, y_test)
          losses.append(loss)

          pred = torch.argmax(output, axis = 1)
          acc.append(sum(pred == y_test).item()/pred.shape[0])

      test_loss.append(sum(losses)/len(losses))
      test_acc.append(sum(acc)/len(acc))

      end_time = time.time()
      time_epoch.append(end_time-start_time)
    if scheduler_name is not None:
          scheduler.step()
    history = dict()
    history["train_loss"] = train_loss
    history["train_acc"] = train_acc
    history["test_loss"] = test_loss
    history["test_acc"] = test_acc
    history["time_epoch"] = time_epoch
    history["data"] = data_name
    history["model"] = "resnet18"
    history["epochs"] = epochs
    history["optimizer"] = optimizer_name
    with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}.pickle"), 'wb') as f:
      pickle.dump(history, f)
    weights = model.state_dict()
    torch.save(weights,os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{scheduler_name}_{epochs}epochs_{data_name}.pt"))
#mean by epoch
time_epoch = sum(time_epoch)/len(time_epoch)

Training:   0%|          | 0/137 [00:00<?, ?epoch/s]

Train epoch 214:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 214:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 215:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 215:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 216:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 216:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 217:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 217:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 218:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 218:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 219:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 219:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 220:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 220:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 221:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 221:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 222:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 222:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 223:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 223:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 224:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 224:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 225:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 225:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 226:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 226:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 227:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 227:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 228:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 228:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 229:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 229:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 230:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 230:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 231:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 231:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 232:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 232:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 233:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 233:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 234:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 234:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 235:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 235:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 236:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 236:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 237:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 237:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 238:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 238:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 239:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 239:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 240:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 240:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 241:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 241:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 242:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 242:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 243:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 243:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 244:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 244:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 245:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 245:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 246:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 246:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 247:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 247:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 248:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 248:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 249:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 249:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 250:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 250:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 251:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 251:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 252:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 252:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 253:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 253:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 254:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 254:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 255:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 255:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 256:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 256:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 257:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 257:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 258:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 258:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 259:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 259:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 260:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 260:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 261:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 261:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 262:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 262:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 263:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 263:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 264:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 264:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 265:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 265:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 266:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 266:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 267:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 267:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 268:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 268:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 269:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 269:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 270:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 270:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 271:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 271:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 272:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 272:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 273:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 273:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 274:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 274:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 275:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 275:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 276:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 276:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 277:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 277:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 278:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 278:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 279:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 279:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 280:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 280:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 281:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 281:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 282:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 282:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 283:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 283:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 284:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 284:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 285:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 285:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 286:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 286:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 287:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 287:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 288:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 288:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 289:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 289:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 290:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 290:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 291:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 291:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 292:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 292:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 293:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 293:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 294:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 294:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 295:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 295:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 296:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 296:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 297:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 297:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 298:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 298:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 299:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 299:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 300:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 300:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 301:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 301:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 302:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 302:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 303:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 303:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 304:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 304:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 305:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 305:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 306:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 306:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 307:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 307:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 308:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 308:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 309:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 309:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 310:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 310:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 311:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 311:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 312:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 312:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 313:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 313:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 314:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 314:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 315:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 315:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 316:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 316:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 317:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 317:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 318:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 318:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 319:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 319:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 320:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 320:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 321:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 321:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 322:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 322:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 323:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 323:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 324:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 324:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 325:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 325:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 326:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 326:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 327:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 327:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 328:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 328:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 329:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 329:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 330:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 330:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 331:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 331:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 332:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 332:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 333:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 333:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 334:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 334:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 335:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 335:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 336:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 336:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 337:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 337:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 338:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 338:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 339:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 339:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 340:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 340:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 341:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 341:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 342:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 342:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 343:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 343:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 344:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 344:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 345:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 345:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 346:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 346:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 347:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 347:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 348:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 348:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 349:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 349:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


Train epoch 350:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 350:   0%|          | 0/40 [00:00<?, ?batch/s]

Adjusting learning rate of group 0 to 1.0000e-05.


In [9]:
# run this cell to save history
history = dict()
history["train_loss"] = train_loss
history["train_acc"] = train_acc
history["test_loss"] = test_loss
history["test_acc"] = test_acc
history["time_epoch"] = time_epoch
history["data"] = data_name
history["model"] = "resnet18"
history["epochs"] = epochs
history["optimizer"] = optimizer_name
with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{epochs}epochs_{data_name}.pickle"), 'wb') as f:
  pickle.dump(history, f)

In [10]:
# run this cell to save model
weights = model.state_dict()
torch.save(weights,os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{epochs}epochs_{data_name}.pt"))