### Train optimizer
Notebook to train different optimizer to compare

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pickle
import os
import sys
import time
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm.notebook import trange, tqdm

In [4]:
sys.path.append('/content/drive/MyDrive/Colab_Notebooks/OptiML/OptiForML2022')

In [5]:
from optimizer.AdaHessian import AdaHessian
from optimizer.Atmo import Atmo, MASScheduler
from dataset import ImagesDataset
from model import ResNet18
from path import TRAIN_HISTORY_DIR, TRAIN_MODEL_DIR

In [6]:
# choose dataset
# data_name = "MNIST"
data_name = "cifar"
full = True
tiny = False

# choose optimizer
#optimizer_name = "adam"
#optimizer_name = "sgd"
#optimizer_name = "atmo"
#optimizer_name = "dynamicAtom"
optimizer_name = "adaHessian"

# choose nb of epochs
epochs = 350

In [7]:
# load dataset
train_dataset = ImagesDataset(full = full, tiny=tiny, cifar=(data_name=="cifar"))
test_dataset = ImagesDataset(full = full, tiny=tiny, cifar=(data_name=="cifar"), test=True)


trainDataLoader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)
testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=True)

* Using CIFAR
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar10/cifar-10-python.tar.gz to ./data/cifar10/
** Use 50000 train samples
* Using CIFAR
Files already downloaded and verified
** Use 10000 test samples


In [8]:
# load model
model = ResNet18(in_channel=1 if data_name=="MNIST" else 3)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)
model.train()
model.to(device)
model.load_state_dict(torch.load(os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{epochs}epochs_{data_name}.pt")))
model.eval()
with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{epochs}epochs_{data_name}.pickle"), 'rb') as f:
    history = pickle.load(f)

# load optimizer
if optimizer_name == "adam":
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
if optimizer_name == "sgd":
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
if optimizer_name == "atmo":
    optimizer = Atmo(model.parameters())
if optimizer_name == "dynamicAtom":
    optimizer = Atmo(model.parameters(), adam_w=1, sgd_w=0)
    dynamic = MASScheduler(optimizer, epochs = epochs)
if optimizer_name == "adaHessian":
    optimizer = AdaHessian(model.parameters())

criterion = nn.CrossEntropyLoss()

cuda


In [9]:
train_loss = history['train_loss']
train_acc = history['train_acc']
test_loss = history['test_loss']
test_acc = history['test_acc']
time_epoch = history['time_epoch']
epoch_done = len(train_loss)

with trange(epoch_done, epochs + 1, desc='Training', unit='epoch') as t:
  for epoch in t:
    losses = []
    acc = []
    start_time = time.time()
    with tqdm(trainDataLoader, desc=f'Train epoch {epoch}',
              unit='batch', leave=False) as t1:
      for x_train, y_train in t1:
        x_train = x_train.to(device)
        y_train = y_train.to(device)

        optimizer.zero_grad()
        output = model(x_train)
        loss = criterion(output, y_train)
        loss.backward(create_graph=(optimizer_name=="adaHessian"))
        optimizer.step()

        losses.append(loss)
        pred = torch.argmax(output, axis = 1)
        acc.append(sum(pred == y_train).item()/pred.shape[0])

    if optimizer_name=="dynamicAtmo":
          dynamic.step()

    train_loss.append(sum(losses)/len(losses))
    train_acc.append(sum(acc)/len(acc))

    losses = []
    acc = []
    with torch.no_grad():
      with tqdm(testDataLoader, desc=f'Test epoch {epoch}',
                unit='batch', leave=False) as t1:
        for x_test, y_test in t1:
          x_test = x_test.to(device)
          y_test = y_test.to(device)

          output = model(x_test)
          loss = criterion(output, y_test)
          losses.append(loss)

          pred = torch.argmax(output, axis = 1)
          acc.append(sum(pred == y_test).item()/pred.shape[0])

      test_loss.append(sum(losses)/len(losses))
      test_acc.append(sum(acc)/len(acc))

      end_time = time.time()
      time_epoch.append(end_time-start_time)
    weights = model.state_dict()
    torch.save(weights,os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{epochs}epochs_{data_name}.pt"))
    history = dict()
    history["train_loss"] = train_loss
    history["train_acc"] = train_acc
    history["test_loss"] = test_loss
    history["test_acc"] = test_acc
    history["time_epoch"] = time_epoch
    history["data"] = data_name
    history["model"] = "resnet18"
    history["epochs"] = epochs
    history["optimizer"] = optimizer_name
    with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{epochs}epochs_{data_name}.pickle"), 'wb') as f:
      pickle.dump(history, f)

#mean by epoch
time_epoch = sum(time_epoch)/len(time_epoch)

Training:   0%|          | 0/67 [00:00<?, ?epoch/s]

Train epoch 284:   0%|          | 0/196 [00:00<?, ?batch/s]

  allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass


Test epoch 284:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 285:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 285:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 286:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 286:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 287:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 287:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 288:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 288:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 289:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 289:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 290:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 290:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 291:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 291:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 292:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 292:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 293:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 293:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 294:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 294:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 295:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 295:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 296:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 296:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 297:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 297:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 298:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 298:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 299:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 299:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 300:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 300:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 301:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 301:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 302:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 302:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 303:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 303:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 304:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 304:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 305:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 305:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 306:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 306:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 307:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 307:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 308:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 308:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 309:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 309:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 310:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 310:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 311:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 311:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 312:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 312:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 313:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 313:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 314:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 314:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 315:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 315:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 316:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 316:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 317:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 317:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 318:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 318:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 319:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 319:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 320:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 320:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 321:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 321:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 322:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 322:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 323:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 323:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 324:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 324:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 325:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 325:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 326:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 326:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 327:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 327:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 328:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 328:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 329:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 329:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 330:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 330:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 331:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 331:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 332:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 332:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 333:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 333:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 334:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 334:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 335:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 335:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 336:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 336:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 337:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 337:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 338:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 338:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 339:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 339:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 340:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 340:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 341:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 341:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 342:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 342:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 343:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 343:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 344:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 344:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 345:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 345:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 346:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 346:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 347:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 347:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 348:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 348:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 349:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 349:   0%|          | 0/40 [00:00<?, ?batch/s]

Train epoch 350:   0%|          | 0/196 [00:00<?, ?batch/s]

Test epoch 350:   0%|          | 0/40 [00:00<?, ?batch/s]

In [10]:
# run this cell to save history
history = dict()
history["train_loss"] = train_loss
history["train_acc"] = train_acc
history["test_loss"] = test_loss
history["test_acc"] = test_acc
history["time_epoch"] = time_epoch
history["data"] = data_name
history["model"] = "resnet18"
history["epochs"] = epochs
history["optimizer"] = optimizer_name
with open(os.path.join(TRAIN_HISTORY_DIR, f"log-{optimizer_name}_{epochs}epochs_{data_name}.pickle"), 'wb') as f:
  pickle.dump(history, f)

In [11]:
# run this cell to save model
weights = model.state_dict()
torch.save(weights,os.path.join(TRAIN_MODEL_DIR, f"{optimizer_name}_{epochs}epochs_{data_name}.pt"))

In [6]:
%cd drive/MyDrive/Colab_Notebooks/OptiML/OptiForML2022

/content/drive/MyDrive/Colab_Notebooks/OptiML/OptiForML2022


In [None]:
!git status
!git pull

On branch main
Your branch is ahead of 'origin/main' by 1 commit.
  (use "git push" to publish your local commits)

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   train_optimizer_copie_Louis.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")


In [9]:
!git config --global user.email "louis@gmail.com"
!git config --global user.name "Louis"

!git add -A
!git commit -m "adahessian"
!git push

[main 5d6e313] adahessian
 3 files changed, 1 insertion(+), 357 deletions(-)
 create mode 100644 output/pretrained_model/adaHessian_350epochs_cifar.pt
 create mode 100644 output/training_history/log-adaHessian_350epochs_cifar.pickle
 rewrite train_optimizer_copie_Louis.ipynb (97%)
To https://github.com/louisPoulain/OptiForML2022.git
 ! [rejected]        main -> main (fetch first)
error: failed to push some refs to 'https://louisPoulain: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@github.com/louisPoulain/OptiForML2022.git'
hint: Updates were rejected because the remote contains work that you do
hint: not have locally. This is usually caused by another repository pushing
hint: to the same ref. You may want to first integrate the remote changes
hint: (e.g., 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
