# 0 - Setup

In [1]:
import os

from google.colab import drive

# Expose Google Drive to this Colab runtime under /content/drive
drive.mount('/content/drive')

# Project folder on Drive: each collaborator keeps exactly one ROOT_PATH line active
#ROOT_PATH = "/content/drive/MyDrive/EPFL/optMLProject_Nicolas²_Oliver/" # Nicolas P.
ROOT_PATH = "/content/drive/MyDrive/optMLProject_Nicolas²_Oliver/" # Nicolas B.
#ROOT_PATH = "/content/drive/MyDrive/ ... / optMLProject_Nicolas²_Oliver/" # Oliver D.

# Sanity check: list the project folder (raises if ROOT_PATH does not exist)
print(os.listdir(ROOT_PATH))

Mounted at /content/drive
['.DS_Store', 'README.md', '.vscode', '.git', 'models', 'utils', 'data', 'optimizers', 'Hyperparameters Search.gdoc', 'more_info_hyperparameters_search.xlsx', 'optML.ipynb', 'hyperparameters_search.xlsx']


In [2]:
# Enable autoreload so edits to the imported project modules are picked up live
%load_ext autoreload
%autoreload 2

# Imports

# Put the project folder (ROOT_PATH, set in the setup cell) on the import path
# so that the `utils` package stored there can be imported below
import sys
sys.path.append(ROOT_PATH)

import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn

import torch
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

!pip install Lion_pytorch
from lion_pytorch import Lion

from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold


from utils import constants as cst
from utils import auxiliaries as aux

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Lion_pytorch
  Downloading lion_pytorch-0.1.2-py3-none-any.whl (4.4 kB)
Installing collected packages: Lion_pytorch
Successfully installed Lion_pytorch-0.1.2


# 1 - Grid-Search for Hyperparameters  

In [3]:
########### 1st cell ###########

######### TO MANUALLY SET UP #########

# Architecture to tune -- one of: resnet18, resnet101
model_name = "resnet18"

# Optimizer to tune -- one of: SGD, SGDM, RMS, ADAM, LION
optimizer_name = "LION"

######### TO MANUALLY SET UP #########

# Fail fast if either manual choice is not one of the supported values
assert model_name in ("resnet18", "resnet101")
assert optimizer_name in ("SGD", "SGDM", "RMS", "ADAM", "LION")

In [4]:
########### 2nd cell ###########

# Fetch the hyperparameter values to search for the chosen model/optimizer pair
hyperparams_sheet_path = f"{ROOT_PATH}hyperparameters_search.xlsx"
hyperparams = aux.get_hyperparams(model_name, optimizer_name, hyperparams_sheet_path)

print(f"HYPERPARAMS CHECK : {hyperparams}")

### Where the best model gets saved ###
model_path = f"{ROOT_PATH}models/{model_name}/"
# The per-model folder must already exist on Drive
assert os.path.isdir(model_path)

file_name = f"{model_name}-{optimizer_name}"
save_path = model_path + file_name
print(f"SAVE-PATH CHECK : {save_path}")
### Where the best model gets saved ###

# Build the run configuration
config = {"model": model_name, "optimizer": optimizer_name}
print(f"CONFIG CHECK : {config}")

# Build the folds handed to the grid search
folds = aux.get_folds()

HYPERPARAMS CHECK : {'lr': [9e-06, 3e-05], 'weight_decay': [1.0, 0.5], 'momentum': [0.0]}
SAVE-PATH CHECK : /content/drive/MyDrive/optMLProject_Nicolas²_Oliver/models/resnet18/resnet18-LION
CONFIG CHECK : {'model': 'resnet18', 'optimizer': 'LION'}
Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:05<00:00, 28247362.21it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
########### 3rd cell ###########

######### RUN the Grid-Search #########
grid_search_output = aux.grid_search(model_name, optimizer_name, hyperparams, folds, save_path)
all_accuracies, all_hyperparams, best_accuracy, best_hyperparams = grid_search_output

# Write the accuracy of every hyperparameter combination back to the sheet;
# the best accuracy is passed as the value returned by its .item()
aux.store_grid_search_results(
    all_accuracies,
    all_hyperparams,
    best_accuracy.item(),
    best_hyperparams,
    model_name,
    optimizer_name,
    hyperparams_sheet_path,
)

--- Working with LION on resnet18 ---
Hyperparameters: 
Learning Rate : 9e-06 
Weight Decay : 1.0 
Momentum : 0.0
----- Training on fold # 1 -----
Epoch: 1|10


Training: 100%|██████████| 157/157 [00:23<00:00,  6.76it/s]


Epoch: 2|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.86it/s]


Epoch: 3|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.18it/s]


Epoch: 4|10


Training: 100%|██████████| 157/157 [00:24<00:00,  6.41it/s]


Epoch: 5|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.94it/s]


Epoch: 6|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.06it/s]


Epoch: 7|10


Training: 100%|██████████| 157/157 [00:23<00:00,  6.81it/s]


Epoch: 8|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.21it/s]


Epoch: 9|10


Training: 100%|██████████| 157/157 [00:23<00:00,  6.79it/s]


Epoch: 10|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.88it/s]


Average lost on the last bacthes : -13.449067497253418


Testing: 100%|██████████| 10/10 [00:04<00:00,  2.08it/s]


Accuracy : 0.15139999985694885

 Best Hyperparameters so far, Model Saved !
Best Accuracy : 0.15139999985694885
--- Working with LION on resnet18 ---
Hyperparameters: 
Learning Rate : 9e-06 
Weight Decay : 0.5 
Momentum : 0.0
----- Training on fold # 1 -----
Epoch: 1|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.85it/s]


Epoch: 2|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.30it/s]


Epoch: 3|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.88it/s]


Epoch: 4|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.06it/s]


Epoch: 5|10


Training: 100%|██████████| 157/157 [00:23<00:00,  6.68it/s]


Epoch: 6|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.87it/s]


Epoch: 7|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.37it/s]


Epoch: 8|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.00it/s]


Epoch: 9|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.08it/s]


Epoch: 10|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.32it/s]


Average lost on the last bacthes : -13.589619636535645


Testing: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]


Accuracy : 0.14720000326633453
--- Working with LION on resnet18 ---
Hyperparameters: 
Learning Rate : 3e-05 
Weight Decay : 1.0 
Momentum : 0.0
----- Training on fold # 1 -----
Epoch: 1|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.33it/s]


Epoch: 2|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.95it/s]


Epoch: 3|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.31it/s]


Epoch: 4|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.05it/s]


Epoch: 5|10


Training: 100%|██████████| 157/157 [00:23<00:00,  6.64it/s]


Epoch: 6|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.31it/s]


Epoch: 7|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.01it/s]


Epoch: 8|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.39it/s]


Epoch: 9|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.92it/s]


Epoch: 10|10


Training: 100%|██████████| 157/157 [00:22<00:00,  7.06it/s]


Average lost on the last bacthes : -38.297804260253905


Testing: 100%|██████████| 10/10 [00:05<00:00,  1.97it/s]


Accuracy : 0.1339000016450882
--- Working with LION on resnet18 ---
Hyperparameters: 
Learning Rate : 3e-05 
Weight Decay : 0.5 
Momentum : 0.0
----- Training on fold # 1 -----
Epoch: 1|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.95it/s]


Epoch: 2|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.31it/s]


Epoch: 3|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.95it/s]


Epoch: 4|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.37it/s]


Epoch: 5|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.98it/s]


Epoch: 6|10


Training: 100%|██████████| 157/157 [00:23<00:00,  6.62it/s]


Epoch: 7|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.30it/s]


Epoch: 8|10


Training: 100%|██████████| 157/157 [00:22<00:00,  6.91it/s]


Epoch: 9|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.15it/s]


Epoch: 10|10


Training: 100%|██████████| 157/157 [00:21<00:00,  7.14it/s]


Average lost on the last bacthes : -39.99157028198242


Testing: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]


Accuracy : 0.15649999678134918

 Best Hyperparameters so far, Model Saved !
Best Accuracy : 0.15649999678134918


# 5 - Oliver's trainings

In [None]:
# Accumulators filled by the training / evaluation cells below
train_plot_info, test_plot_info = [], []
# Passed as the last argument to every train_model call below
PLOT_GRANULARITY = 50

## SGDM

In [None]:
# Train a fresh network using SGD with momentum, then evaluate it
model_sgdm = NN()
criterion_sgdm = nn.NLLLoss()
sgdm = optim.SGD(model_sgdm.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

# Record the training result first, then the test result, under the same label
sgdm_train_info = train_model(
    model_sgdm, sgdm, criterion_sgdm, "SGD with Momentum",
    training_set, EPOCHS, PLOT_GRANULARITY,
)
train_plot_info.append(sgdm_train_info)

sgdm_test_info = test_model(model_sgdm, criterion_sgdm, test_set, "SGD with Momentum")
test_plot_info.append(sgdm_test_info)

## RMS

In [None]:
# Train a fresh network using RMSProp (momentum explicitly set to 0), then evaluate it
model_rms = NN()
criterion_rms = nn.NLLLoss()
rms = optim.RMSprop(model_rms.parameters(), lr=LEARNING_RATE, momentum=0)

# Record the training result first, then the test result, under the same label
rms_train_info = train_model(
    model_rms, rms, criterion_rms, "RMSProp",
    training_set, EPOCHS, PLOT_GRANULARITY,
)
train_plot_info.append(rms_train_info)

rms_test_info = test_model(model_rms, criterion_rms, test_set, "RMSProp")
test_plot_info.append(rms_test_info)

## LION

In [None]:
# Train a fresh network using Lion, then evaluate it.
# Unlike the other optimizer cells, lr and weight_decay are literal values here.
model_lion = NN()
criterion_lion = nn.NLLLoss()
lion = Lion(model_lion.parameters(), lr=1e-4, weight_decay=1e-2)

# Record the training result first, then the test result, under the same label
lion_train_info = train_model(
    model_lion, lion, criterion_lion, "Lion",
    training_set, EPOCHS, PLOT_GRANULARITY,
)
train_plot_info.append(lion_train_info)

lion_test_info = test_model(model_lion, criterion_lion, test_set, "Lion")
test_plot_info.append(lion_test_info)

## SGD

In [None]:
# Train a fresh network using plain SGD (momentum explicitly set to 0), then evaluate it
model_sgd = NN()
criterion_sgd = nn.NLLLoss()
sgd = optim.SGD(model_sgd.parameters(), lr=LEARNING_RATE, momentum=0)

# Record the training result first, then the test result, under the same label
sgd_train_info = train_model(
    model_sgd, sgd, criterion_sgd, "SGD",
    training_set, EPOCHS, PLOT_GRANULARITY,
)
train_plot_info.append(sgd_train_info)

sgd_test_info = test_model(model_sgd, criterion_sgd, test_set, "SGD")
test_plot_info.append(sgd_test_info)

## ADAM

In [None]:
# Train a fresh network using Adam, then evaluate it
model_adam = NN()
criterion_adam = nn.NLLLoss()
adam = optim.Adam(model_adam.parameters(), lr=LEARNING_RATE)

# Record the training result first, then the test result, under the same label
adam_train_info = train_model(
    model_adam, adam, criterion_adam, "Adam",
    training_set, EPOCHS, PLOT_GRANULARITY,
)
train_plot_info.append(adam_train_info)

adam_test_info = test_model(model_adam, criterion_adam, test_set, "Adam")
test_plot_info.append(adam_test_info)

## Plots

In [None]:
# Inspect the first recorded training entry (shown as this cell's output)
train_plot_info[0]

In [None]:
fig1 = plt.figure()

# Plot the first recorded training curve on its own.
# The other plotting cells in this notebook unpack every train_plot_info entry
# as a 4-tuple and draw element 2 on the x-axis against element 0 on the
# y-axis; the previous two-name unpacking here did not match that layout.
# NOTE(review): assumes train_model returns such 4-tuples -- confirm against
# its definition.
losses, _, examples_seen, _ = train_plot_info[0]
plt.plot(examples_seen, losses)
# Same axis labels as the combined training plot
plt.xlabel('Training Examples')
plt.ylabel('NLL loss')
plt.show()

In [None]:
fig, axs = plt.subplots(figsize=(10,8))
# One curve per recorded training run: element 2 on the x-axis, element 0 on the y-axis
for (loss_values, _, example_counts, _) in train_plot_info:
    axs.plot(example_counts, loss_values)
# Element 3 of every entry is the label shown in the legend
axs.legend([label for (_, _, _, label) in train_plot_info], loc='upper right')
axs.set_xlabel('Training Examples')
axs.set_ylabel('NLL loss')
axs.set_ylim([0,3])
axs.grid()
plt.show()

In [None]:
fig, axs = plt.subplots(figsize=(10,8))
# Pair the name (element 2) of every test entry with its accuracy (element 1)
acc_pairs = [(name, accuracy) for (_, accuracy, name) in test_plot_info]
# Highest accuracy first
sorted_acc_pairs = sorted(acc_pairs, key=lambda pair: pair[1], reverse=True)
sorted_names = [name for (name, _) in sorted_acc_pairs]
sorted_acc = [accuracy for (_, accuracy) in sorted_acc_pairs]
# Bars are drawn on a logarithmic y-axis
axs.bar(sorted_names, sorted_acc, log=True)
axs.set_title("Accuracy per Optimizer")
axs.grid()
plt.show()

In [None]:
fig, axs = plt.subplots(figsize=(10,8))
# Pair the name (element 2) of every test entry with its loss (element 0)
loss_pairs = [(name, loss) for (loss, _, name) in test_plot_info]
# Lowest loss first
sorted_loss_pairs = sorted(loss_pairs, key=lambda pair: pair[1])
sorted_names = [name for (name, _) in sorted_loss_pairs]
sorted_loss = [loss for (_, loss) in sorted_loss_pairs]
axs.bar(sorted_names, sorted_loss)
axs.set_title("Generalization Error per Optimizer")
axs.grid()
plt.show()

In [None]:
#TODO: Can also plot execution times (including hardware information), in case there are differences

In [None]:
# Train and evaluate every (model, optimizer) combination, collecting the
# results for the plotting cells below.
models = ['resnet18', 'resnet101']
optimizers = ['SGD', 'SGDM', 'RMS', 'ADAM', 'LION']
dataset = 'Cifar100'
batch_size = 256   #/!\ add to cst
num_epoch = 10   #/!\ add to cst
train_plot_info = []
test_plot_info = []
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
training_loader, test_loader = aux.get_dataset(batch_size, dataset, shuffle_train=True)

for m in models:
    for o in optimizers:
        # `dataset` is a plain string, so it has to be passed by keyword:
        # dict('Cifar100', ...) raises ValueError.
        # NOTE(review): assumes the aux helpers read it under the key
        # "dataset" -- confirm.
        config = dict(dataset=dataset, model=m, optimizer=o)
        model = aux.get_model(config, device)
        # The scheduler is returned by the helper but not used in this cell
        optimizer, scheduler = aux.get_optimizer(config, model.parameters())
        criterion = torch.nn.CrossEntropyLoss()

        # Label each run by model AND optimizer (same "<model>-<optimizer>"
        # pattern as the grid-search save name) so the two models do not
        # share one stored name / plot label per optimizer.
        run_name = f"{m}-{o}"

        train_plot_info.append(aux.train_model(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            store_name=run_name,
            training_loader=training_loader,
            device=device,
            epochs=num_epoch,
            plot_granularity=cst.PLOT_GRANULARITY,
            model_save_path=ROOT_PATH + "models/",
            optimizer_save_path=ROOT_PATH + "optimizers/",
        ))
        # Evaluate the freshly trained model; this line must sit at the same
        # indentation as the training call (it was over-indented before)
        test_plot_info.append(aux.test_model(
            model=model,
            criterion=criterion,
            test_loader=test_loader,
            device=device,
            name=run_name,
        ))


#do the plot for everything somehow


In [None]:

fig, axs = plt.subplots(figsize=(10,8))
# One curve per recorded training run: element 2 on the x-axis, element 0 on the y-axis
for (loss_values, _, example_counts, _) in train_plot_info:
    axs.plot(example_counts, loss_values)
# Element 3 of every entry is the label shown in the legend
axs.legend([label for (_, _, _, label) in train_plot_info], loc='upper right')
axs.set_xlabel('Training Examples')
axs.set_ylabel('NLL loss')
axs.set_ylim([0,3])
axs.grid()
plt.show()


In [None]:
fig, axs = plt.subplots(figsize=(10,8))
# Test entries are unpacked as 4-tuples here: accuracy is element 1, name is element 2
acc_pairs = [(name, accuracy) for (_, accuracy, name, _) in test_plot_info]
# Highest accuracy first
sorted_acc_pairs = sorted(acc_pairs, key=lambda pair: pair[1], reverse=True)
sorted_names = [name for (name, _) in sorted_acc_pairs]
sorted_acc = [accuracy for (_, accuracy) in sorted_acc_pairs]
# Bars are drawn on a logarithmic y-axis
axs.bar(sorted_names, sorted_acc, log=True)
axs.set_title("Accuracy per Optimizer")
axs.grid()
plt.show()

In [None]:
fig, axs = plt.subplots(figsize=(10,8))
# Test entries are unpacked as 4-tuples here: loss is element 0, name is element 2
loss_pairs = [(name, loss) for (loss, _, name, _) in test_plot_info]
# Lowest loss first
sorted_loss_pairs = sorted(loss_pairs, key=lambda pair: pair[1])
sorted_names = [name for (name, _) in sorted_loss_pairs]
sorted_loss = [loss for (_, loss) in sorted_loss_pairs]
axs.bar(sorted_names, sorted_loss)
axs.set_title("Generalization Error per Optimizer")
axs.grid()
plt.show()