In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from adan_pytorch import Adan
import copy
from utils import preparte_data_loader
from utils import set_parameter_requires_grad
from utils import DEVICE, LOSS_CRITERIA
import time
from utils import BertForSequenceLabeling

import matplotlib.pyplot as plt
from utils import DEVICE
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# batch_size = 64
classes = 10
epochs = 10
device=DEVICE
loss_criteria = LOSS_CRITERIA
train_dataloader = preparte_data_loader(mode='train', batch_size=20)
val_dataloader = preparte_data_loader(mode='val', batch_size=20)
dataloaders = {
    'train':train_dataloader,
    'val':val_dataloader
}



def optune_optimizer_for_model(trial):
    bert_model = BertForSequenceLabeling()
    model = bert_model.to(DEVICE)
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)  # log=True, will use log scale to interplolate between lr
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD","Adan"])
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'SGD':
        sgd_momentum = trial.suggest_float("sgd_momentum", 1e-1, 1)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=sgd_momentum, nesterov=True)
    elif optimizer_name == 'Adan':
        beta1 = trial.suggest_float("beta1", 1e-3, 1e-1)
        beta2 = trial.suggest_float("beta2", 1e-3, 1e-1)
        beta3 = trial.suggest_float("beta3", 1e-3, 1e-1)
        optimizer = Adan(model.parameters(),lr = lr,
            betas = (beta1, beta2, beta3), 
            weight_decay = 0.02         # weight decay 0.02 is optimal per author
        )
        
    
    # out_features = trial.suggest_int("n_units_l{}".format(i), 4, 128)  # number of units will be between 4 and 128
    

    for epoch in range(epochs):
        print('Epoch {}/{}'.format(epoch, epochs - 1))
        print('-' * 10)
        
        model.train()
        
        # Iterate over data.
        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            with torch.set_grad_enabled(True):
                # Get model outputs and calculate loss
                outputs = model(inputs)
                loss = loss_criteria(outputs, labels)

                _, preds = torch.max(outputs, 1)

                # backward + optimize only if in training phase
                # zero the parameter gradients
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        
        model.eval()
        running_corrects = 0.0
        for inputs, labels in dataloaders['val']:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history if only in train
                with torch.set_grad_enabled(False):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = loss_criteria(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                # statistics
                running_corrects += torch.sum(preds == labels.data)

        epoch_acc = running_corrects.double() / len(dataloaders['val'].dataset)

        # report back to Optuna how far it is (epoch-wise) into the trial and how well it is doing (accuracy)
        trial.report(epoch_acc, epoch)  

        # then, Optuna can decide if the trial should be pruned
        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return epoch_acc

In [8]:
# now we can run the experiment
sampler = optuna.samplers.TPESampler()
study = optuna.create_study(study_name="bert_optimizers", direction="maximize", sampler=sampler)
study.optimize(optune_optimizer_for_model, n_trials=100, timeout=600)

pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))
print("Best trial:")

trial = study.best_trial

print("  Value: ", trial.value)
print("  Params: ")

for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2024-08-05 23:41:59,595] A new study created in memory with name: bert_optimizers


Epoch 0/9
----------
Epoch 1/9
----------
Epoch 2/9
----------
Epoch 3/9
----------
Epoch 4/9
----------
Epoch 5/9
----------
Epoch 6/9
----------
Epoch 7/9
----------
Epoch 8/9
----------
Epoch 9/9
----------


[I 2024-08-05 23:49:18,469] Trial 0 finished with value: 41.5025 and parameters: {'lr': 0.09806489514726056, 'optimizer': 'Adam'}. Best is trial 0 with value: 41.5025.


Epoch 0/9
----------
Epoch 1/9
----------
Epoch 2/9
----------


[W 2024-08-05 23:51:30,342] Trial 1 failed with parameters: {'lr': 0.0013221924920628844, 'optimizer': 'Adan', 'beta1': 0.02270487705723893, 'beta2': 0.07939698973319406, 'beta3': 0.08545991942704444} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/mlspeech/royweber/anaconda3/envs/roy_lang/lib/python3.8/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_582844/229243235.py", line 63, in optune_optimizer_for_model
    for inputs, labels in dataloaders['val']:
  File "/home/mlspeech/royweber/anaconda3/envs/roy_lang/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 634, in __next__
    data = self._next_data()
  File "/home/mlspeech/royweber/anaconda3/envs/roy_lang/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 678, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/home/mlspeech/royweb

KeyboardInterrupt: 