In [2]:
import copy

import torch
from torch.optim.lr_scheduler import ExponentialLR, StepLR, MultiStepLR, ReduceLROnPlateau, CosineAnnealingLR
from transformers import BertTokenizer

from pys.functions import train_with_validation

In [6]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer loaded from the pretrained 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

**Note:** The functions below tune each learning rate scheduler with a grid search. Bayesian optimization could be used instead for this hyperparameter tuning.

### ExponentialLR

In [None]:
def exponential_lr(model, train_loader, val_loader, optimizer, device, num_epochs, grid_params=None):
    """Grid-search ``ExponentialLR`` settings and return the best run's history.

    Each ``gamma`` candidate is trained with ``train_with_validation``. The
    candidate whose final-epoch validation F1 (``f1_val[-1]``) is strictly the
    highest wins; on ties the earlier candidate is kept.

    All candidates share ``model`` and ``optimizer``. Their state is
    snapshotted on entry and restored before every candidate so that each one
    starts from the same weights and learning rate instead of continuing from
    wherever the previous candidate stopped. After the call, ``model`` and
    ``optimizer`` hold the state left by the last candidate trained, which is
    not necessarily the best one.

    Args:
        model: Model to train; must provide ``state_dict``/``load_state_dict``.
        train_loader: Training data, forwarded to ``train_with_validation``.
        val_loader: Validation data, forwarded to ``train_with_validation``.
        optimizer: Optimizer every candidate scheduler is attached to.
        device: Device forwarded to ``train_with_validation``.
        num_epochs: Epoch count forwarded to ``train_with_validation``.
        grid_params: Optional grid of the form ``{'gamma': [...]}``.
            Defaults to ``{'gamma': [0.9]}``.

    Returns:
        Tuple ``(f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
        scheduler, scheduler_params)`` of the winning candidate. Every entry
        is ``None`` when no candidate's final validation F1 compared greater
        than ``-inf`` (for example, when the grid is empty).
    """
    if grid_params is None:
        grid_params = {'gamma': [0.9]}

    # state_dict() tensors alias the live parameters, so take deep copies.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    best_f1 = float('-inf')
    best_run = (None,) * 8

    for gamma in grid_params['gamma']:
        # Reset to the entry state so candidates are compared on equal footing.
        model.load_state_dict(initial_model_state)
        # The optimizer may keep references to the tensors it is handed, so
        # give it a fresh copy and keep the snapshot pristine.
        optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

        scheduler = ExponentialLR(optimizer, gamma=gamma)

        print(f"Using ExponentialLR with gamma={gamma}")
        f1_train, f1_val, acc_train, acc_val, loss_train, loss_val = train_with_validation(
            model, train_loader, val_loader, optimizer, device, num_epochs, scheduler
        )

        if f1_val[-1] > best_f1:
            best_f1 = f1_val[-1]
            best_run = (
                f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
                scheduler, {'gamma': gamma},
            )

    return best_run

### StepLR

In [None]:
def step_lr(model, train_loader, val_loader, optimizer, device, num_epochs, grid_params=None):
    """Grid-search ``StepLR`` settings and return the best run's history.

    Each ``(step_size, gamma)`` combination is trained with
    ``train_with_validation``. The combination whose final-epoch validation F1
    (``f1_val[-1]``) is strictly the highest wins; on ties the earlier
    combination is kept.

    All candidates share ``model`` and ``optimizer``. Their state is
    snapshotted on entry and restored before every candidate so that each one
    starts from the same weights and learning rate instead of continuing from
    wherever the previous candidate stopped. After the call, ``model`` and
    ``optimizer`` hold the state left by the last candidate trained, which is
    not necessarily the best one.

    Args:
        model: Model to train; must provide ``state_dict``/``load_state_dict``.
        train_loader: Training data, forwarded to ``train_with_validation``.
        val_loader: Validation data, forwarded to ``train_with_validation``.
        optimizer: Optimizer every candidate scheduler is attached to.
        device: Device forwarded to ``train_with_validation``.
        num_epochs: Epoch count forwarded to ``train_with_validation``.
        grid_params: Optional grid of the form
            ``{'step_size': [...], 'gamma': [...]}``. Defaults to
            ``{'step_size': [2], 'gamma': [0.9]}``.

    Returns:
        Tuple ``(f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
        scheduler, scheduler_params)`` of the winning candidate. Every entry
        is ``None`` when no candidate's final validation F1 compared greater
        than ``-inf`` (for example, when the grid is empty).
    """
    if grid_params is None:
        grid_params = {
            'step_size': [2],
            'gamma': [0.9]
        }

    # state_dict() tensors alias the live parameters, so take deep copies.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    best_f1 = float('-inf')
    best_run = (None,) * 8

    for step_size in grid_params['step_size']:
        for gamma in grid_params['gamma']:
            # Reset to the entry state so candidates are compared on equal footing.
            model.load_state_dict(initial_model_state)
            # The optimizer may keep references to the tensors it is handed,
            # so give it a fresh copy and keep the snapshot pristine.
            optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)

            print(f"Using StepLR with step_size={step_size} and gamma={gamma}")
            f1_train, f1_val, acc_train, acc_val, loss_train, loss_val = train_with_validation(
                model, train_loader, val_loader, optimizer, device, num_epochs, scheduler
            )

            if f1_val[-1] > best_f1:
                best_f1 = f1_val[-1]
                best_run = (
                    f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
                    scheduler, {'step_size': step_size, 'gamma': gamma},
                )

    return best_run

### MultiStepLR

In [None]:
def multi_step_lr(model, train_loader, val_loader, optimizer, device, num_epochs, grid_params=None):
    """Grid-search ``MultiStepLR`` settings and return the best run's history.

    Each ``(milestones, gamma)`` combination is trained with
    ``train_with_validation``. The combination whose final-epoch validation F1
    (``f1_val[-1]``) is strictly the highest wins; on ties the earlier
    combination is kept.

    All candidates share ``model`` and ``optimizer``. Their state is
    snapshotted on entry and restored before every candidate so that each one
    starts from the same weights and learning rate instead of continuing from
    wherever the previous candidate stopped. After the call, ``model`` and
    ``optimizer`` hold the state left by the last candidate trained, which is
    not necessarily the best one.

    Args:
        model: Model to train; must provide ``state_dict``/``load_state_dict``.
        train_loader: Training data, forwarded to ``train_with_validation``.
        val_loader: Validation data, forwarded to ``train_with_validation``.
        optimizer: Optimizer every candidate scheduler is attached to.
        device: Device forwarded to ``train_with_validation``.
        num_epochs: Epoch count forwarded to ``train_with_validation``.
        grid_params: Optional grid of the form
            ``{'milestones': [[...], ...], 'gamma': [...]}``. Defaults to
            ``{'milestones': [[1, 5]], 'gamma': [0.1]}``.

    Returns:
        Tuple ``(f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
        scheduler, scheduler_params)`` of the winning candidate. Every entry
        is ``None`` when no candidate's final validation F1 compared greater
        than ``-inf`` (for example, when the grid is empty).
    """
    if grid_params is None:
        grid_params = {
            'milestones': [[1, 5]],
            'gamma': [0.1]
        }

    # state_dict() tensors alias the live parameters, so take deep copies.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    best_f1 = float('-inf')
    best_run = (None,) * 8

    for milestones in grid_params['milestones']:
        for gamma in grid_params['gamma']:
            # Reset to the entry state so candidates are compared on equal footing.
            model.load_state_dict(initial_model_state)
            # The optimizer may keep references to the tensors it is handed,
            # so give it a fresh copy and keep the snapshot pristine.
            optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

            scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

            print(f"Using MultiStepLR with milestones={milestones} and gamma={gamma}")
            f1_train, f1_val, acc_train, acc_val, loss_train, loss_val = train_with_validation(
                model, train_loader, val_loader, optimizer, device, num_epochs, scheduler
            )

            if f1_val[-1] > best_f1:
                best_f1 = f1_val[-1]
                best_run = (
                    f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
                    scheduler, {'milestones': milestones, 'gamma': gamma},
                )

    return best_run

### ReduceLROnPlateau

In [2]:
def reduce_lr_on_plateau(model, train_loader, val_loader, optimizer, device, num_epochs, grid_params=None):
    """Grid-search ``ReduceLROnPlateau`` settings and return the best run's history.

    Each ``(mode, factor, patience)`` combination is trained with
    ``train_with_validation``. The combination whose final-epoch validation F1
    (``f1_val[-1]``) is strictly the highest wins; on ties the earlier
    combination is kept.

    All candidates share ``model`` and ``optimizer``. Their state is
    snapshotted on entry and restored before every candidate so that each one
    starts from the same weights and learning rate instead of continuing from
    wherever the previous candidate stopped. After the call, ``model`` and
    ``optimizer`` hold the state left by the last candidate trained, which is
    not necessarily the best one.

    Args:
        model: Model to train; must provide ``state_dict``/``load_state_dict``.
        train_loader: Training data, forwarded to ``train_with_validation``.
        val_loader: Validation data, forwarded to ``train_with_validation``.
        optimizer: Optimizer every candidate scheduler is attached to.
        device: Device forwarded to ``train_with_validation``.
        num_epochs: Epoch count forwarded to ``train_with_validation``.
        grid_params: Optional grid of the form
            ``{'mode': [...], 'factor': [...], 'patience': [...]}``. Defaults
            to ``{'mode': ['max'], 'factor': [0.1], 'patience': [2]}``.

    Returns:
        Tuple ``(f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
        scheduler, scheduler_params)`` of the winning candidate. Every entry
        is ``None`` when no candidate's final validation F1 compared greater
        than ``-inf`` (for example, when the grid is empty).
    """
    if grid_params is None:
        grid_params = {
            'mode': ['max'],
            'factor': [0.1],
            'patience': [2],
        }

    # state_dict() tensors alias the live parameters, so take deep copies.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    best_f1 = float('-inf')
    best_run = (None,) * 8

    for mode in grid_params['mode']:
        for factor in grid_params['factor']:
            for patience in grid_params['patience']:
                # Reset to the entry state so candidates are compared on equal footing.
                model.load_state_dict(initial_model_state)
                # The optimizer may keep references to the tensors it is
                # handed, so give it a fresh copy and keep the snapshot pristine.
                optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

                # NOTE(review): ReduceLROnPlateau.step() requires the monitored
                # metric as an argument, unlike the other schedulers used in
                # this notebook. Confirm train_with_validation passes it, and
                # that the metric is one where higher is better (mode='max').
                scheduler = ReduceLROnPlateau(optimizer, mode=mode, factor=factor, patience=patience)

                print(f"Using ReduceLROnPlateau with mode={mode}, factor={factor}, patience={patience}")
                f1_train, f1_val, acc_train, acc_val, loss_train, loss_val = train_with_validation(
                    model, train_loader, val_loader, optimizer, device, num_epochs, scheduler
                )

                if f1_val[-1] > best_f1:
                    best_f1 = f1_val[-1]
                    best_run = (
                        f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
                        scheduler, {'mode': mode, 'factor': factor, 'patience': patience},
                    )

    return best_run


### CosineAnnealingLR

In [None]:
def cosine_annealing_lr(model, train_loader, val_loader, optimizer, device, num_epochs, grid_params=None):
    """Grid-search ``CosineAnnealingLR`` settings and return the best run's history.

    Each ``(T_max, eta_min)`` combination is trained with
    ``train_with_validation``. The combination whose final-epoch validation F1
    (``f1_val[-1]``) is strictly the highest wins; on ties the earlier
    combination is kept.

    All candidates share ``model`` and ``optimizer``. Their state is
    snapshotted on entry and restored before every candidate so that each one
    starts from the same weights and learning rate instead of continuing from
    wherever the previous candidate stopped. After the call, ``model`` and
    ``optimizer`` hold the state left by the last candidate trained, which is
    not necessarily the best one.

    Args:
        model: Model to train; must provide ``state_dict``/``load_state_dict``.
        train_loader: Training data, forwarded to ``train_with_validation``.
        val_loader: Validation data, forwarded to ``train_with_validation``.
        optimizer: Optimizer every candidate scheduler is attached to.
        device: Device forwarded to ``train_with_validation``.
        num_epochs: Epoch count forwarded to ``train_with_validation``.
        grid_params: Optional grid of the form
            ``{'T_max': [...], 'eta_min': [...]}``. Defaults to
            ``{'T_max': [50], 'eta_min': [1e-5]}``.

    Returns:
        Tuple ``(f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
        scheduler, scheduler_params)`` of the winning candidate. Every entry
        is ``None`` when no candidate's final validation F1 compared greater
        than ``-inf`` (for example, when the grid is empty).
    """
    if grid_params is None:
        # NOTE(review): T_max=50 is independent of num_epochs; whether that is
        # appropriate depends on how often train_with_validation steps the
        # scheduler (per epoch vs. per batch) — confirm.
        grid_params = {
            'T_max': [50],
            'eta_min': [1e-5]
        }

    # state_dict() tensors alias the live parameters, so take deep copies.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    best_f1 = float('-inf')
    best_run = (None,) * 8

    for T_max in grid_params['T_max']:
        for eta_min in grid_params['eta_min']:
            # Reset to the entry state so candidates are compared on equal footing.
            model.load_state_dict(initial_model_state)
            # The optimizer may keep references to the tensors it is handed,
            # so give it a fresh copy and keep the snapshot pristine.
            optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

            scheduler = CosineAnnealingLR(optimizer, T_max=T_max, eta_min=eta_min)

            print(f"Using CosineAnnealingLR with T_max={T_max} and eta_min={eta_min}")
            f1_train, f1_val, acc_train, acc_val, loss_train, loss_val = train_with_validation(
                model, train_loader, val_loader, optimizer, device, num_epochs, scheduler
            )

            if f1_val[-1] > best_f1:
                best_f1 = f1_val[-1]
                best_run = (
                    f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
                    scheduler, {'T_max': T_max, 'eta_min': eta_min},
                )

    return best_run


________________________

In [None]:
def decay_lr_grid(model, train_loader, val_loader, optimizer, device, num_epochs):
    """Run every scheduler grid search and return the overall best run.

    Calls ``exponential_lr``, ``step_lr``, ``multi_step_lr``,
    ``reduce_lr_on_plateau`` and ``cosine_annealing_lr`` in that order and
    keeps the result whose final-epoch validation F1 (``f1_val[-1]``) is
    strictly the highest; on ties the earlier search is kept.

    All searches share ``model`` and ``optimizer``. Their state is snapshotted
    on entry and restored before each search so that every scheduler family
    starts from the same weights and learning rate instead of continuing from
    wherever the previous search stopped. After the call, ``model`` and
    ``optimizer`` hold the state left by the last search, which is not
    necessarily the best one.

    Args:
        model: Model to train; must provide ``state_dict``/``load_state_dict``.
        train_loader: Training data, forwarded to each search.
        val_loader: Validation data, forwarded to each search.
        optimizer: Optimizer shared by every search.
        device: Device forwarded to each search.
        num_epochs: Epoch count forwarded to each search.

    Returns:
        Tuple ``(f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
        scheduler, scheduler_params)`` of the best search. Every entry is
        ``None`` when no search produced a usable validation F1 history.
    """
    scheduler_searches = (
        exponential_lr,
        step_lr,
        multi_step_lr,
        reduce_lr_on_plateau,
        cosine_annealing_lr,
    )

    # state_dict() tensors alias the live parameters, so take deep copies.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    best_f1 = float('-inf')
    best_run = (None,) * 8

    for search in scheduler_searches:
        # Reset to the entry state so scheduler families are compared fairly.
        model.load_state_dict(initial_model_state)
        # The optimizer may keep references to the tensors it is handed, so
        # give it a fresh copy and keep the snapshot pristine.
        optimizer.load_state_dict(copy.deepcopy(initial_optimizer_state))

        f1_train, f1_val, acc_train, acc_val, loss_train, loss_val, scheduler, scheduler_params = search(
            model, train_loader, val_loader, optimizer, device, num_epochs
        )

        # A search returns None histories when none of its candidates beat
        # -inf (e.g. NaN F1); skip it instead of failing on ``None[-1]``.
        if f1_val is None or len(f1_val) == 0:
            continue

        if f1_val[-1] > best_f1:
            best_f1 = f1_val[-1]
            best_run = (
                f1_train, f1_val, acc_train, acc_val, loss_train, loss_val,
                scheduler, scheduler_params,
            )

    return best_run