In [50]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import torchvision
import random
from tqdm import tqdm
import seaborn as sns

In [51]:
import os
# Change the process working directory to the project's `models_methods` folder.
# Presumably this is what lets the `utility.*` imports in the next cell and the
# relative default `exp_path` of `main` resolve — TODO confirm.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run on another machine until it is adjusted.
os.chdir("/home/peppe/01_Study/01_University/Semester/2/Intro_to_ML/Project/Code/models_methods")

In [63]:
from utility.bypass_bn import enable_running_stats, disable_running_stats
from utility.initialize import initialize
from utility.early_stopping import EarlyStopping

In [53]:
def training_step(model, data_loader, optimizer, loss_fn, device, SAM=False):
    """Run one training pass over `data_loader` and return the running metrics.

    Args:
        model: network to optimise; it is switched to train mode.
        data_loader: iterable yielding `(inputs, targets)` batches.
        optimizer: optimiser for the model parameters. When `SAM` is true it must
            expose `first_step(zero_grad=...)` and `second_step(zero_grad=...)`;
            otherwise the usual `step()` / `zero_grad()` pair is used.
        loss_fn: callable `loss_fn(outputs, targets)`. It may return a scalar or a
            tensor of per-sample losses; it is reduced with `.mean()` for backward.
        device: device every batch is moved to.
        SAM: if true, perform the two forward/backward passes per batch required
            by a two-step (sharpness-aware) optimiser.

    Returns:
        `(loss, accuracy)`: the accumulated loss divided by the number of samples
        seen, and the percentage of samples whose arg-max over dim 1 equals the target.

    Notes:
        The loss accumulated per batch is the sum of the elements of the loss tensor
        from the *first* forward pass, i.e. the same pass whose `outputs` are used
        for the accuracy. With a per-sample loss this yields the mean loss per
        sample; with a batch-mean loss it yields the sum of batch means divided by
        the sample count.
    """
    samples = 0.0
    cumulative_loss = 0.0
    cumulative_accuracy = 0.0

    model.train()

    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # first forward-backward step
        if SAM:
            # presumably turns batch-norm running-stat updates ON for this pass
            # (helper comes from utility.bypass_bn) — TODO confirm
            enable_running_stats(model)
        outputs = model(inputs)

        loss = loss_fn(outputs, targets)
        loss.mean().backward()

        if SAM:
            optimizer.first_step(zero_grad=True)
            # second forward-backward step; kept in its own variable so the
            # reported loss stays the one that matches `outputs`
            disable_running_stats(model)
            perturbed_loss = loss_fn(model(inputs), targets)
            perturbed_loss.mean().backward()
            optimizer.second_step(zero_grad=True)
        else:
            optimizer.step()
            optimizer.zero_grad()

        samples += inputs.shape[0]
        # .sum() is a no-op for scalar losses and makes per-sample losses work
        # (a bare .item() raises on a multi-element tensor)
        cumulative_loss += loss.detach().sum().item()
        _, predicted = outputs.max(dim=1)

        cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss / samples, cumulative_accuracy / samples * 100

In [None]:
def test_step(model, data_loader, loss_fn, device):
    samples = 0.
    cumulative_loss = 0.
    cumulative_accuracy = 0.

    model.eval()

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(data_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)

            loss = loss_fn(outputs, targets)

            samples += inputs.shape[0]
            cumulative_loss += loss.item() 
            _, predicted = outputs.max(1)

            cumulative_accuracy += predicted.eq(targets).sum().item()

    return cumulative_loss / samples, cumulative_accuracy / samples * 100

In [54]:
# tensorboard logging utilities
def log_values(writer, step, loss, accuracy, prefix):
    """Write the loss and accuracy scalars for `step` under the `prefix` tag group.

    Args:
        writer: object exposing `add_scalar(tag, value, step)`.
        step: global step the two values are recorded at.
        loss: value stored under `<prefix>/loss`.
        accuracy: value stored under `<prefix>/accuracy`.
        prefix: tag group name placed before the metric name.
    """
    tagged_scalars = (
        (f"{prefix}/loss", loss),
        (f"{prefix}/accuracy", accuracy),
    )
    for tag, value in tagged_scalars:
        writer.add_scalar(tag, value, step)

In [62]:
def _evaluate_and_report(header, step, writer, model, data_loaders, loss_fn, test_step, device):
    """Evaluate the train/val/test loaders, log the metrics at `step` and print them."""
    print(header)
    train_loss, train_accuracy = test_step(model, data_loaders["train_loader"], loss_fn, device=device)
    val_loss, val_accuracy = test_step(model, data_loaders["val_loader"], loss_fn, device=device)
    test_loss, test_accuracy = test_step(model, data_loaders["test_loader"], loss_fn, device=device)

    # Log to TensorBoard
    log_values(writer, step, train_loss, train_accuracy, "Train")
    log_values(writer, step, val_loss, val_accuracy, "Validation")
    log_values(writer, step, test_loss, test_accuracy, "Test")

    print(f"\tTraining loss {train_loss:.5f}, Training accuracy {train_accuracy:.2f}")
    print(f"\tValidation loss {val_loss:.5f}, Validation accuracy {val_accuracy:.2f}")
    print(f"\tTest loss {test_loss:.5f}, Test accuracy {test_accuracy:.2f}")
    print("-----------------------------------------------------")


def main(model,
         optimizer,
         loss_fn,
         data_loaders: dict,
         train_step: callable,
         test_step: callable,
         device,
         epochs=10,
         exp_name=None,
         exp_path="Code/experiments/",
         verbose=True,
         use_early_stopping=True,
         patience=5,
         delta=1e-3,
         scheduler=None,
         seed=42):
    """Train `model`, evaluating before, during and after training, and log to TensorBoard.

    Args:
        model: network to train; moved to `device`.
        optimizer: optimiser forwarded to `train_step`.
        loss_fn: loss callable forwarded to `train_step` and `test_step`.
        data_loaders: dict with the keys "train_loader", "val_loader" and "test_loader".
        train_step: callable `train_step(model, loader, optimizer, loss_fn, device=...)`
            returning `(loss, accuracy)`.
        test_step: callable `test_step(model, loader, loss_fn, device=...)`
            returning `(loss, accuracy)`.
        device: device used for the model and forwarded to both step callables.
        epochs: maximum number of training epochs.
        exp_name: experiment name; required. It is appended to `exp_path` by plain
            string concatenation, so `exp_path` must end with a path separator.
        exp_path: directory prefix of the experiment's log directory.
        verbose: if true, print the train/validation metrics after every epoch.
        use_early_stopping: if true, build `EarlyStopping(patience, delta)`, call it
            with `(val_loss, model)` after every epoch and stop once its `early_stop`
            attribute is set.
        patience: forwarded to `EarlyStopping`.
        delta: forwarded to `EarlyStopping`.
        scheduler: optional learning-rate scheduler; `step()` is called once per epoch.
        seed: seed forwarded to `initialize`.

    Returns:
        The trained `model`.

    Raises:
        ValueError: if `exp_name` is not given.
        AssertionError: if `exp_path + exp_name` does not exist.
    """
    if exp_name is None:
        # previously this surfaced as an opaque TypeError from `exp_path + None`
        raise ValueError("exp_name must be provided: it names the experiment's log directory")

    log_dir = f"{exp_path + exp_name}"
    assert os.path.exists(log_dir), "Experiment path does not exist"
    # Create a logger for the experiment
    writer = SummaryWriter(log_dir=log_dir)

    # try/finally so the writer is closed even if training raises or is interrupted
    try:
        initialize(seed=seed)

        early_stopping = None
        if use_early_stopping:
            early_stopping = EarlyStopping(patience=patience,
                                           delta=delta)

        model.to(device)

        # Computes evaluation results before training (logged at step -1)
        _evaluate_and_report("Before training:", -1, writer, model,
                             data_loaders, loss_fn, test_step, device)

        pbar = tqdm(range(epochs), desc="Training")
        for e in pbar:
            train_loss, train_accuracy = train_step(model, data_loaders["train_loader"], optimizer, loss_fn, device=device)
            if scheduler is not None:
                scheduler.step()
            val_loss, val_accuracy = test_step(model, data_loaders["val_loader"], loss_fn, device=device)
            if verbose:
                print(f"Epoch: {e + 1}")
                print(f"\tTraining loss {train_loss:.5f}, Training accuracy {train_accuracy:.2f}")
                print(f"\tValidation loss {val_loss:.5f}, Validation accuracy {val_accuracy:.2f}")
                print("-----------------------------------------------------")

            # Logs to TensorBoard
            log_values(writer, e, train_loss, train_accuracy, "Train")
            log_values(writer, e, val_loss, val_accuracy, "Validation")

            pbar.set_postfix(train_loss=train_loss, train_accuracy=train_accuracy, val_loss=val_loss, val_accuracy=val_accuracy)

            if early_stopping is not None:
                early_stopping(val_loss, model)
                if early_stopping.early_stop:
                    print("Early stopping")
                    break

        # Compute final evaluation results (logged at step epochs + 1)
        _evaluate_and_report("After training:", epochs + 1, writer, model,
                             data_loaders, loss_fn, test_step, device)
    finally:
        # Closes the logger
        writer.close()

    # Let's return the net
    return model

In [None]:
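# Visualize the logged runs with TensorBoard inside the notebook.
# NOTE(review): main() writes its logs to `exp_path + exp_name` (default exp_path
# "Code/experiments/"), while the command below reads `runs3` — confirm the directory.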
%reload_ext tensorboard
%tensorboard --logdir=runs3