# Model 1: Tuning Fully Connected Neural Network

In [None]:
from pathlib import Path
import torch 

In [None]:
# Put the parent directory first on the module search path; presumably this is
# what lets the `models` and `utils` imports in the following cells resolve.
import sys

sys.path.insert(0, '../')

In [None]:
from models.fully_connected import FullyConnected, feature_columns, label_columns, get_loss_function, get_optimizer_function, train

## Read Datasets from .csv

In [None]:
from utils.file_io import read_angle_datasets

In [None]:
# Folder holding the .csv files, relative to the notebook's working directory
data_folder = Path("../../data/")

# Only a quarter of the available rows is read (sample_size=0.25).
# NOTE(review): the positional 0.9 is presumably the train/test split ratio - confirm in utils.file_io.
train_data, test_data = read_angle_datasets(
    data_folder,
    0.9,
    feature_columns,
    label_columns,
    sample_size=0.25,
)

In [None]:
# The first training sample is indexed as (features, labels); the length of each
# part gives the model's input and output width.
input_shape = train_data[0][0].shape[0]
output_shape = train_data[0][1].shape[0]
print(f"Data shape {input_shape} / {output_shape} of total {len(train_data) + len(test_data)} data rows!")

## Load parameters, functions and dataloaders

In [None]:
from torch.utils.data import DataLoader
from utils.file_io import define_dataloader_from_angle_dataset

In [None]:
# Absolute base directory for the tuning output: parameter_train creates one
# sub-directory per trial in here, and the result table is written here as .csv.
tune_path = Path("../../models/fully_connected/tune").absolute()

## Train the model with Optuna hyperparameter tuning

In [None]:
from ray import tune, train as ray_train
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch
from torch import nn
from typing import Dict
from utils.cluster import attach_ray, disconnect_ray

In [None]:
def parameter_train(parameter: Dict, train_epochs: int, train_data: DataLoader, test_data: DataLoader, model_input_shape: int,
                    model_output_shape: int, checkpoint_path: Path) -> None:
    """Run a single Ray Tune trial: build a FullyConnected model from ``parameter`` and train it.

    Must be called from inside a Ray Tune trial, because the trial id is read
    from the active Ray train context.

    Args:
        parameter: Sampled hyperparameters. The keys ``batch_size``, ``hidden_layers``,
            ``dropout``, ``hidden_activation`` and ``lr`` are read.
        train_epochs: Number of epochs forwarded to ``train``.
        train_data: Training data handed to ``define_dataloader_from_angle_dataset``.
        test_data: Test data handed to ``define_dataloader_from_angle_dataset``.
        model_input_shape: Input size passed to ``FullyConnected``.
        model_output_shape: Output size passed to ``FullyConnected``.
        checkpoint_path: Base directory; a sub-directory named after the trial id
            is created below it and passed to ``train``.

    NOTE(review): ``train_data`` / ``test_data`` are annotated as ``DataLoader`` but are
    passed to a helper that builds the dataloaders, so they are presumably datasets - confirm.
    """
    # Determine the device on the actual worker that executes the trial
    device = "cuda" if torch.cuda.is_available() else "cpu"

    if device != "cuda":
        print("No cuda device found!")

    run_id = ray_train.get_context().get_trial_id()
    run_checkpoint = checkpoint_path / run_id
    # exist_ok: a trial that is started again under the same id must not crash
    # just because its directory was already created by the earlier attempt.
    run_checkpoint.mkdir(parents=True, exist_ok=True)

    # The third return value of the helper is not needed for tuning
    train_dataloader, validation_dataloader, _ = define_dataloader_from_angle_dataset(
        train_data, test_data, batch_size=parameter["batch_size"]
    )

    model = FullyConnected(
        model_input_shape,
        parameter["hidden_layers"],
        model_output_shape,
        parameter["dropout"],
        parameter["hidden_activation"],
    )

    optimizer = get_optimizer_function(model, parameter["lr"])
    loss_function = get_loss_function()

    # The return value of train() is not used here
    train(train_epochs, train_dataloader, validation_dataloader, model, loss_function, optimizer,
          run_checkpoint, device, report_interval=50, tune=True)

Define parameter ranges

In [None]:
# num_samples: number of hyperparameter configurations handed to tune.TuneConfig
# num_epochs:  epoch budget per trial, also used as the scheduler's max_t
num_samples, num_epochs = 500, 1000

In [None]:
# Search space; the keys match the entries parameter_train reads from its config.
parameter_space = {
    # Learning rate, sampled log-uniformly
    "lr": tune.loguniform(1e-5, 1e-3),
    # Batch sizes 40..90 in steps of 10
    "batch_size": tune.choice([40, 50, 60, 70, 80, 90]),
    # Candidate hidden layer layouts
    "hidden_layers": tune.choice([[8, 16], [8, 16, 32], [16, 32]]),
    "hidden_activation": tune.choice(["relu", "tanh"]),
    "dropout": tune.uniform(0.1, 0.5),
}

In [None]:
# ASHA scheduler comparing trials on the reported "loss" metric (lower is better).
# NOTE(review): max_t and grace_period are counted in the scheduler's time attribute
# (presumably reported training iterations); that equals epochs only if `train`
# reports once per epoch - confirm against report_interval.
scheduler = ASHAScheduler(metric="loss", mode="min", grace_period=5, max_t=num_epochs)

In [None]:
# Optuna proposes new configurations, minimising the same "loss" metric the scheduler uses
search_alg = OptunaSearch(metric="loss", mode="min")

In [None]:
# Connect this notebook to Ray before the tuner is built.
# NOTE(review): manager=True presumably marks this process as the managing node - see utils.cluster.
attach_ray(manager=True)

In [None]:
# The lambda receives the sampled config from Tune; the remaining arguments of
# parameter_train are bound through the closure.
# NOTE(review): the closure captures train_data and test_data, so both are serialized
# together with the trainable. Ray documents tune.with_parameters for passing large
# objects - consider switching if the datasets grow.
ray_resources_manager = tune.with_resources(
    trainable=lambda param: parameter_train(
        param, num_epochs, train_data, test_data, input_shape, output_shape, tune_path
    ),
    # Per-trial request: 3 CPUs and a quarter of a GPU.
    # See: https://stackoverflow.com/questions/58967793/what-is-the-way-to-make-tune-run-parallel-trials-across-multiple-gpus
    resources={"cpu": 3, "gpu": 0.25},
)

tuner = tune.Tuner(
    ray_resources_manager,
    param_space=parameter_space,
    tune_config=tune.TuneConfig(
        scheduler=scheduler,
        search_alg=search_alg,
        num_samples=num_samples,
    ),
    run_config=ray_train.RunConfig(name="fully_connected"),
)

Start hyperparameter optimization

In [None]:
# Run the tuning job configured above and keep the returned results for the analysis cells below
results = tuner.fit()

In [None]:
# Tuning has finished; presumably this releases the connection opened by attach_ray (see utils.cluster)
disconnect_ray()

In [None]:
# Save as csv file
result_grid = results.get_dataframe()
result_grid.to_csv(tune_path / "trail_grid_.csv")

In [None]:
# Display the five rows with the smallest `loss` (ascending sort, first five rows)
result_grid.sort_values('loss').head(5)

In [None]:
# idxmin() returns the index *label* of the row with the smallest loss, so the row
# has to be selected by label (.loc). Positional .iloc only gives the same row
# while the frame carries a default 0..n-1 index.
best_result = result_grid.loc[result_grid['loss'].idxmin()].to_dict()
trail_id = best_result['trial_id']

print(f"Trail ID from the best run: {trail_id}")

In [None]:
# Print the best loss, then every entry of the best row whose column name contains 'config'
print(f"Best trail by loss value {best_result['loss']}", "\n------")
for key, value in best_result.items():
    if 'config' not in key:
        continue
    print(f"Best trail: {key} value {value}")

If the training was carried out on a cluster, do not forget to also save the trial checkpoints that are stored on the other devices.