# Model 1: Tuning Fully Connected Neural Network

In [None]:
from pathlib import Path
import torch 
import numpy as np

In [None]:
import sys; sys.path.insert(0, '/mnt/src')

In [None]:
from models.fully_connected import FullyConnected, train

## Read Datasets from .csv

In [None]:
from utils.file_io import read_angle_datasets

In [None]:
# Column names handed to read_angle_datasets as the feature columns.
feature_columns = [
    'left_boom_base_yaw_joint',
    'left_boom_base_pitch_joint',
    'left_boom_main_prismatic_joint',
    'left_boom_second_roll_joint',
    'left_boom_second_yaw_joint',
    'left_boom_top_pitch_joint',
]

# Every label entry pairs a column name with its own int64 index array.
# NOTE(review): presumably the array picks component 1 of each cable's lowest
# point - confirm against read_angle_datasets.
label_columns = (
    'cable1_lowest_point',
    'cable2_lowest_point',
    'cable3_lowest_point',
)
label_features = [
    (column, np.array([1], dtype=np.int64)) for column in label_columns
]

In [None]:
# Directory handed to read_angle_datasets as the data location.
data_folder = Path("/mnt/data/").absolute()

# Second positional argument of read_angle_datasets.
# NOTE(review): presumably the share of rows that goes into the training
# split - confirm against utils.file_io.
split_ratio = 0.9

train_data, test_data = read_angle_datasets(
    data_folder,
    split_ratio,
    feature_columns,
    label_features,
)

In [None]:
# Derive the model sizes from the first training sample: element 0 gives the
# input size, element 1 the output size.
first_input, first_output = train_data[0][0], train_data[0][1]
input_shape = first_input.shape[0]
output_shape = first_output.shape[0]

print(f"Data shape {input_shape} / {output_shape} of total {len(train_data) + len(test_data)} data rows!")

## Load parameters, functions and dataloaders

In [None]:
from utils.file_io import define_dataloader_from_angle_dataset

In [None]:
# Output directory for the tuning artefacts written by this notebook.
tune_path = Path("/mnt/models/fully_connected/tune").absolute()

# exist_ok=True keeps this cell re-runnable: without it, mkdir raises
# FileExistsError as soon as the directory exists from an earlier run.
tune_path.mkdir(parents=True, exist_ok=True)

## Train the model with Optuna hyperparameter tuning

In [None]:
from ray import tune, train as ray_train
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch
from torch import nn
from typing import Dict
from utils.cluster import attach_ray, disconnect_ray
from utils.angle_dataset import AngleDataset
from utils.activation import get_activation
from utils.optimizer import get_optimizer_function
from utils.loss_functions import get_loss_function
import random; random.seed(0)

In [None]:
def parameter_train(parameter: Dict, train_epochs: int, train_data: AngleDataset, test_data: AngleDataset, model_input_shape: int, model_output_shape: int) -> None:
    """Build and train one FullyConnected model for a single hyperparameter set.

    Args:
        parameter: Hyperparameter mapping. The keys "batch_size",
            "activation", "hidden_layers", "dropout", "optimizer" and "lr"
            are read.
        train_epochs: Epoch count forwarded to ``train``.
        train_data: Dataset passed first to the dataloader factory.
        test_data: Dataset passed second to the dataloader factory.
        model_input_shape: Input size handed to ``FullyConnected``.
        model_output_shape: Output size handed to ``FullyConnected``.
    """
    # Resolve the device inside the function so that it reflects the worker
    # that actually runs the trial.
    cuda_available = torch.cuda.is_available()
    device = "cuda" if cuda_available else "cpu"
    if not cuda_available:
        print("No cuda device found!")

    # Only the first two values returned by the factory are used here.
    loaders = define_dataloader_from_angle_dataset(train_data, test_data, batch_size=parameter["batch_size"])
    train_dataloader, validation_dataloader, _ = loaders

    activation = get_activation(parameter["activation"])
    model = FullyConnected(
        model_input_shape,
        parameter["hidden_layers"],
        model_output_shape,
        parameter["dropout"],
        hidden_activation=activation,
    )

    # Move the model to the training device before the optimizer is
    # instantiated.
    model.to(device)

    loss_function = get_loss_function()
    optimizer = get_optimizer_function(parameter["optimizer"], model, parameter["lr"])

    # The return value of train is not needed; it is called with tune=True.
    train(
        train_epochs,
        train_dataloader,
        validation_dataloader,
        model,
        loss_function,
        optimizer,
        None,
        device,
        report_interval=50,
        tune=True,
    )

Define parameter ranges

In [None]:
# Number of configurations to sample (passed to TuneConfig as num_samples).
num_samples = 2_000
# Epochs per trial; also used as max_t of the ASHA scheduler.
num_epochs = 750
# grace_period handed to the ASHA scheduler.
grace_period = 10

In [None]:
# Generate the candidate hidden-layer layouts offered to the tuner.
# Every layout is a list holding one width per hidden layer.
num_hidden_layers = [1, 2, 3, 5, 10]
hidde_layer_layouts = []

max_size = output_shape*5

# Random layouts: two layouts per depth, every width drawn independently from
# [input_shape, max_size). randrange requires input_shape < max_size.
for _ in range(2):
    for depth in num_hidden_layers:
        hidde_layer_layouts.append(
            [random.randrange(input_shape, max_size) for _ in range(depth)]
        )

# Up-down layouts: symmetric, each step towards the middle is strictly wider
# than the previous one.
for depth in num_hidden_layers:
    if depth < 3:
        continue

    layout = [0] * depth
    last_layer = input_shape

    # Fill mirrored positions from the outside in; (depth + 1) // 2 steps
    # reach the middle layer.
    for pos in range((depth + 1) // 2):
        dim = random.randrange(last_layer, last_layer + input_shape)
        if dim == last_layer:
            dim += 1
        layout[pos] = layout[-(pos + 1)] = last_layer = dim

    hidde_layer_layouts.append(layout)

# Bottleneck layouts: symmetric, narrowing towards the middle layer.
for depth in num_hidden_layers:
    if depth < 3:
        continue

    layout = [0] * depth
    last_layer = input_shape*2

    for pos in range((depth + 1) // 2):
        # randrange excludes its stop value, so the draw is always below the
        # previous width. Each step can remove up to output_shape units, so
        # deep layouts could reach zero or negative widths, which are not
        # valid layer sizes; clamp the width at 1.
        dim = max(random.randrange(last_layer - output_shape, last_layer), 1)
        layout[pos] = layout[-(pos + 1)] = last_layer = dim

    hidde_layer_layouts.append(layout)

In [None]:
# Candidate batch sizes: 64, 80, ..., 240.
batch_sizes = list(range(64, 256, 16))

# Search space passed to the tuner as param_space.
parameter_space = {
    "optimizer": tune.choice(["adam", "sgd", "adamw", "adagrad"]),
    "lr": tune.loguniform(1e-6, 1e-2, base=10),
    "activation": tune.choice(["relu", "gelu"]),
    "batch_size": tune.choice(batch_sizes),
    "dropout": tune.loguniform(0.05, 0.6, base=2),
    "hidden_layers": tune.choice(hidde_layer_layouts),
}

In [None]:
# ASHA scheduler that minimises the reported "loss"; num_epochs is its max_t
# and grace_period its grace_period.
scheduler = ASHAScheduler(
    metric="loss",
    mode="min",
    max_t=num_epochs,
    grace_period=grace_period,
)

In [None]:
# Optuna-backed search algorithm, configured to minimise the same "loss"
# metric as the scheduler.
search_alg = OptunaSearch(metric="loss", mode="min")

In [None]:
# NOTE(review): presumably connects this notebook to the Ray cluster before
# the tuner is built - confirm in utils.cluster what manager=True selects.
attach_ray(manager = True)

In [None]:
# Bind the fixed arguments with tune.with_parameters rather than a lambda
# closure. A closure drags both datasets along inside the serialised
# trainable; with_parameters is Ray's documented way to hand large objects to
# trials. The sampled config still arrives as the first positional argument
# of parameter_train.
trainable = tune.with_parameters(
    parameter_train,
    train_epochs=num_epochs,
    train_data=train_data,
    test_data=test_data,
    model_input_shape=input_shape,
    model_output_shape=output_shape,
)

# Resources requested per trial.
ray_resources_manager = tune.with_resources(
    trainable=trainable,
    # See: https://stackoverflow.com/questions/58967793/what-is-the-way-to-make-tune-run-parallel-trials-across-multiple-gpus
    resources={"cpu": 5, "gpu": 0.5},
)

tuner = tune.Tuner(
    ray_resources_manager,
    param_space=parameter_space,
    tune_config=tune.TuneConfig(
        scheduler=scheduler,
        search_alg=search_alg,
        num_samples=num_samples,
    ),
    run_config=ray_train.RunConfig(
        name="fully_connected",
    ),
)

Start hyperparameter optimization

In [None]:
# Run the search configured on the tuner; the returned results are turned
# into a dataframe in the cells below.
results = tuner.fit()

In [None]:
# NOTE(review): presumably releases the Ray connection opened by attach_ray
# now that tuning has finished - confirm in utils.cluster.
disconnect_ray()

In [None]:
# Export the tuning results as a CSV file inside tune_path.
result_grid = results.get_dataframe()
csv_path = tune_path / "trail_grid_.csv"
result_grid.to_csv(csv_path)

In [None]:
# Show the five rows with the lowest loss (ascending sort, first five rows).
result_grid.sort_values('loss').head(5)

In [None]:
# Pick the row with the lowest loss. idxmin returns an index *label*, so the
# row is fetched with the label-based .loc; positional .iloc only gives the
# same row while the frame still carries its default 0..n-1 index.
best_result = result_grid.loc[result_grid['loss'].idxmin()].to_dict()
trail_id = best_result['trial_id']

print(f"Trail ID from the best run: {trail_id}")

In [None]:
# Headline with the best loss, followed by a separator.
print(f"Best trail by loss value {best_result['loss']}", "\n------")

# Report only the entries whose key contains 'config'.
config_keys = [key for key in best_result if 'config' in key]
for key in config_keys:
    print(f"Best trail: {key} value {best_result[key]}")

If the training was carried out on a cluster, do not forget to also save the trial's checkpoints that are stored on the other devices.