In [1]:
import os

import numpy as np
import pandas as pd
import torch
from optuna.integration import SkoptSampler
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.optuna import OptunaSearch

# Configs

In [2]:
# Search spaces for the hyperparameter optimisation, ordered from the
# narrowest (config1) to the widest (config6). Entries given as plain numbers
# are held fixed; entries given as `tune.*` samplers are searched over.

# config1: only the learning rate is searched.
# NOTE(review): hidden_layer_width is fixed at 70 here but at 75 in config2 —
# confirm the difference is intentional.
config1 = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "hidden_layers": 6,
    "hidden_layer_width": 70,
    "dropout": 0.1,
    "l2": 1e-3,
}

# config2: learning rate and number of hidden layers.
config2 = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "hidden_layers": tune.quniform(2, 10, 2),
    "hidden_layer_width": 75,
    "dropout": 0.1,
    "l2": 1e-3,
}

# config3: learning rate, number of hidden layers and hidden layer width.
config3 = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "hidden_layers": tune.quniform(2, 10, 2),
    "hidden_layer_width": tune.quniform(30, 120, 15),
    "dropout": 0.1,
    "l2": 1e-3,
}

# config4: as config3, plus the dropout rate.
config4 = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "hidden_layers": tune.quniform(2, 10, 2),
    "hidden_layer_width": tune.quniform(30, 120, 15),
    "dropout": tune.uniform(0.0, 0.4),
    "l2": 1e-3,
}

# NOTE(review): config5 is currently identical to config4 — confirm whether a
# different search space was intended for it.
config5 = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "hidden_layers": tune.quniform(2, 10, 2),
    "hidden_layer_width": tune.quniform(30, 120, 15),
    "dropout": tune.uniform(0.0, 0.4),
    "l2": 1e-3,
}

# config6: all five hyperparameters are searched, including the L2 weight.
config6 = {
    "lr": tune.loguniform(1e-5, 1e-1),
    "hidden_layers": tune.quniform(2, 10, 2),
    "hidden_layer_width": tune.quniform(30, 120, 15),
    "dropout": tune.uniform(0.0, 0.4),
    "l2": tune.loguniform(1e-6, 1e-1),
}

# Constants

In [3]:
# Seed passed to the NumPy and PyTorch random generators before each run
random_seed = 12345

# Dataset columns fed to the network and the column it should predict
ins = ["CHK", "PWH", "PDC", "TWH", "FGAS", "FOIL"]
outs = ["QTOT"]

# Result directories for Ray Tune, numbered 1-6
# (presumably one per search-space config of the same number — confirm)
logdir_1 = "run_results/run1"
logdir_2 = "run_results/run2"
logdir_3 = "run_results/run3"
logdir_4 = "run_results/run4"
logdir_5 = "run_results/run5"
logdir_6 = "run_results/run6"

# Name of the reported objective and the direction in which it is optimised
metric = "mean_square_error"
mode = "min"

# Data loaders


In [4]:
# Load the pre-split training, validation and test partitions from CSV.
# The first column of every file is used as the DataFrame index.
# Note: `path` is rebound before each read, so once this cell has run it
# refers to the test-set file.
path = "dataset/training_set.csv"
train_set = pd.read_csv(path, index_col=0)
path = "dataset/validation_set.csv"
val_set = pd.read_csv(path, index_col=0)
path = "dataset/test_set.csv"
test_set = pd.read_csv(path, index_col=0)


def prepare_data(
        input_cols: list,
        output_cols: list,
        train_batch_size: int
):
    """
    Prepares the dataset to be used for HPO
    Converts the module-level train/validation/test DataFrames to float torch
    tensors and wraps the train and validation parts in dataset loaders
    :param input_cols: list of strings, names of the input columns
    :param output_cols: list of strings, names of the output columns
    :param train_batch_size: Batch size of the (shuffled) training loader
    :return: train_loader, x_val, y_val, val_loader, x_test, y_test
    """

    def to_tensors(frame):
        # Select the requested columns and convert them to float tensors
        x = torch.from_numpy(frame[input_cols].values).to(torch.float)
        y = torch.from_numpy(frame[output_cols].values).to(torch.float)
        return x, y

    x_train, y_train = to_tensors(train_set)
    x_val, y_val = to_tensors(val_set)
    x_test, y_test = to_tensors(test_set)

    # Training data is shuffled and served in mini-batches
    train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)

    # Validation data is served unshuffled as one batch holding the whole set
    val_dataset = torch.utils.data.TensorDataset(x_val, y_val)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=len(val_set), shuffle=False)

    return train_loader, x_val, y_val, val_loader, x_test, y_test

# Model

In [5]:
class Net(torch.nn.Module):
    """
    Fully connected feed-forward network with ReLU activations and optional
    dropout after every hidden layer. The output layer is a plain linear layer:
    neither the activation nor dropout is applied to it.
    """

    def __init__(
            self,
            inputs: int,
            hidden_layers: int,
            hidden_layer_width: int,
            outputs: int,
            dropout_value: float
    ):
        """
        :param dropout_value: Dropout value to use. 0.0 If no dropout is desired
        :param inputs: Number of inputs
        :param hidden_layers: Number of hidden layers
        :param hidden_layer_width: Size of hidden layer
        :param outputs: Number of outputs
        """
        super().__init__()

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        layers = [inputs] + [hidden_layer_width] * hidden_layers + [outputs]

        assert len(layers) >= 2, "At least two layers are required (incl. input and output layer)"
        self.layers = layers

        # Fully connected linear layers, interleaved with dropout modules
        linear_layers = []
        n_linear = len(self.layers) - 1

        for i in range(n_linear):
            n_in = self.layers[i]
            n_out = self.layers[i + 1]
            layer = torch.nn.Linear(n_in, n_out)

            # Initialize weights and biases: normal weights with
            # std = sqrt(a / n_in), a = 1 for the first layer and 2 otherwise
            a = 1 if i == 0 else 2
            layer.weight.data = torch.randn((n_out, n_in)) * np.sqrt(a / n_in)
            layer.bias.data = torch.zeros(n_out)

            # Add to list
            linear_layers.append(layer)

            # Add possible dropout, but never after the output layer: the last
            # registered module must be the output Linear, otherwise forward()
            # would push the prediction through the activation and dropout.
            # Dropout has no parameters, so state_dict keys are unaffected.
            is_output_layer = i == n_linear - 1
            if dropout_value and not is_output_layer:
                linear_layers.append(torch.nn.Dropout(dropout_value))

        # Modules/layers must be registered to enable saving of the model
        self.linear_layers = torch.nn.ModuleList(linear_layers)

        # Non-linearity (e.g. ReLU, ELU, or SELU)
        self.act = torch.nn.ReLU(inplace=False)

    def forward(self, input):
        """
        Forward pass to evaluate network for input values
        :param input: tensor assumed to be of size (batch_size, n_inputs)
        :return: output tensor
        """
        x = input
        for module in self.linear_layers[:-1]:
            x = module(x)
            # The activation follows hidden Linear layers only; Dropout
            # modules are applied to the already activated values
            if isinstance(module, torch.nn.Linear):
                x = self.act(x)

        output_layer = self.linear_layers[-1]
        return output_layer(x)

    def get_num_parameters(self):
        """
        :return: Total number of elements over all parameters of the model
        """
        return sum(p.numel() for p in self.parameters())

    def save(self, path: str):
        """
        Save model state
        :param path: Path to save model state
        :return: None
        """
        torch.save({
            'model_state_dict': self.state_dict(),
        }, path)

    def load(self, path: str):
        """
        Load model state from file
        :param path: Path to saved model state
        :return: None
        """
        # Map stored tensors to the GPU when one is available, else to the CPU
        checkpoint = torch.load(path, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        self.load_state_dict(checkpoint['model_state_dict'])

# Training function

In [6]:
def train(config, checkpoint_dir=None):
    """
    Trainable for Ray Tune: builds a Net from the given hyperparameters, trains
    it for 100 epochs and, after every epoch, saves a checkpoint and reports
    the validation error as `mean_square_error`.
    :param config: hyperparameter configuration with the keys "lr",
        "hidden_layers", "hidden_layer_width", "dropout" and "l2"
    :param checkpoint_dir: directory holding a checkpoint to restore model and
        optimizer state from; passed by Ray Tune, None to start from scratch
    :return: None
    """
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Sampled layer counts/widths may arrive as floats, hence the int casts
    net = Net(
        len(ins),
        int(config["hidden_layers"]),
        int(config["hidden_layer_width"]),
        len(outs),
        dropout_value=config["dropout"]
    )

    net = net.to(net.device)

    # Define loss and optimizer
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(net.parameters(), lr=config["lr"])

    # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint
    # should be restored.
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        model_state, optimizer_state = torch.load(checkpoint)
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    # Only the two loaders are needed here; the raw validation/test tensors
    # returned alongside them are ignored
    train_loader, _, _, val_loader, _, _ = prepare_data(
        input_cols=ins,
        output_cols=outs,
        train_batch_size=64
    )

    # Train Network
    for epoch in range(100):
        # specify that we are in training mode
        net.train()

        for inputs, labels in train_loader:

            inputs, labels = inputs.to(net.device), labels.to(net.device)
            # Zero the parameter gradients (from last iteration)
            optimizer.zero_grad()

            # Forward propagation
            outputs = net(inputs)

            # Compute cost function: batch MSE plus an L2 penalty over all
            # parameters (weights and biases), weighted by config["l2"]
            batch_mse = criterion(outputs, labels)

            reg_loss = 0
            for param in net.parameters():
                reg_loss += param.pow(2).sum()

            cost = batch_mse + config["l2"] * reg_loss

            # Backward propagation to compute gradient
            cost.backward()

            # Update parameters using gradient
            optimizer.step()

        # Specify that we are in evaluation mode
        net.eval()

        # Evaluate model on validation data. Gradients are not needed here, so
        # the forward passes run without autograd bookkeeping.
        squared_error_sum = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(net.device), labels.to(net.device)
                squared_error_sum += torch.sum(torch.pow(labels - net(inputs), 2)).item()
        # Sum of squared errors divided by the number of validation samples
        mse_val = squared_error_sum / len(val_loader.dataset)

        # Here we save a checkpoint. It is automatically registered with
        # Ray Tune and will potentially be passed as the `checkpoint_dir`
        # parameter in future iterations. A separate name is used so that the
        # function argument is not overwritten.
        with tune.checkpoint_dir(step=epoch) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save(
                (net.state_dict(), optimizer.state_dict()), path)
        tune.report(mean_square_error=mse_val)

# Optimizer

In [7]:
def optimize(config: dict, iterations: int, experiment_name: str, logdir: str):
    """
    Runs one hyperparameter search with Ray Tune, using Optuna's scikit-optimize
    sampler (Gaussian process, expected improvement) as the search algorithm.
    :param config: search space, mapping hyperparameter names to fixed values
        or Ray Tune sampling domains
    :param iterations: number of configurations to sample (`num_samples`)
    :param experiment_name: name of the experiment, passed to `tune.run`
    :param logdir: directory in which Ray Tune stores the results
    :return: the object returned by `tune.run` for this search
    """
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    sampler = SkoptSampler(
        skopt_kwargs={
            "base_estimator": "GP",
            "n_initial_points": 5,
            "acq_func": "EI",
            "acq_func_kwargs": {"xi": 0.05}
        }
    )

    algo = OptunaSearch(
        sampler=sampler,
        metric=metric,
        mode=mode,
    )
    # Trials are evaluated strictly one after another
    algo = ConcurrencyLimiter(algo, max_concurrent=1)

    # Rank checkpoints by the optimised metric; the "min-" prefix tells Ray
    # Tune that lower values are better
    checkpoint_score_attr = f"min-{metric}" if mode == "min" else metric

    result = tune.run(
        tune.with_parameters(train),
        name=experiment_name,
        config=config,
        metric=metric,
        mode=mode,
        search_alg=algo,
        num_samples=iterations,
        verbose=1,
        checkpoint_score_attr=checkpoint_score_attr,
        keep_checkpoints_num=2,
        local_dir=logdir,
        resources_per_trial={"cpu": 1, "gpu": 0}
    )

    # Hand the outcome back so callers can inspect it instead of discarding it
    return result

# Experiment

In [8]:
def experiment(config=None, logdir=None, runs=8, iterations=100):
    """
    Runs a series of independent hyperparameter searches on one search space.
    Each search is named "xp_000", "xp_001", ... and stored under `logdir`.
    :param config: search space to optimise over; defaults to `config3`
    :param logdir: result directory; defaults to `logdir_3`
    :param runs: number of consecutive searches to run
    :param iterations: number of configurations sampled per search
    :return: None
    """
    # Defaults are resolved at call time so that the module-level values in
    # effect when the experiment starts are the ones used
    if config is None:
        config = config3
    if logdir is None:
        logdir = logdir_3

    for i in range(runs):
        print("Starting New Experiment")
        # Zero-padded three-digit run index
        experiment_name = f"xp_{i:03d}"
        optimize(
            config=config,
            iterations=iterations,
            experiment_name=experiment_name,
            logdir=logdir
        )

In [9]:
# Launch the experiment series. Long-running: the Ray Tune log line recorded in
# this cell's output reports a total run time of about 2920 seconds
# (presumably for a single tuning run — confirm).
experiment()

2021-11-20 17:03:32,900	INFO tune.py:630 -- Total run time: 2919.89 seconds (2919.73 seconds for the tuning loop).
