In this notebook we hand-tune the parameters of the best-performing ML model (the neural-network implementation `MLPRegressor`), which was selected from multiple models in the notebook `02. Training ML models`, to obtain an even better-performing one.


In [7]:
#loading dataset
import numpy as np

# Load the dataset archive and convert both arrays to float64.
# (The inputs conversion used to be written twice; once is enough.)
input_and_output = np.load("../final/dataset.npz")
inputs = input_and_output["inputs"].astype(np.float64)
outputs = input_and_output["outputs"].astype(np.float64)
dataset_size = inputs.shape[0]  # number of rows (samples) in the inputs array

# transforming time profiles to their orders of magnitude

# Smallest order of magnitude that is kept; everything at or below it is
# treated as "zero" by the two functions below.
LOWER_LIMIT = -9


def output_transform(outputs: np.ndarray) -> np.ndarray:
    """Convert raw outputs to their base-10 orders of magnitude.

    Non-positive entries have no logarithm and are mapped to ``LOWER_LIMIT``.
    Positive entries whose log10 falls below ``LOWER_LIMIT`` are clamped to
    ``LOWER_LIMIT`` as well.  The input array is left unmodified.

    Args:
        outputs: array of raw output values.

    Returns:
        A new array of the same shape holding ``log10(outputs)`` bounded
        below by ``LOWER_LIMIT``.
    """
    x = np.copy(outputs)
    non_positive = x <= 0
    # Placeholder value so log10 never sees zero or negative numbers
    # (which would yield -inf / nan and runtime warnings).
    x[non_positive] = 1
    y = np.log10(x)
    y[non_positive] = LOWER_LIMIT
    return np.maximum(y, LOWER_LIMIT)


def output_untransform(transformed_outputs: np.ndarray) -> np.ndarray:
    """Map orders of magnitude back to raw values (``10 ** value``).

    Entries at or below ``LOWER_LIMIT`` are returned as exactly 0, mirroring
    how ``output_transform`` encodes non-positive / tiny values.  The round
    trip is therefore lossy for anything that was clamped.

    Args:
        transformed_outputs: array of base-10 exponents.

    Returns:
        A new floating-point array of the same shape.
    """
    exponents = np.asarray(transformed_outputs)
    at_floor = exponents <= LOWER_LIMIT
    # A float base keeps integer-typed exponents working: ``10 ** int_array``
    # raises ValueError as soon as one exponent is negative.
    z = np.power(10.0, exponents)
    z[at_floor] = 0
    return z

# log10 orders of magnitude of the raw outputs (see output_transform);
# the train/test targets below are sliced from this array.
outputs_order_of_magnitude = output_transform(outputs)


# dropping treatment column in input

def drop_treatment(input_data: np.ndarray) -> np.ndarray:
    """Return the inputs without the treatment column.

    An array with 11 columns is assumed to carry the treatment data in its
    first column, which is sliced off.  Any other width is returned
    unchanged (the very same object).
    """
    has_treatment_column = input_data.shape[1] == 11
    return input_data[:, 1:] if has_treatment_column else input_data

input_without_treatment = drop_treatment(inputs)

# Split rows in order: the first 70 % are used for training and the next
# 15 % for testing.  The remaining rows are not touched in this cell.
train_size = int(dataset_size * 0.7)
test_size = int(dataset_size * 0.15)

X_train, Y_train = (
    input_without_treatment[:train_size, :],
    outputs_order_of_magnitude[:train_size, :],
)
print(f"train sizes: {X_train.shape}, {Y_train.shape}")

X_test, Y_test = (
    input_without_treatment[train_size:(train_size + test_size), :],
    outputs_order_of_magnitude[train_size:(train_size + test_size), :],
)
print(f"test sizes: {X_test.shape}, {Y_test.shape}")


# scaling inputs

import pickle
from pathlib import Path

# Column indices that get whisker-based scaling instead of plain min-max
# scaling (presumably the log-normally distributed inputs, per the name).
LOGNORMAL_PARAMETERS = (1, 2)


class CustomScaler:
    """Min-max scaler with outlier-robust scaling for selected columns.

    All columns are scaled by a ``MinMaxScaler``.  The columns listed in
    ``LOGNORMAL_PARAMETERS`` are then overwritten with a scaling based on
    box-plot whisker limits: the most extreme data points that still lie
    within 1.5 * IQR of the first / third quartile.  Outliers in those
    columns therefore map outside ``[0, 1]`` instead of compressing the
    rest of the data.

    NOTE: this notebook loads a pickled scaler (presumably an instance of
    this class), so the attribute names ``scaler``, ``plot_loval`` and
    ``plot_hival`` must stay unchanged.
    """

    def __init__(self):
        # Imported locally: the notebook never imports MinMaxScaler at module
        # level, so constructing a CustomScaler used to raise NameError.
        from sklearn.preprocessing import MinMaxScaler

        super().__init__()
        self.scaler = MinMaxScaler()
        # Lower / upper whisker per entry of LOGNORMAL_PARAMETERS.
        self.plot_loval = [0.0] * len(LOGNORMAL_PARAMETERS)
        self.plot_hival = [1.0] * len(LOGNORMAL_PARAMETERS)

    def _whisker_span(self, i: int) -> float:
        """Return the whisker range for the i-th special column (never 0)."""
        span = self.plot_hival[i] - self.plot_loval[i]
        # A zero span would divide by zero in transform(); fall back to 1 so
        # the column is only shifted, and inverse_transform() stays exact.
        return span if span != 0 else 1.0

    def transform(self, x: np.ndarray, copy=None) -> np.ndarray:
        """Scale ``x``; ``copy`` is accepted for API compatibility and ignored.

        Returns a new array: min-max scaled everywhere, with the
        ``LOGNORMAL_PARAMETERS`` columns rescaled by the fitted whiskers.
        """
        res = self.scaler.transform(x)
        for i, parameter_index in enumerate(LOGNORMAL_PARAMETERS):
            shifted = x[:, parameter_index] - self.plot_loval[i]
            res[:, parameter_index] = shifted / self._whisker_span(i)

        return res

    def fit(self, x, copy=None) -> "CustomScaler":
        """Fit the min-max scaler and compute the whiskers; returns ``self``.

        ``copy`` is accepted for API compatibility and ignored.
        """
        self.scaler.fit(x)
        for i, parameter_index in enumerate(LOGNORMAL_PARAMETERS):
            column_values = x[:, parameter_index]

            quantile_1, quantile_3 = np.quantile(column_values, [0.25, 0.75], axis=0)
            iqr = quantile_3 - quantile_1

            # Box-plot fences ...
            loval = quantile_1 - 1.5 * iqr
            hival = quantile_3 + 1.5 * iqr

            # ... and the actual data points closest to (but inside) them.
            self.plot_loval[i] = np.min(column_values[column_values >= loval])
            self.plot_hival[i] = np.max(column_values[column_values <= hival])

        return self

    def inverse_transform(self, x, copy=None):
        """Undo ``transform``; ``copy`` is accepted and ignored."""
        res = self.scaler.inverse_transform(x)
        for i, parameter_index in enumerate(LOGNORMAL_PARAMETERS):
            res[:, parameter_index] = x[:, parameter_index] * self._whisker_span(i) + self.plot_loval[i]
        return res

# Load the previously fitted input scaler from disk and apply it to both
# splits (train first, then test).
scaler_path = Path("../final/scaler.pickle")
with scaler_path.open("rb") as scaler_file:
    scaler = pickle.load(scaler_file)

X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

# applying principal component analysis

from sklearn.decomposition import PCA
PCA_COMPONENTS = 12

# Load the pickled PCA object whose file name encodes the component count,
# then project the training targets with it.
pca_path = Path(f"../final/pca{PCA_COMPONENTS}.pickle")
with pca_path.open("rb") as opened_file:
    pca = pickle.load(opened_file)

Y_train_pca = pca.transform(Y_train)


import time
from sklearn.metrics import mean_squared_error
import optuna

# Loading best performing optuna study 
# The parameters of the best trial are stored in its user attributes.
study = optuna.load_study(
    study_name="MLPRegressor_constant_600",
    storage="sqlite:///../final/optuna.db",
)
model_params = study.best_trial.user_attrs["model_params"]
print(model_params)

train sizes: (700000, 10), (700000, 200)
test sizes: (150000, 10), (150000, 200)
{'alpha': 0.0021976121677802214, 'learning_rate_init': 0.0025086281095413575, 'power_t': 0.5, 'hidden_layer_sizes': [800, 250, 20], 'tol': 5e-06, 'n_iter_no_change': 10, 'random_state': 42, 'warm_start': False, 'max_iter': 10000, 'batch_size': 2000, 'learning_rate': 'constant'}


In [None]:
from sklearn.neural_network import MLPRegressor

# Fit the network with the study's best parameters, this time without the
# 10 minute training limit, and measure how long the fit takes.
training_start = time.time()
model = MLPRegressor(**model_params).fit(X_train_scaled, Y_train_pca)
training_end = time.time()

# Predictions are made in PCA space; map them back through the PCA before
# comparing against the test targets.
Y_predict_pca = model.predict(X_test_scaled)
Y_predict = pca.inverse_transform(Y_predict_pca)
error = mean_squared_error(y_true=Y_test, y_pred=Y_predict)

print(f"error: {error}, training_time: {training_end - training_start}")

In [None]:
# Second attempt: same architecture, hand-adjusted optimisation settings.
training_start2 = time.time()

# model_params already contains tol, batch_size, warm_start,
# learning_rate_init and max_iter.  Passing them again as keywords next to
# **model_params raises "TypeError: got multiple values for keyword
# argument", so the overrides are merged into one dict first (later keys win).
model2_params = {
    **model_params,
    "tol": 1e-6,
    "batch_size": 4000,
    "warm_start": True,
    "learning_rate_init": 0.0005,
    "max_iter": 100,
}
# NOTE(review): warm_start only reuses the solution of a previous fit() on the
# SAME estimator.  model2 is a fresh estimator, so this fit starts from a new
# initialisation and does not continue from `model`. Confirm this is intended.
model2 = MLPRegressor(**model2_params)
model2.fit(X_train_scaled, Y_train_pca)
training_end2 = time.time()

# Evaluate model2 (the original cell predicted with `model`, so error2 merely
# repeated the first model's score).
Y_predict_pca = model2.predict(X_test_scaled)
Y_predict = pca.inverse_transform(Y_predict_pca)
error2 = mean_squared_error(Y_test, Y_predict)
print(f"error: {error2}, training_time: {training_end2 - training_start2}")