In this notebook we hand-tune the parameters of the best-performing ML model (the neural-network implementation `MLPRegressor`), selected from the multiple models trained in notebook `02. Training ML models`, to obtain an even better-performing one.


In [1]:
#loading dataset
import numpy as np

# Read the "inputs" and "outputs" arrays from the archive and cast each to float64 once.
input_and_output = np.load("../final/dataset.npz")
inputs = input_and_output["inputs"].astype(np.float64)
outputs = input_and_output["outputs"].astype(np.float64)
# number of samples = number of rows in the input matrix
dataset_size = inputs.shape[0]

print("loaded dataset")

# transforming time profiles to their orders of magnitude

# Smallest order of magnitude that is represented; anything lower is clamped to it.
LOWER_LIMIT = -9

def output_transform(outputs: np.ndarray) -> np.ndarray:
    """Convert raw outputs to their base-10 logarithms, floored at ``LOWER_LIMIT``.

    Non-positive entries (for which the logarithm is undefined) and entries whose
    logarithm is below ``LOWER_LIMIT`` are both mapped to ``LOWER_LIMIT``.

    Args:
        outputs: array of raw values; it is not modified.

    Returns:
        A new array of the same shape holding the clamped ``log10`` values.
    """
    values = np.asarray(outputs)
    non_positive = values <= 0
    # Substitute 1 for non-positive entries so log10 never sees an invalid argument;
    # those positions are overwritten with the floor right afterwards.
    magnitudes = np.log10(np.where(non_positive, 1, values))
    magnitudes[non_positive] = LOWER_LIMIT
    # Clamp tiny positive values in place (avoids another full-size temporary).
    np.maximum(magnitudes, LOWER_LIMIT, out=magnitudes)
    return magnitudes
    
def output_untransform(transformed_outputs: np.ndarray) -> np.ndarray:
    """Map order-of-magnitude values back to raw values (``10 ** value``).

    Entries at or below ``LOWER_LIMIT`` are returned as exactly 0, mirroring how
    ``output_transform`` encodes non-positive / very small values.

    Args:
        transformed_outputs: array of base-10 exponents; it is not modified.

    Returns:
        A new array of the same shape with the restored values.
    """
    at_floor = transformed_outputs <= LOWER_LIMIT
    # A float base keeps the result floating point even for integer exponents
    # (integer ** negative integer raises in NumPy).
    restored = np.power(10.0, transformed_outputs)
    return np.where(at_floor, 0.0, restored)

outputs_order_of_magnitude = output_transform(outputs)
print("transformed to orders of magnitude")

# dropping treatment column in input

def drop_treatment(input_data: np.ndarray) -> np.ndarray:
    """Strip the treatment data from the dataset.

    When the array has exactly 11 columns, everything except the first column is
    returned; an array with any other column count is handed back unchanged.
    """
    has_treatment_column = input_data.shape[1] == 11
    return input_data[:, 1:] if has_treatment_column else input_data

input_without_treatment = drop_treatment(inputs)

print("dropped treatment column")

# Splitting by row position: the first 70% of the rows form the training set and the
# following 15% the test set. The remaining rows are not used in this section.
train_size = int(dataset_size * 0.7)
test_size = int(dataset_size * 0.15)
train_rows = slice(0, train_size)
test_rows = slice(train_size, train_size + test_size)

X_train, Y_train = input_without_treatment[train_rows, :], outputs_order_of_magnitude[train_rows, :]
print(f"train sizes: {X_train.shape}, {Y_train.shape}")
X_test, Y_test = input_without_treatment[test_rows, :], outputs_order_of_magnitude[test_rows, :]
print(f"test sizes: {X_test.shape}, {Y_test.shape}")

print("train test split")

# scaling inputs

import pickle
from pathlib import Path

# Column indices that are rescaled by their box-plot whisker range instead of min/max.
LOGNORMAL_PARAMETERS = (1, 2)

class CustomScaler:
    """Min-max scaler with whisker-based scaling for the ``LOGNORMAL_PARAMETERS`` columns.

    All columns are first scaled by the wrapped ``MinMaxScaler``. The columns listed in
    ``LOGNORMAL_PARAMETERS`` are then overwritten with ``(x - lo) / (hi - lo)``, where
    ``lo`` / ``hi`` are the lowest / highest observed values lying within 1.5 IQR of the
    first / third quartile, so outliers do not compress the bulk of the data.

    The class must stay importable under this name with the attributes ``scaler``,
    ``plot_loval`` and ``plot_hival`` so that previously pickled instances keep loading.
    """

    def __init__(self):
        # Local import: the name is not imported elsewhere in this notebook section, and
        # unpickling an existing scaler does not run __init__ at all.
        from sklearn.preprocessing import MinMaxScaler

        self.scaler = MinMaxScaler()
        # identity whisker range until fit() is called
        self.plot_loval = [0.0] * len(LOGNORMAL_PARAMETERS)
        self.plot_hival = [1.0] * len(LOGNORMAL_PARAMETERS)

    def _whisker_span(self, i: int) -> float:
        """Return ``hi - lo`` for the i-th special column, or 1.0 if that range is zero."""
        span = self.plot_hival[i] - self.plot_loval[i]
        # A constant column has coinciding whiskers; fall back to a unit span so that
        # transform() yields 0 instead of dividing by zero.
        return span if span != 0 else 1.0

    def transform(self, x: np.ndarray, copy=None) -> np.ndarray:
        """Scale ``x``; ``copy`` is accepted for signature compatibility and ignored.

        Returns a new array produced by the wrapped scaler, with the
        ``LOGNORMAL_PARAMETERS`` columns replaced by their whisker-scaled values.
        """
        res = self.scaler.transform(x)
        for i, parameter_index in enumerate(LOGNORMAL_PARAMETERS):
            res[:, parameter_index] = (x[:, parameter_index] - self.plot_loval[i]) / self._whisker_span(i)

        return res

    def fit(self, x, copy=None):
        """Fit the wrapped scaler and compute the whisker range of each special column.

        ``copy`` is accepted for signature compatibility and ignored. Returns ``self``.
        """
        self.scaler.fit(x)
        for i, parameter_index in enumerate(LOGNORMAL_PARAMETERS):
            column_values = x[:, parameter_index]

            quantile_1, quantile_3 = np.quantile(column_values, [0.25, 0.75], axis=0)
            iqr = quantile_3 - quantile_1

            # fences at 1.5 IQR beyond the quartiles
            loval = quantile_1 - 1.5 * iqr
            hival = quantile_3 + 1.5 * iqr

            # whiskers: the most extreme observed values that still lie inside the fences
            self.plot_hival[i] = np.max(column_values[column_values <= hival])
            self.plot_loval[i] = np.min(column_values[column_values >= loval])

        return self

    def inverse_transform(self, x, copy=None):
        """Undo ``transform``; ``copy`` is accepted for signature compatibility and ignored."""
        res = self.scaler.inverse_transform(x)
        for i, parameter_index in enumerate(LOGNORMAL_PARAMETERS):
            res[:, parameter_index] = x[:, parameter_index] * self._whisker_span(i) + self.plot_loval[i]
        return res

# Restore the previously fitted scaler from disk.
# NOTE(review): unpickling executes code stored in the file - only load trusted artifacts.
scaler = pickle.loads(Path("../final/scaler.pickle").read_bytes())

# apply the same scaling to both splits
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

print("scaled")

# applying principal component analysis

from sklearn.decomposition import PCA
PCA_COMPONENTS = 12
# Restore the stored PCA object whose file name carries the component count.
# NOTE(review): unpickling executes code stored in the file - only load trusted artifacts.
pca_path = Path("../final") / f"pca{PCA_COMPONENTS}.pickle"
with pca_path.open("rb") as opened_file:
    pca = pickle.load(opened_file)

# Only the training targets are projected here; the errors below are computed on
# inverse-transformed predictions against the unprojected targets.
Y_train_pca = pca.transform(Y_train)

print("applied pca")

import time
from sklearn.metrics import mean_squared_error
import optuna

# Loading the stored optuna study and taking the hyperparameters recorded on its best trial
study = optuna.load_study(
    study_name="MLPRegressor_constant_600",
    storage="sqlite:///../final/optuna.db",
)
best_trial = study.best_trial
model_params = best_trial.user_attrs["model_params"]
print(model_params)

loaded dataset
transformed to orders of magnitude
dropped treatment column
train sizes: (700000, 10), (700000, 200)
test sizes: (150000, 10), (150000, 200)
train test split
scaled
applied pca
{'alpha': 0.0021976121677802214, 'learning_rate_init': 0.0025086281095413575, 'power_t': 0.5, 'hidden_layer_sizes': [800, 250, 20], 'tol': 5e-06, 'n_iter_no_change': 10, 'random_state': 42, 'warm_start': False, 'max_iter': 10000, 'batch_size': 2000, 'learning_rate': 'constant'}


In [3]:
from sklearn.neural_network import MLPRegressor

# Estimator state handed from one stage to the next: each later stage builds a new
# MLPRegressor with different hyperparameters and receives these attributes from its
# predecessor, so training continues from the current network.
# NOTE(review): this touches fitted/private scikit-learn attributes - confirm against the installed version.
carried_state = (
    "coefs_", "t_", "n_outputs_", "n_layers_", "out_activation_",
    "intercepts_", "n_iter_", "loss_curve_", "best_loss_", "_no_improvement_count",
)

training_start = time.time()

# Stage 1: new network, batch_size=20000, learning_rate_init=0.01, alpha=0.00001.
model = MLPRegressor(**dict(model_params, batch_size=20000, learning_rate_init=0.01, alpha=0.00001))
for round_index in range(4):
    # 10 passes of partial_fit between two evaluations
    for repeat in range(10):
        model.partial_fit(X_train_scaled, Y_train_pca)
    print(f"fitted {round_index} {time.time() - training_start:.1f}")
    # errors are measured after mapping predictions back through the inverse PCA transform
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")

# Stage 2: continue from `model` with learning_rate_init=0.003, alpha=0.00003.
model2 = MLPRegressor(**dict(model_params, batch_size=20000, learning_rate_init=0.003, alpha=0.00003))
for state_name in carried_state:
    setattr(model2, state_name, getattr(model, state_name))

for round_index in range(4):
    for repeat in range(10):
        model2.partial_fit(X_train_scaled, Y_train_pca)
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model2.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model2.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")

# Stage 3: continue from `model2` with learning_rate_init=0.001, alpha=0.0001.
model3 = MLPRegressor(**dict(model_params, batch_size=20000, learning_rate_init=0.001, alpha=0.0001))
for state_name in carried_state:
    setattr(model3, state_name, getattr(model2, state_name))

for round_index in range(4):
    for repeat in range(10):
        model3.partial_fit(X_train_scaled, Y_train_pca)
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model3.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model3.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")

training_end = time.time()

fitted 0 175.1
error test: 0.002599849381178007, error train: 0.0026001043892114263 training_time: 195.80107188224792
fitted 1 371.6
error test: 0.00196335076492695, error train: 0.0019461638820849646 training_time: 388.2139699459076
fitted 2 565.7
error test: 0.0012887083779221224, error train: 0.0012774304893470778 training_time: 581.5762078762054
fitted 3 757.8
error test: 0.005476050076365092, error train: 0.005447488901730414 training_time: 772.6894669532776
error test: 0.00042430121972095815, error train: 0.00041414605972777595 training_time: 965.0996699333191
error test: 0.0002658747142208517, error train: 0.00026015462826451264 training_time: 1156.9848630428314
error test: 0.00032315341588961085, error train: 0.00031774556200397957 training_time: 1346.0278718471527
error test: 0.00017830213237130787, error train: 0.00017415880066484182 training_time: 1540.6659517288208
error test: 0.00015665503752113576, error train: 0.00015141552560095424 training_time: 1729.6456530094147
erro

In [4]:
# Attributes copied from the previous stage's estimator onto each newly built one so
# that training resumes from the current network.
# NOTE(review): this touches fitted/private scikit-learn attributes - confirm against the installed version.
state_to_carry = (
    "coefs_", "t_", "n_outputs_", "n_layers_", "out_activation_",
    "intercepts_", "n_iter_", "loss_curve_", "best_loss_", "_no_improvement_count",
)

# Stage 4: same hyperparameters as model3 (learning_rate_init=0.001, alpha=0.0001),
# but on a newly constructed estimator that takes over model3's state.
model4 = MLPRegressor(**dict(model_params, batch_size=20000, learning_rate_init=0.001, alpha=0.0001))
for state_name in state_to_carry:
    setattr(model4, state_name, getattr(model3, state_name))

for round_index in range(8):
    # 10 passes of partial_fit between two evaluations
    for repeat in range(10):
        model4.partial_fit(X_train_scaled, Y_train_pca)
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model4.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model4.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")


# Stage 5: continue from `model4` with learning_rate_init lowered to 0.0003.
model5 = MLPRegressor(**dict(model_params, batch_size=20000, learning_rate_init=0.0003, alpha=0.0001))
for state_name in state_to_carry:
    setattr(model5, state_name, getattr(model4, state_name))

for round_index in range(8):
    for repeat in range(10):
        model5.partial_fit(X_train_scaled, Y_train_pca)
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model5.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model5.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")
    

error test: 0.00011486898482143657, error train: 0.00011041371032693849 training_time: 2531.1502380371094
error test: 0.00010632929972035227, error train: 0.00010249740427607707 training_time: 2718.584650039673
error test: 9.873049132474293e-05, error train: 9.486179975208429e-05 training_time: 2907.3779768943787
error test: 9.978048183507067e-05, error train: 9.593812279995784e-05 training_time: 3096.79953289032
error test: 0.00011802524270675386, error train: 0.00011403732743835556 training_time: 3284.9790318012238
error test: 0.0001332832874652715, error train: 0.00013007423276541492 training_time: 3475.213138103485
error test: 9.751010719360518e-05, error train: 9.328827910633931e-05 training_time: 3663.286122083664
error test: 0.00010846891993241584, error train: 0.0001041237291668056 training_time: 3853.488116979599
error test: 6.945236616957586e-05, error train: 6.55295224227693e-05 training_time: 4040.94211101532
error test: 6.798756467593612e-05, error train: 6.396874392459019

In [5]:
# Stage 6: continue from `model5` with batch_size=50000, learning_rate_init=0.0001, alpha=0.0002.
# A new estimator is built and the listed attributes are copied over from model5.
# NOTE(review): this touches fitted/private scikit-learn attributes - confirm against the installed version.
stage6_params = dict(model_params, batch_size=50000, learning_rate_init=0.0001, alpha=0.0002)
model6 = MLPRegressor(**stage6_params)
for state_name in (
    "coefs_", "t_", "n_outputs_", "n_layers_", "out_activation_",
    "intercepts_", "n_iter_", "loss_curve_", "best_loss_", "_no_improvement_count",
):
    setattr(model6, state_name, getattr(model5, state_name))

for round_index in range(8):
    # 5 passes of partial_fit between two evaluations
    for repeat in range(5):
        model6.partial_fit(X_train_scaled, Y_train_pca)
    # errors are measured after mapping predictions back through the inverse PCA transform
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model6.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model6.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")

error test: 5.609778738136186e-05, error train: 5.23235884602929e-05 training_time: 5464.114265918732
error test: 5.5997045707412863e-05, error train: 5.221308705723219e-05 training_time: 5587.8838810920715
error test: 5.5912056454307255e-05, error train: 5.212551354245663e-05 training_time: 5713.504786014557
error test: 5.5834242390853904e-05, error train: 5.2045404459587845e-05 training_time: 5838.271425962448
error test: 5.574151744663473e-05, error train: 5.1945171524209724e-05 training_time: 5964.7364139556885
error test: 5.56358640145835e-05, error train: 5.1838309050848234e-05 training_time: 6090.327034950256
error test: 5.5523868366087516e-05, error train: 5.172095628331541e-05 training_time: 6213.642988920212
error test: 5.5413936416687965e-05, error train: 5.161083918506648e-05 training_time: 6336.716297149658


In [6]:
# Stage 7: continue from `model6` with batch_size=50000, learning_rate_init=0.00003, alpha=0.0004.
# A new estimator is built and the listed attributes are copied over from model6.
# NOTE(review): this touches fitted/private scikit-learn attributes - confirm against the installed version.
stage7_params = dict(model_params, batch_size=50000, learning_rate_init=0.00003, alpha=0.0004)
model7 = MLPRegressor(**stage7_params)
for state_name in (
    "coefs_", "t_", "n_outputs_", "n_layers_", "out_activation_",
    "intercepts_", "n_iter_", "loss_curve_", "best_loss_", "_no_improvement_count",
):
    setattr(model7, state_name, getattr(model6, state_name))

for round_index in range(4):
    # 5 passes of partial_fit between two evaluations
    for repeat in range(5):
        model7.partial_fit(X_train_scaled, Y_train_pca)
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model7.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model7.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")

error test: 5.535181772134489e-05, error train: 5.155705752576035e-05 training_time: 6456.97175693512
error test: 5.527926948704048e-05, error train: 5.148240955166337e-05 training_time: 6577.05282998085
error test: 5.521603211735898e-05, error train: 5.141407538726785e-05 training_time: 6697.118595838547
error test: 5.514383523121379e-05, error train: 5.134020572192409e-05 training_time: 6815.493922948837


In [7]:
# Stage 8: continue from `model7` with batch_size=100000, learning_rate_init=0.00001, alpha=0.002.
# A new estimator is built and the listed attributes are copied over from model7.
# NOTE(review): this touches fitted/private scikit-learn attributes - confirm against the installed version.
stage8_params = dict(model_params, batch_size=100000, learning_rate_init=0.00001, alpha=0.002)
model8 = MLPRegressor(**stage8_params)
for state_name in (
    "coefs_", "t_", "n_outputs_", "n_layers_", "out_activation_",
    "intercepts_", "n_iter_", "loss_curve_", "best_loss_", "_no_improvement_count",
):
    setattr(model8, state_name, getattr(model7, state_name))

# The timer is restarted here, so the training_time printed below counts from the
# start of this cell rather than from the first training stage.
training_start = time.time()
for round_index in range(4):
    # 5 passes of partial_fit between two evaluations
    for repeat in range(5):
        model8.partial_fit(X_train_scaled, Y_train_pca)
    error_test = mean_squared_error(y_true=Y_test, y_pred=pca.inverse_transform(model8.predict(X_test_scaled)))
    error_train = mean_squared_error(y_true=Y_train, y_pred=pca.inverse_transform(model8.predict(X_train_scaled)))
    print(f"error test: {error_test}, error train: {error_train} training_time: {time.time() - training_start}")

error test: 5.512047045051417e-05, error train: 5.1317610427962704e-05 training_time: 141.26645922660828
error test: 5.514184664254254e-05, error train: 5.133288972155514e-05 training_time: 276.6796371936798
error test: 5.5151747008367064e-05, error train: 5.1339569538849646e-05 training_time: 411.3547899723053
error test: 5.511458140607042e-05, error train: 5.130226615030502e-05 training_time: 544.7778451442719


In [8]:
# Persist the estimator from the last training stage.
model_path = Path("../final/MLPRegressor.pickle")
with model_path.open("wb") as opened_file:
    pickle.dump(model8, file=opened_file)