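This notebook trains a neural network that predicts the coefficient of variation of the capacity curve. It adds a warm-up phase in which only the mean is trained, uses the $\beta$-NLL loss function during training, and tunes hyperparameters with 10-fold cross-validation.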

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.metrics import mean_absolute_error,root_mean_squared_error

import torch
import torch.nn as nn   
import torch.optim as optim
from torch.utils.data import DataLoader,TensorDataset
import torch.nn.functional as F

import optuna
import random

import pickle

In [None]:
def _read_cell_ids(csv_path):
    """Read a header-less CSV and return all of its entries as a flat list of strings.

    Presumably each entry is a cell identifier (verify against the preprocessing step).
    """
    return pd.read_csv(csv_path,header=None).to_numpy(dtype=str).reshape(-1,).tolist()


# Entries listed for the training set and the two test sets
training_cells = _read_cell_ids("../Data_preprocessing/training.csv")
test_in_cells = _read_cell_ids("../Data_preprocessing/test_in.csv")
test_out_cells = _read_cell_ids("../Data_preprocessing/test_out.csv")

# Number of entries in each list
num_training_cells, num_test_in_cells, num_test_out_cells = (
    len(cell_list) for cell_list in (training_cells, test_in_cells, test_out_cells)
)

# Single scalar stored in the file; it is later passed to the loss functions as the exponent `a`
a = np.loadtxt('../Empirical_model_fitting/Empirical_parameters_global_train_py.csv').item()

In [None]:
def _load_csv_matrix(csv_path):
    """Load a comma-delimited numeric text file as a NumPy array."""
    return np.loadtxt(csv_path,delimiter=",")


def _capacity_grid(num_cells):
    """Stack one 21-point grid running from 1 down to 0.8 per cell."""
    return np.array([np.linspace(1,0.8,21) for _ in range(num_cells)])


# Network inputs (PCA features) for each data split
X_train_PCA = _load_csv_matrix("Processed_input_output/X_train_PCA.csv")
X_test_in_PCA = _load_csv_matrix("Processed_input_output/X_test_in_PCA.csv")
X_test_out_PCA = _load_csv_matrix("Processed_input_output/X_test_out_PCA.csv")

# Regression targets for each data split
N_train = _load_csv_matrix("Processed_input_output/N_train.csv")
N_test_in = _load_csv_matrix("Processed_input_output/N_test_in.csv")
N_test_out = _load_csv_matrix("Processed_input_output/N_test_out.csv")

# Identical capacity grid repeated once per cell of each split
Q_train = _capacity_grid(num_training_cells)
Q_test_in = _capacity_grid(num_test_in_cells)
Q_test_out = _capacity_grid(num_test_out_cells)

Load the predefined 10-fold cross-validation splits

In [4]:
# NOTE: pickle.load can execute arbitrary code, so only open split files from a trusted source.
with open('training_cells_CV.pkl','rb') as f:
    training_cells_CV = pickle.load(f)

with open('val_cells_CV.pkl','rb') as f:
    val_cells_CV = pickle.load(f)

# Build one (inputs, targets) TensorDataset per fold for training and for validation
train_dataset_CV, val_dataset_CV = [], []
for fold in range(10):
    # Index sets selecting this fold's rows out of the training arrays
    train_cells, val_cells = training_cells_CV[fold], val_cells_CV[fold]

    # Inputs: rows of the training PCA matrix, converted to float32 tensors
    X_train_fold = torch.tensor(X_train_PCA[train_cells],dtype=torch.float32)
    X_val_fold = torch.tensor(X_train_PCA[val_cells],dtype=torch.float32)

    # Targets: matching rows of N_train, converted to float32 tensors
    Y_train_fold = torch.tensor(N_train[train_cells],dtype=torch.float32)
    Y_val_fold = torch.tensor(N_train[val_cells],dtype=torch.float32)

    train_dataset = TensorDataset(X_train_fold,Y_train_fold)
    val_dataset = TensorDataset(X_val_fold,Y_val_fold)

    train_dataset_CV.append(train_dataset)
    val_dataset_CV.append(val_dataset)

Define end-to-end objective function and empirical models for evaluation

In [5]:
def empirical_model(global_p,b1,b2,b3,N_eq,b_weight=[1e-3,1e2,1e2]):
    """Evaluate the empirical curve: 1 minus a power-law term minus a logistic term.

    Args:
        global_p: exponent applied to ``N_eq`` in the power-law term.
        b1, b2, b3: model parameters; each is multiplied by the matching entry
            of ``b_weight`` before use.
        N_eq: value(s) at which the model is evaluated (scalar or NumPy array).
        b_weight: three scaling factors for ``b1``, ``b2`` and ``b3``, matching
            the scaling used in the end-to-end formulation.

    Returns:
        ``1 - power_term - logistic_term`` evaluated at ``N_eq``.
    """
    w1, w2, w3 = b_weight[0], b_weight[1], b_weight[2]
    a1 = global_p

    # Power-law contribution
    power_term = b1*N_eq**a1*w1
    # Logistic contribution: centre b2*w2, width b3*w3
    logistic_term = 1/(1+np.exp((b2*w2-N_eq)/(b3*w3)))

    return 1 - power_term - logistic_term


def empirical_model_seperate(global_p,b1,b2,b3,N_eq,b_weight=[1e-3,1e2,1e2]):
    """Evaluate the empirical curve and also return its two loss terms separately.

    Args:
        global_p: exponent applied to ``N_eq`` in the power-law term.
        b1, b2, b3: model parameters; each is multiplied by the matching entry
            of ``b_weight`` before use.
        N_eq: value(s) at which the model is evaluated (scalar or NumPy array).
        b_weight: three scaling factors for ``b1``, ``b2`` and ``b3``, matching
            the scaling used in the end-to-end formulation.

    Returns:
        A list ``[total, power_part, logistic_part]`` where ``power_part`` and
        ``logistic_part`` are the (negative) contributions and
        ``total = 1 + power_part + logistic_part``.
    """
    a1 = global_p
    scale_b1, scale_b2, scale_b3 = b_weight[0], b_weight[1], b_weight[2]

    # Magnitudes of the two terms, each computed once
    power_term = b1*N_eq**a1*scale_b1
    logistic_term = 1/(1+np.exp((b2*scale_b2-N_eq)/(b3*scale_b3)))

    total = 1 - power_term - logistic_term
    return [total, -power_term, -logistic_term]

Type 1: Predict the coefficient of variation of the curve

In [6]:
# Define the NN model function
class Network_cof_variation(nn.Module):
    """Fully connected network with a bounded mean head and a coefficient-of-variation head.

    The shared trunk is a stack of Linear + ReLU layers. Two linear heads sit on
    top of the last hidden layer:

    * ``mean_layer`` produces ``output_size - 1`` values (b1, b2, b3 when
      ``output_size`` is 4) that are squashed by a sigmoid into
      ``[mean_range[0], mean_range[1]]``.
    * ``coe_var_layer`` produces one non-negative value (softplus), used as the
      coefficient of variation.

    Args:
        input_size: number of input features.
        hidden_sizes: non-empty sequence with the width of each hidden layer.
        output_size: total number of outputs (mean outputs + 1).
        mean_range: ``(low, high)`` bounds of the mean outputs. Defaults to
            ``(1, 40)``, the range that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_sizes, output_size, mean_range=(1, 40)):
        super(Network_cof_variation, self).__init__()
        self.mean_low, self.mean_high = mean_range

        # Shared trunk: input layer followed by the remaining hidden layers
        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(input_size, hidden_sizes[0]))
        for i in range(1, len(hidden_sizes)):
            self.layers.append(nn.Linear(hidden_sizes[i-1], hidden_sizes[i]))

        # Output heads
        self.mean_layer = nn.Linear(hidden_sizes[-1], output_size - 1)  # b1, b2, b3
        self.coe_var_layer = nn.Linear(hidden_sizes[-1], 1)  # coefficient of variation
        self._initialize_weights()

    def forward(self, x):
        """Return a tensor of shape ``(batch, output_size)``: bounded means, then the CV."""
        # Pass through hidden layers with ReLU activations
        for layer in self.layers:
            x = torch.relu(layer(x))

        # Sigmoid maps the raw mean outputs into [mean_low, mean_high]
        means = torch.sigmoid(self.mean_layer(x)) * (self.mean_high - self.mean_low) + self.mean_low

        # Softplus keeps the coefficient of variation non-negative
        coe_var = F.softplus(self.coe_var_layer(x))

        return torch.cat((means, coe_var), dim=1)

    def _initialize_weights(self):
        """Initialise the trunk (He normal), the mean head (Xavier normal) and all biases."""
        # He Normal initialization for the ReLU trunk, zero biases
        for layer in self.layers:
            if isinstance(layer, nn.Linear):
                torch.nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
                if layer.bias is not None:
                    torch.nn.init.zeros_(layer.bias)

        # Xavier Normal initialization for mean_layer (sigmoid activation), zero bias
        torch.nn.init.xavier_normal_(self.mean_layer.weight)
        torch.nn.init.zeros_(self.mean_layer.bias)

        # coe_var_layer keeps nn.Linear's default weight initialisation; only its
        # bias is reset to zero. (An earlier ones_() call on this bias was
        # immediately overwritten by zeros_() and has been removed.)
        torch.nn.init.zeros_(self.coe_var_layer.bias)


# Define the early stopping class
class EarlyStopping:
    """Signal that training should stop once the validation loss stops improving.

    A loss counts as an improvement when it is at most ``best_loss - min_delta``.
    Every call that is not an improvement increments ``counter``; when
    ``counter`` reaches ``patience`` the ``early_stop`` flag is set.

    A NaN loss is always treated as "no improvement". Previously a NaN failed
    the comparison, overwrote ``best_loss`` and reset the counter, so a
    diverged run could never trigger early stopping.

    Args:
        patience: number of consecutive non-improving calls tolerated.
        min_delta: minimum decrease of the loss that counts as an improvement.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0          # consecutive calls without improvement
        self.best_loss = None     # best (lowest) finite loss seen so far
        self.early_stop = False   # set to True once patience is exhausted

    def __call__(self, val_loss):
        """Update the stopping state with the latest validation loss."""
        is_nan = val_loss != val_loss  # NaN is the only value not equal to itself
        improved = (not is_nan) and (
            self.best_loss is None
            or val_loss <= self.best_loss - self.min_delta
        )

        if improved:
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

In [7]:
# Define the loss functions
# regular NLL loss for tracking training performance
# Beta-NLL loss for backpropagation

def end_to_end_loss_NN_NLL_cov(output, target,a=0.5,warmup=True,b_weight=[1e-3,1e2,1e2]):
    """Gaussian negative log-likelihood of the capacity grid under the empirical model.

    The network output is turned into a predicted capacity (in percent) at every
    entry of ``target``; the prediction is compared with a fixed capacity grid
    running linearly from 100 % down to 80 %. The predictive standard deviation
    is ``(100 - predicted) * coe_var``.

    Args:
        output: tensor of shape ``(batch, 4)``; columns 0-2 are b1, b2, b3 and
            column 3 is the coefficient of variation.
        target: 2-D tensor ``(batch, len_interp)``; presumably the cycle counts
            at which each grid capacity is reached (confirm against the data
            preparation).
        a: exponent of the power-law term.
        warmup: if True, the variance is fixed to 1 and the loss reduces to
            ``0.5 * MSE``.
        b_weight: scaling factors applied to b1, b2, b3.

    Returns:
        Scalar tensor with the mean loss over all batch entries and grid points.
    """
    # Number of grid points per sample (target must be 2-D)
    _, len_interp = target.size()

    # Predicted parameters, kept as (batch, 1) columns so they broadcast over the grid
    b1 = output[:, 0].unsqueeze(1)
    b2 = output[:, 1].unsqueeze(1)
    b3 = output[:, 2].unsqueeze(1)
    coe_var = output[:, 3].unsqueeze(1)

    # True capacity grid in percent. It is created on the same device as the
    # data so the loss also works when the tensors are not on the CPU; the
    # (1, len_interp) row broadcasts against the batch dimension.
    Q = torch.linspace(1, 0.8, len_interp, device=target.device).unsqueeze(0) * 100

    # Power-law term
    term_2 = b1 * b_weight[0] * torch.pow(target, a)

    # Logistic term; the exponent is clamped to avoid overflow/underflow in exp
    exp_input = ((b2 * b_weight[1]) - target) / ((b3 * b_weight[2]) + 1e-6)
    exp_input_clamped = torch.clamp(exp_input, min=-80, max=80)
    term_3 = 1 / (1 + torch.exp(exp_input_clamped))

    # Predicted capacity in percent
    predicted_values = (1 - term_2 - term_3) * 100
    squared_error = (Q - predicted_values) ** 2

    if warmup:
        # Unit variance: log-variance is 0, so only the squared error remains
        return 0.5 * torch.mean(squared_error)

    # Standard deviation grows with the predicted capacity fade
    std = (100 - predicted_values) * coe_var
    # Clamp the variance to avoid division by zero and log of zero
    var = torch.clamp(torch.pow(std, 2), min=1e-6)
    log_var = torch.log(var)

    # Negative log-likelihood of a Gaussian (constant term dropped)
    return 0.5 * torch.mean(squared_error / var + log_var)


def end_to_end_loss_NN_betaNLL(output, target,beta=0.5,a=0.5,warmup=True,b_weight=[1e-3,1e2,1e2]):
    """Beta-weighted Gaussian NLL of the capacity grid under the empirical model.

    The network output is turned into a predicted capacity (in percent) at every
    entry of ``target`` and compared with a fixed capacity grid running linearly
    from 100 % down to 80 %. The predictive standard deviation is
    ``(100 - predicted) * coe_var``. Each per-point NLL term is multiplied by
    ``var.detach() ** beta``, so the weight itself receives no gradient.

    Args:
        output: tensor of shape ``(batch, 4)``; columns 0-2 are b1, b2, b3 and
            column 3 is the coefficient of variation.
        target: 2-D tensor ``(batch, len_interp)``; presumably the cycle counts
            at which each grid capacity is reached (confirm against the data
            preparation).
        beta: exponent of the detached-variance weight.
        a: exponent of the power-law term.
        warmup: if True, the variance is fixed to 1 and the loss reduces to
            ``0.5 * MSE``.
        b_weight: scaling factors applied to b1, b2, b3.

    Returns:
        Scalar tensor with the mean loss over all batch entries and grid points.
    """
    # Number of grid points per sample (target must be 2-D)
    _, len_interp = target.size()

    # Predicted parameters, kept as (batch, 1) columns so they broadcast over the grid
    b1 = output[:, 0].unsqueeze(1)
    b2 = output[:, 1].unsqueeze(1)
    b3 = output[:, 2].unsqueeze(1)
    coe_var = output[:, 3].unsqueeze(1)

    # True capacity grid in percent. It is created on the same device as the
    # data so the loss also works when the tensors are not on the CPU; the
    # (1, len_interp) row broadcasts against the batch dimension.
    Q = torch.linspace(1, 0.8, len_interp, device=target.device).unsqueeze(0) * 100

    # Power-law term
    term_2 = b1 * b_weight[0] * torch.pow(target, a)

    # Logistic term; the exponent is clamped to avoid overflow/underflow in exp
    exp_input = ((b2 * b_weight[1]) - target) / ((b3 * b_weight[2]) + 1e-6)
    exp_input_clamped = torch.clamp(exp_input, min=-80, max=80)
    term_3 = 1 / (1 + torch.exp(exp_input_clamped))

    # Predicted capacity in percent
    predicted_values = (1 - term_2 - term_3) * 100
    squared_error = (Q - predicted_values) ** 2

    # NOTE(review): beta == 0 is routed to the unit-variance (0.5 * MSE) branch,
    # exactly as before. With a zero exponent the weight in the formula below
    # would be 1, i.e. the plain NLL -- confirm which behaviour is intended.
    if warmup or beta==0:
        return 0.5 * torch.mean(squared_error)

    # Standard deviation grows with the predicted capacity fade
    std = (100 - predicted_values) * coe_var
    # Clamp the variance to avoid division by zero and log of zero
    var = torch.clamp(torch.pow(std, 2), min=1e-6)
    log_var = torch.log(var)

    # Per-point NLL weighted by the detached variance raised to beta
    return torch.mean((0.5 * (squared_error / var + log_var)) * var.detach() ** beta)

# Hyperparameter optimization with 10-fold CV

Set up neural network functions

In [8]:
# Seed every random number generator used in this notebook, in a fixed order
seed_ = 42
for _seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    _seed_fn(seed_)

# Ask PyTorch to use deterministic algorithms only
torch.use_deterministic_algorithms(True)

Define the Optuna objective and run the hyperparameter search (each predefined fold's validation subset is used for early stopping and model selection)

In [None]:
# Best mean cross-validation loss seen so far across all Optuna trials (updated inside objective)
best_val_loss = np.inf
# Checkpoint file that receives the per-fold weights of the best trial.
# NOTE(review): torch.save does not create missing directories -- confirm "Best_network/" exists.
best_model_path = "Best_network/best_NN_NLL_cov_V2.pth"
b_weight = [5e-4,30,5] # Scaling weights for b1, b2, b3
def objective(trial):
    """Optuna objective: train one network per CV fold and return the mean validation loss.

    For each of the 10 predefined folds a fresh ``Network_cof_variation`` is
    trained with the beta-NLL loss. Training starts with a warm-up phase of
    ``warmup_epochs`` epochs in which the coefficient-of-variation head is frozen
    and the loss functions are called with ``warmup=True``. After the warm-up the
    head is unfrozen and gradients are clipped. Early stopping is only consulted
    once ``epoch`` exceeds ``warmup_epochs + 100``.

    Reads the module-level names ``train_dataset_CV``, ``val_dataset_CV``, ``a``,
    ``b_weight`` and ``best_model_path``; updates the module-level
    ``best_val_loss`` and writes a checkpoint whenever the trial improves on it.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Mean over the folds of the validation loss computed in the last epoch
        that was run for each fold.
    """
    global best_val_loss

    # Last-epoch validation loss and final weights of every fold
    val_loss_CV = []
    model_state_dict = {}

    # Define the input and output sizes
    input_size = 10  # 10 PCA features
    output_size = 4  # 3 empirical model parameters + 1 variance related value
    beta = 0.5 # Beta value for Beta-NLL loss
    max_grad_norm = 1.0

    # Define the hyperparameters to tune
    num_layer = trial.suggest_int('num_layer', 2, 5)
    num_neuron = trial.suggest_int('num_neuron', 5, 10)
    hidden_sizes = [num_neuron] * num_layer
    batch_size = trial.suggest_int('batch_size', 32, 45)
    lr = trial.suggest_float('lr', 5e-4, 5e-2, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-8, 1e-6, log=True)
    warmup_epochs = trial.suggest_int('warmup_epochs', 500, 800)

    for fold in range(10):
        train_dataset = train_dataset_CV[fold]
        val_dataset = val_dataset_CV[fold]
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
        # Create the model
        model = Network_cof_variation(input_size, hidden_sizes, output_size)
        # Beta-NLL drives the gradient updates; the regular NLL is only used for reporting/validation
        criterion = end_to_end_loss_NN_betaNLL
        criterion_val = end_to_end_loss_NN_NLL_cov
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        warmup = True # Define this for loss function to use warmup or not
        # Freeze weights for coefficient of variation layer before warm-up epochs
        for param in model.coe_var_layer.parameters():
            param.requires_grad = False

        
        # Training loop
        num_epochs = 3000
        warmup_epochs_early_stopping = warmup_epochs + 100  # Early stopping warm-up epochs
        early_stopping = EarlyStopping(patience=10, min_delta=1e-4)
        
        for epoch in range(num_epochs):
            # End of warm-up: switch the losses to the full likelihood and unfreeze the CV head
            if epoch == warmup_epochs:
                warmup = False
                for param in model.coe_var_layer.parameters():
                    param.requires_grad = True
            # Training phase
            model.train()
            # NOTE(review): train_loss is accumulated below but never read afterwards in this function
            train_loss = 0.0
            for batch in train_loader:
                inputs, labels = batch
                optimizer.zero_grad()
                outputs = model(inputs)
                # `a` is the module-level exponent loaded earlier in the notebook
                loss = criterion(outputs, labels, beta, a, warmup, b_weight)
                loss_val = criterion_val(outputs, labels, a, warmup, b_weight)

                loss.backward()
                if epoch >= warmup_epochs:
                    # Gradient clipping
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
                optimizer.step()
                train_loss += loss_val.item()
            train_loss /= len(train_loader)
            
            # Validation phase
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for inputs, targets in val_loader:
                    outputs = model(inputs)
                    loss = criterion_val(outputs, targets, a, warmup, b_weight)
                    val_loss += loss.item()
            # Average of the per-batch mean losses
            val_loss /= len(val_loader)
            
            # Check early stopping condition after warmup
            if epoch > warmup_epochs_early_stopping:
                early_stopping(val_loss)
                if early_stopping.early_stop:
                    print(f"Early stopping at epoch {epoch + 1}")
                    break

        # NOTE(review): the loss and weights recorded here come from the last epoch that ran,
        # not from the epoch with the lowest validation loss -- confirm this is intended.
        val_loss_CV.append(val_loss)
        model_state_dict[fold] = model.state_dict()
    
    val_loss_overall = np.mean(val_loss_CV)
    print(f"Validation loss: {val_loss_CV}")
    # Check if the current validation loss is the best
    if val_loss_overall < best_val_loss:
        best_val_loss = val_loss_overall
        # Save the best model
        torch.save({'model_state':model_state_dict,
                    'lr':lr,
                    'num_neuron':num_neuron,
                    'num_layer':num_layer,
                    'batch_size':batch_size,
                    'weight_decay':weight_decay,
                    'warmup_epochs':warmup_epochs}, best_model_path)

    return val_loss_overall

# Create an Optuna study
sampler = optuna.samplers.TPESampler(seed=seed_)  # Make the sampler deterministic
study = optuna.create_study(sampler=sampler, direction='minimize')

# Run the optimization
study.optimize(objective, n_trials=50)

# Print the best parameters
print("Best hyperparameters:", study.best_params)

[I 2024-10-30 21:33:38,757] A new study created in memory with name: no-name-1454c813-df69-420a-9050-b5f43ff5f674


Early stopping at epoch 662
Early stopping at epoch 665
Early stopping at epoch 663
Early stopping at epoch 658
Early stopping at epoch 673
Early stopping at epoch 671
Early stopping at epoch 658
Early stopping at epoch 660
Early stopping at epoch 658


[I 2024-10-30 21:33:56,861] Trial 0 finished with value: 5.872484445571899 and parameters: {'num_layer': 3, 'num_neuron': 10, 'batch_size': 42, 'lr': 0.007875660249889864, 'weight_decay': 2.0513382630874456e-08, 'warmup_epochs': 546}. Best is trial 0 with value: 5.872484445571899.


Early stopping at epoch 664
Validation loss: [4.984686374664307, 18.311635971069336, 2.1316568851470947, 5.804723262786865, 3.1161296367645264, 3.59780216217041, 11.753974914550781, 3.308239221572876, 2.4160423278808594, 3.2999536991119385]
Early stopping at epoch 924
Early stopping at epoch 920
Early stopping at epoch 905
Early stopping at epoch 910
Early stopping at epoch 917
Early stopping at epoch 912
Early stopping at epoch 903
Early stopping at epoch 911
Early stopping at epoch 915


[I 2024-10-30 21:34:23,392] Trial 1 finished with value: 626095.0360554695 and parameters: {'num_layer': 2, 'num_neuron': 10, 'batch_size': 40, 'lr': 0.013035123791853833, 'weight_decay': 1.0994335574766189e-08, 'warmup_epochs': 791}. Best is trial 0 with value: 5.872484445571899.


Early stopping at epoch 903
Validation loss: [4.04402494430542, 6260892.0, 3.4432997703552246, 5.707590579986572, 3.6203439235687256, 4.508126258850098, 2.239419460296631, 2.9309871196746826, 2.5648903846740723, 29.30187225341797]
Early stopping at epoch 769
Early stopping at epoch 769
Early stopping at epoch 769
Early stopping at epoch 776
Early stopping at epoch 785
Early stopping at epoch 769
Early stopping at epoch 769
Early stopping at epoch 912
Early stopping at epoch 769


[I 2024-10-30 21:34:50,118] Trial 2 finished with value: 3.0734760761260986 and parameters: {'num_layer': 5, 'num_neuron': 6, 'batch_size': 34, 'lr': 0.0011635338541918904, 'weight_decay': 4.0596116104842925e-08, 'warmup_epochs': 657}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 769
Validation loss: [4.482012748718262, 11.650896072387695, 1.8591282367706299, 2.888173818588257, 1.8872982263565063, 2.1303932666778564, 2.1271414756774902, 1.012237310409546, 1.390867829322815, 1.3066117763519287]
Early stopping at epoch 723
Early stopping at epoch 722
Early stopping at epoch 728
Early stopping at epoch 722
Early stopping at epoch 745
Early stopping at epoch 723
Early stopping at epoch 746
Early stopping at epoch 793
Early stopping at epoch 857


[I 2024-10-30 21:35:13,392] Trial 3 finished with value: 21.18133510351181 and parameters: {'num_layer': 3, 'num_neuron': 6, 'batch_size': 40, 'lr': 0.0009505122659935177, 'weight_decay': 3.8396292998041626e-08, 'warmup_epochs': 610}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 831
Validation loss: [3.449284791946411, 193.52212524414062, 1.6843541860580444, 4.559512615203857, 1.7659484148025513, 1.861024260520935, 1.2229540348052979, 1.167273998260498, 1.27794349193573, 1.3029299974441528]
Early stopping at epoch 633
Early stopping at epoch 626
Early stopping at epoch 632
Early stopping at epoch 625
Early stopping at epoch 629
Early stopping at epoch 633
Early stopping at epoch 679
Early stopping at epoch 630
Early stopping at epoch 628


[I 2024-10-30 21:35:33,140] Trial 4 finished with value: 14.381719279289246 and parameters: {'num_layer': 3, 'num_neuron': 9, 'batch_size': 34, 'lr': 0.005338741354740679, 'weight_decay': 1.530485212183145e-07, 'warmup_epochs': 513}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 625
Validation loss: [4.344485282897949, 1.9656119346618652, 11.739871978759766, 2.914477586746216, 2.0960886478424072, 4.276647090911865, 5.419413089752197, 101.91710662841797, 3.3872830867767334, 5.756207466125488]
Early stopping at epoch 855
Early stopping at epoch 858
Early stopping at epoch 865
Early stopping at epoch 856
Early stopping at epoch 870
Early stopping at epoch 858
Early stopping at epoch 859
Early stopping at epoch 855
Early stopping at epoch 869


[I 2024-10-30 21:36:00,340] Trial 5 finished with value: 4.763686883449554 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 32, 'lr': 0.039513097748541164, 'weight_decay': 8.536189862866822e-07, 'warmup_epochs': 743}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 887
Validation loss: [3.5443239212036133, 2.1599130630493164, 1.81303870677948, 2.7499871253967285, 2.411235809326172, 10.310951232910156, 7.573029041290283, 2.3210840225219727, 13.72806453704834, 1.0252413749694824]
Early stopping at epoch 761
Early stopping at epoch 794
Early stopping at epoch 852
Early stopping at epoch 776
Early stopping at epoch 781
Early stopping at epoch 761
Early stopping at epoch 761
Early stopping at epoch 761
Early stopping at epoch 761


[I 2024-10-30 21:36:20,462] Trial 6 finished with value: 1233.0753793239594 and parameters: {'num_layer': 3, 'num_neuron': 5, 'batch_size': 41, 'lr': 0.0037955524026413476, 'weight_decay': 1.754189348745074e-08, 'warmup_epochs': 649}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 761
Validation loss: [3.141395092010498, 12308.3486328125, 2.9675862789154053, 3.61737060546875, 1.6235605478286743, 3.2208151817321777, 1.3494588136672974, 1.4900447130203247, 3.3275880813598633, 1.6673411130905151]
Early stopping at epoch 774
Early stopping at epoch 786
Early stopping at epoch 768
Early stopping at epoch 776
Early stopping at epoch 771
Early stopping at epoch 768
Early stopping at epoch 774
Early stopping at epoch 787
Early stopping at epoch 768


[I 2024-10-30 21:36:41,912] Trial 7 finished with value: 6.533229279518127 and parameters: {'num_layer': 2, 'num_neuron': 10, 'batch_size': 35, 'lr': 0.010568529720322864, 'weight_decay': 4.20167205437253e-08, 'warmup_epochs': 656}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 769
Validation loss: [4.361969470977783, 3.330785036087036, 6.816019535064697, 3.496380567550659, 6.047537803649902, 6.06876277923584, 9.39062786102295, 16.993896484375, 3.173154592514038, 5.653158664703369]
Early stopping at epoch 881
Early stopping at epoch 915
Early stopping at epoch 893
Early stopping at epoch 882
Early stopping at epoch 890
Early stopping at epoch 881
Early stopping at epoch 882
Early stopping at epoch 888
Early stopping at epoch 886


[I 2024-10-30 21:37:03,065] Trial 8 finished with value: 6.0268386721611025 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 45, 'lr': 0.017751524290641543, 'weight_decay': 7.568292060167619e-07, 'warmup_epochs': 769}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 882
Validation loss: [4.659225940704346, 21.550491333007812, 2.0994129180908203, 2.98039174079895, 2.540842056274414, 5.710744380950928, 8.207027435302734, 4.374622344970703, 6.398103713989258, 1.7475248575210571]
Early stopping at epoch 720
Early stopping at epoch 812
Early stopping at epoch 731
Early stopping at epoch 709
Early stopping at epoch 714
Early stopping at epoch 709
Early stopping at epoch 709
Early stopping at epoch 737
Early stopping at epoch 709


[I 2024-10-30 21:37:27,239] Trial 9 finished with value: 6.324145567417145 and parameters: {'num_layer': 4, 'num_neuron': 10, 'batch_size': 33, 'lr': 0.0012329223607243688, 'weight_decay': 1.2315571723665983e-08, 'warmup_epochs': 597}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 788
Validation loss: [1.7328518629074097, 45.40230178833008, 2.3411684036254883, 2.4623358249664307, 2.254539966583252, 2.5044820308685303, 2.1788101196289062, 1.1605217456817627, 1.470639705657959, 1.7338042259216309]
Early stopping at epoch 819
Early stopping at epoch 819
Early stopping at epoch 976
Early stopping at epoch 822
Early stopping at epoch 888
Early stopping at epoch 1090
Early stopping at epoch 819
Early stopping at epoch 921
Early stopping at epoch 937


[I 2024-10-30 21:37:58,899] Trial 10 finished with value: 3.8879655241966247 and parameters: {'num_layer': 5, 'num_neuron': 8, 'batch_size': 37, 'lr': 0.000543117448696857, 'weight_decay': 1.62833383399089e-07, 'warmup_epochs': 707}. Best is trial 2 with value: 3.0734760761260986.


Early stopping at epoch 910
Validation loss: [7.365237712860107, 14.339088439941406, 1.0118458271026611, 4.648403167724609, 1.5466337203979492, 3.6550798416137695, 2.2684173583984375, 1.314466953277588, 1.307550311088562, 1.4229319095611572]
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 821
Early stopping at epoch 1372


[I 2024-10-30 21:38:29,011] Trial 11 finished with value: 2.9594900369644166 and parameters: {'num_layer': 5, 'num_neuron': 8, 'batch_size': 37, 'lr': 0.0005087534804133938, 'weight_decay': 1.9551476095874134e-07, 'warmup_epochs': 709}. Best is trial 11 with value: 2.9594900369644166.


Early stopping at epoch 821
Validation loss: [2.8412606716156006, 2.210505485534668, 1.6052097082138062, 11.793872833251953, 1.9536724090576172, 2.5214591026306152, 1.6832605600357056, 1.325116515159607, 1.376670241355896, 2.2838728427886963]
Early stopping at epoch 814
Early stopping at epoch 834
Early stopping at epoch 811
Early stopping at epoch 811
Early stopping at epoch 813
Early stopping at epoch 819
Early stopping at epoch 868
Early stopping at epoch 811
Early stopping at epoch 811


[I 2024-10-30 21:38:56,831] Trial 12 finished with value: 43.190457582473755 and parameters: {'num_layer': 5, 'num_neuron': 8, 'batch_size': 36, 'lr': 0.0021654217996898086, 'weight_decay': 3.35188262509159e-07, 'warmup_epochs': 699}. Best is trial 11 with value: 2.9594900369644166.


Early stopping at epoch 812
Validation loss: [8.125845909118652, 391.74163818359375, 2.9928529262542725, 3.907101631164551, 2.923259973526001, 3.056791067123413, 6.748580455780029, 3.639211893081665, 3.4226512908935547, 5.34664249420166]
Early stopping at epoch 808
Early stopping at epoch 808
Early stopping at epoch 937
Early stopping at epoch 808
Early stopping at epoch 833
Early stopping at epoch 808
Early stopping at epoch 1085
Early stopping at epoch 1348
Early stopping at epoch 821


[I 2024-10-30 21:39:28,061] Trial 13 finished with value: 1.8740141153335572 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 38, 'lr': 0.0005157728376864142, 'weight_decay': 7.116477814727243e-08, 'warmup_epochs': 696}. Best is trial 13 with value: 1.8740141153335572.


Early stopping at epoch 808
Validation loss: [2.7062628269195557, 3.8604869842529297, 1.1323682069778442, 1.5499956607818604, 1.7582646608352661, 2.336493730545044, 1.393746018409729, 0.9106433391571045, 1.4410609006881714, 1.6508188247680664]
Early stopping at epoch 825
Early stopping at epoch 825
Early stopping at epoch 860
Early stopping at epoch 825
Early stopping at epoch 825
Early stopping at epoch 825
Early stopping at epoch 825
Early stopping at epoch 1126
Early stopping at epoch 976


[I 2024-10-30 21:39:59,069] Trial 14 finished with value: 2.8321723461151125 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 38, 'lr': 0.0005440211675273811, 'weight_decay': 1.0676916501410118e-07, 'warmup_epochs': 713}. Best is trial 13 with value: 1.8740141153335572.


Early stopping at epoch 1047
Validation loss: [3.7359838485717773, 7.838833808898926, 1.37714684009552, 2.6634268760681152, 1.719116449356079, 2.057387590408325, 5.417938709259033, 1.373600959777832, 1.1275697946548462, 1.010718584060669]
Early stopping at epoch 889
Early stopping at epoch 1050
Early stopping at epoch 861
Early stopping at epoch 861
Early stopping at epoch 875
Early stopping at epoch 861
Early stopping at epoch 927
Early stopping at epoch 868
Early stopping at epoch 924


[I 2024-10-30 21:40:29,658] Trial 15 finished with value: 11.365179789066314 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 38, 'lr': 0.0022915500138460387, 'weight_decay': 7.793575055428743e-08, 'warmup_epochs': 749}. Best is trial 13 with value: 1.8740141153335572.


Early stopping at epoch 876
Validation loss: [8.094882011413574, 48.8746223449707, 5.24083948135376, 6.351700305938721, 3.8377206325531006, 9.051167488098145, 24.875873565673828, 2.4178130626678467, 3.786302328109741, 1.120876669883728]
Early stopping at epoch 798
Early stopping at epoch 928
Early stopping at epoch 798
Early stopping at epoch 798
Early stopping at epoch 798
Early stopping at epoch 798
Early stopping at epoch 1232
Early stopping at epoch 878
Early stopping at epoch 818


[I 2024-10-30 21:40:50,383] Trial 16 finished with value: 1.8329599499702454 and parameters: {'num_layer': 4, 'num_neuron': 7, 'batch_size': 43, 'lr': 0.0007921636286268318, 'weight_decay': 7.827545905064409e-08, 'warmup_epochs': 686}. Best is trial 16 with value: 1.8329599499702454.


Early stopping at epoch 1012
Validation loss: [2.4322516918182373, 1.5127856731414795, 3.1769630908966064, 2.2424676418304443, 1.7485451698303223, 1.9528415203094482, 1.0844274759292603, 1.4327366352081299, 1.419731855392456, 1.3268487453460693]
Early stopping at epoch 732
Early stopping at epoch 851
Early stopping at epoch 732
Early stopping at epoch 746
Early stopping at epoch 732
Early stopping at epoch 753
Early stopping at epoch 732
Early stopping at epoch 732
Early stopping at epoch 792


[I 2024-10-30 21:41:07,962] Trial 17 finished with value: 3.336972141265869 and parameters: {'num_layer': 4, 'num_neuron': 7, 'batch_size': 44, 'lr': 0.001857439054130012, 'weight_decay': 8.588420808476909e-08, 'warmup_epochs': 620}. Best is trial 16 with value: 1.8329599499702454.


Early stopping at epoch 732
Validation loss: [2.525721549987793, 2.1658737659454346, 1.4292676448822021, 6.396155834197998, 1.4877899885177612, 6.811934947967529, 6.6810150146484375, 2.9739274978637695, 1.2596304416656494, 1.6384047269821167]
Early stopping at epoch 792
Early stopping at epoch 792
Early stopping at epoch 814
Early stopping at epoch 858
Early stopping at epoch 818
Early stopping at epoch 803
Early stopping at epoch 1039
Early stopping at epoch 967
Early stopping at epoch 792


[I 2024-10-30 21:41:31,538] Trial 18 finished with value: 1.690043318271637 and parameters: {'num_layer': 4, 'num_neuron': 5, 'batch_size': 42, 'lr': 0.0008863564337367059, 'weight_decay': 3.327528914795916e-07, 'warmup_epochs': 680}. Best is trial 18 with value: 1.690043318271637.


Early stopping at epoch 1024
Validation loss: [2.604918956756592, 2.1824564933776855, 1.9211866855621338, 1.6293836832046509, 1.4592219591140747, 1.6823105812072754, 1.0117651224136353, 1.3369876146316528, 1.6722416877746582, 1.3999603986740112]
Early stopping at epoch 691
Early stopping at epoch 718
Early stopping at epoch 801
Early stopping at epoch 686
Early stopping at epoch 688
Early stopping at epoch 678
Early stopping at epoch 706
Early stopping at epoch 696
Early stopping at epoch 676


[I 2024-10-30 21:41:48,253] Trial 19 finished with value: 23.73389815092087 and parameters: {'num_layer': 4, 'num_neuron': 5, 'batch_size': 43, 'lr': 0.003714559377762824, 'weight_decay': 3.8409532513874557e-07, 'warmup_epochs': 564}. Best is trial 18 with value: 1.690043318271637.


Early stopping at epoch 676
Validation loss: [24.030811309814453, 2.2429676055908203, 2.4487504959106445, 2.3902032375335693, 3.04439640045166, 7.521435737609863, 1.1211196184158325, 191.0987548828125, 1.529484510421753, 1.911057710647583]
Early stopping at epoch 790
Early stopping at epoch 1322
Early stopping at epoch 858
Early stopping at epoch 779
Early stopping at epoch 806
Early stopping at epoch 795
Early stopping at epoch 779
Early stopping at epoch 969
Early stopping at epoch 1049


[I 2024-10-30 21:42:09,800] Trial 20 finished with value: 1.6530466556549073 and parameters: {'num_layer': 4, 'num_neuron': 5, 'batch_size': 43, 'lr': 0.0010279980160927279, 'weight_decay': 3.5722029462803684e-07, 'warmup_epochs': 667}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 881
Validation loss: [2.363574266433716, 2.3475871086120605, 0.9635247588157654, 2.4361093044281006, 1.4927858114242554, 1.767439365386963, 1.592334270477295, 0.9360367655754089, 1.2372785806655884, 1.3937963247299194]
Early stopping at epoch 787
Early stopping at epoch 787
Early stopping at epoch 787
Early stopping at epoch 787
Early stopping at epoch 787
Early stopping at epoch 787
Early stopping at epoch 988
Early stopping at epoch 1063
Early stopping at epoch 788


[I 2024-10-30 21:42:29,964] Trial 21 finished with value: 23.491838896274565 and parameters: {'num_layer': 4, 'num_neuron': 5, 'batch_size': 43, 'lr': 0.0008754478152979281, 'weight_decay': 4.291466392054536e-07, 'warmup_epochs': 675}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 827
Validation loss: [1.8298572301864624, 217.9237823486328, 1.5219717025756836, 3.1492886543273926, 1.3913012742996216, 1.730467677116394, 1.4557448625564575, 1.1109563112258911, 3.4149482250213623, 1.3900706768035889]
Early stopping at epoch 754
Early stopping at epoch 760
Early stopping at epoch 777
Early stopping at epoch 841
Early stopping at epoch 737
Early stopping at epoch 737
Early stopping at epoch 737
Early stopping at epoch 756
Early stopping at epoch 795


[I 2024-10-30 21:42:48,555] Trial 22 finished with value: 2.4700465559959413 and parameters: {'num_layer': 4, 'num_neuron': 5, 'batch_size': 45, 'lr': 0.0014423514273666199, 'weight_decay': 2.9945724537527633e-07, 'warmup_epochs': 625}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 929
Validation loss: [2.447892189025879, 1.9935511350631714, 1.3866592645645142, 3.2277743816375732, 2.377533435821533, 1.8241283893585205, 3.8945224285125732, 1.4500231742858887, 4.898909091949463, 1.1994720697402954]
Early stopping at epoch 871
Early stopping at epoch 1240
Early stopping at epoch 953
Early stopping at epoch 849
Early stopping at epoch 849
Early stopping at epoch 867
Early stopping at epoch 989
Early stopping at epoch 934
Early stopping at epoch 849


[I 2024-10-30 21:43:12,735] Trial 23 finished with value: 2.08427038192749 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 42, 'lr': 0.0008316493537526588, 'weight_decay': 5.480752357740461e-07, 'warmup_epochs': 737}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 851
Validation loss: [4.718855381011963, 1.578282356262207, 1.2047888040542603, 2.4718306064605713, 1.877047061920166, 2.6601035594940186, 1.4482961893081665, 1.3982435464859009, 1.8133643865585327, 1.6718919277191162]
Early stopping at epoch 793
Early stopping at epoch 793
Early stopping at epoch 992
Early stopping at epoch 796
Early stopping at epoch 846
Early stopping at epoch 793
Early stopping at epoch 1025
Early stopping at epoch 960
Early stopping at epoch 971


[I 2024-10-30 21:43:40,008] Trial 24 finished with value: 2.497421908378601 and parameters: {'num_layer': 3, 'num_neuron': 5, 'batch_size': 40, 'lr': 0.00076184745827792, 'weight_decay': 2.1865798444280878e-07, 'warmup_epochs': 681}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 994
Validation loss: [3.2462503910064697, 7.37576150894165, 1.3313894271850586, 2.585503578186035, 1.5566712617874146, 1.9001423120498657, 1.3061555624008179, 1.3411153554916382, 3.312143564224243, 1.0190861225128174]
Early stopping at epoch 754
Early stopping at epoch 754
Early stopping at epoch 754
Early stopping at epoch 754
Early stopping at epoch 765
Early stopping at epoch 784
Early stopping at epoch 754
Early stopping at epoch 754
Early stopping at epoch 772


[I 2024-10-30 21:43:58,049] Trial 25 finished with value: 83.04926553964614 and parameters: {'num_layer': 4, 'num_neuron': 9, 'batch_size': 43, 'lr': 0.0016601633605047973, 'weight_decay': 5.865015693022449e-07, 'warmup_epochs': 642}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 754
Validation loss: [4.244340896606445, 791.4924926757812, 7.020713806152344, 8.366578102111816, 7.474327564239502, 2.1506175994873047, 2.125896692276001, 1.4459789991378784, 4.739085674285889, 1.4326233863830566]
Early stopping at epoch 702
Early stopping at epoch 702
Early stopping at epoch 722
Early stopping at epoch 704
Early stopping at epoch 715
Early stopping at epoch 706
Early stopping at epoch 702
Early stopping at epoch 1190
Early stopping at epoch 708


[I 2024-10-30 21:44:16,851] Trial 26 finished with value: 3.3126475334167482 and parameters: {'num_layer': 3, 'num_neuron': 5, 'batch_size': 41, 'lr': 0.0028975985879699205, 'weight_decay': 2.5615723280490915e-07, 'warmup_epochs': 590}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 702
Validation loss: [3.0575380325317383, 9.19213581085205, 1.2863355875015259, 4.521904945373535, 1.6691129207611084, 1.810185432434082, 1.4256222248077393, 1.0740588903427124, 1.4398021697998047, 7.649779319763184]
Early stopping at epoch 784
Early stopping at epoch 784
Early stopping at epoch 1012
Early stopping at epoch 798
Early stopping at epoch 904
Early stopping at epoch 898
Early stopping at epoch 1153
Early stopping at epoch 907
Early stopping at epoch 828


[I 2024-10-30 21:44:38,049] Trial 27 finished with value: 334.4234673976898 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 44, 'lr': 0.000742961969614579, 'weight_decay': 1.2416455346521106e-07, 'warmup_epochs': 672}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 784
Validation loss: [2.4703586101531982, 3325.38720703125, 1.146224021911621, 4.219516754150391, 1.3974502086639404, 4.2265472412109375, 1.2630754709243774, 1.3970719575881958, 1.1961982250213623, 1.53102445602417]
Early stopping at epoch 840
Early stopping at epoch 840
Early stopping at epoch 840
Early stopping at epoch 855
Early stopping at epoch 900
Early stopping at epoch 841
Early stopping at epoch 842
Early stopping at epoch 870
Early stopping at epoch 840


[I 2024-10-30 21:45:00,113] Trial 28 finished with value: 12.552148580551147 and parameters: {'num_layer': 4, 'num_neuron': 9, 'batch_size': 42, 'lr': 0.0012893820029605968, 'weight_decay': 5.5626311077153354e-08, 'warmup_epochs': 728}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 846
Validation loss: [7.057521820068359, 83.93486785888672, 3.9204909801483154, 4.7977399826049805, 1.5536905527114868, 2.1778883934020996, 16.980709075927734, 1.537528157234192, 1.592538595199585, 1.968510389328003]
Early stopping at epoch 760
Early stopping at epoch 749
Early stopping at epoch 749
Early stopping at epoch 776
Early stopping at epoch 750
Early stopping at epoch 749
Early stopping at epoch 749
Early stopping at epoch 757
Early stopping at epoch 749


[I 2024-10-30 21:45:16,637] Trial 29 finished with value: 411.1581172823906 and parameters: {'num_layer': 3, 'num_neuron': 5, 'batch_size': 44, 'lr': 0.006898917564876366, 'weight_decay': 2.586200324205114e-08, 'warmup_epochs': 637}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 759
Validation loss: [4.4124064445495605, 4081.241455078125, 3.153385877609253, 1.884336233139038, 2.4139723777770996, 2.420827627182007, 2.367650032043457, 2.2071540355682373, 1.1621407270431519, 10.31784439086914]
Early stopping at epoch 685
Early stopping at epoch 685
Early stopping at epoch 703
Early stopping at epoch 687
Early stopping at epoch 685
Early stopping at epoch 708
Early stopping at epoch 685
Early stopping at epoch 685
Early stopping at epoch 767


[I 2024-10-30 21:45:33,124] Trial 30 finished with value: 2.7874136567115784 and parameters: {'num_layer': 2, 'num_neuron': 6, 'batch_size': 41, 'lr': 0.0024754306443546172, 'weight_decay': 1.2668717196846585e-07, 'warmup_epochs': 573}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 685
Validation loss: [2.7133119106292725, 5.859895706176758, 1.4895719289779663, 5.465113162994385, 3.009995937347412, 2.8637046813964844, 1.8994611501693726, 2.1997015476226807, 1.1056411266326904, 1.2677394151687622]
Early stopping at epoch 796
Early stopping at epoch 796
Early stopping at epoch 897
Early stopping at epoch 796
Early stopping at epoch 828
Early stopping at epoch 796
Early stopping at epoch 1080
Early stopping at epoch 887
Early stopping at epoch 1008


[I 2024-10-30 21:46:02,855] Trial 31 finished with value: 13.179349398612976 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 39, 'lr': 0.0006685739657215396, 'weight_decay': 7.162755250169641e-08, 'warmup_epochs': 684}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 796
Validation loss: [5.908875465393066, 113.97592163085938, 1.1439120769500732, 1.9256428480148315, 1.617347002029419, 1.6208544969558716, 1.2384445667266846, 1.5123567581176758, 1.2129441499710083, 1.6371949911117554]
Early stopping at epoch 809
Early stopping at epoch 809
Early stopping at epoch 953
Early stopping at epoch 809
Early stopping at epoch 977
Early stopping at epoch 819
Early stopping at epoch 809
Early stopping at epoch 809
Early stopping at epoch 1045


[I 2024-10-30 21:46:32,421] Trial 32 finished with value: 1.829542601108551 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 39, 'lr': 0.0006475580747041781, 'weight_decay': 6.133187661413388e-08, 'warmup_epochs': 697}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 822
Validation loss: [1.9533346891403198, 2.3698832988739014, 1.2519745826721191, 2.0545361042022705, 1.461111307144165, 2.2798423767089844, 2.1327426433563232, 1.592009425163269, 1.3528472185134888, 1.847144365310669]
Early stopping at epoch 778
Early stopping at epoch 778
Early stopping at epoch 779
Early stopping at epoch 778
Early stopping at epoch 778
Early stopping at epoch 778
Early stopping at epoch 793
Early stopping at epoch 778
Early stopping at epoch 900


[I 2024-10-30 21:46:53,001] Trial 33 finished with value: 5.71186615228653 and parameters: {'num_layer': 4, 'num_neuron': 8, 'batch_size': 42, 'lr': 0.000999889965690374, 'weight_decay': 5.164245436141689e-08, 'warmup_epochs': 666}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 815
Validation loss: [3.978311777114868, 3.316807270050049, 20.878786087036133, 10.10969066619873, 1.9269797801971436, 3.2032854557037354, 1.3987095355987549, 9.910924911499023, 1.051570177078247, 1.3435958623886108]
Early stopping at epoch 910
Early stopping at epoch 1028
Early stopping at epoch 910
Early stopping at epoch 1031
Early stopping at epoch 933
Early stopping at epoch 910
Early stopping at epoch 910
Early stopping at epoch 932
Early stopping at epoch 910


[I 2024-10-30 21:47:24,763] Trial 34 finished with value: 502.94523190259935 and parameters: {'num_layer': 5, 'num_neuron': 6, 'batch_size': 39, 'lr': 0.0011018640129482418, 'weight_decay': 2.8442616625166863e-08, 'warmup_epochs': 798}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 954
Validation loss: [4.956273555755615, 5010.6806640625, 1.638257622718811, 1.8976649045944214, 1.5778446197509766, 2.421668529510498, 1.5752662420272827, 1.612991213798523, 1.8790549039840698, 1.2126333713531494]
Early stopping at epoch 876
Early stopping at epoch 936
Early stopping at epoch 876
Early stopping at epoch 879
Early stopping at epoch 876
Early stopping at epoch 876
Early stopping at epoch 876
Early stopping at epoch 876
Early stopping at epoch 889


[I 2024-10-30 21:47:52,723] Trial 35 finished with value: 5.5001971364021305 and parameters: {'num_layer': 4, 'num_neuron': 7, 'batch_size': 40, 'lr': 0.0014953879951575294, 'weight_decay': 5.7482144523503304e-08, 'warmup_epochs': 764}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 877
Validation loss: [5.565990447998047, 34.62360382080078, 1.872551441192627, 2.2595372200012207, 1.7779483795166016, 3.2338593006134033, 1.535168170928955, 1.4777717590332031, 1.1472920179367065, 1.5082488059997559]
Early stopping at epoch 835
Early stopping at epoch 835
Early stopping at epoch 835
Early stopping at epoch 835
Early stopping at epoch 971
Early stopping at epoch 835
Early stopping at epoch 835
Early stopping at epoch 1358
Early stopping at epoch 932


[I 2024-10-30 21:48:17,739] Trial 36 finished with value: 2.995285177230835 and parameters: {'num_layer': 5, 'num_neuron': 6, 'batch_size': 42, 'lr': 0.0010376345888079717, 'weight_decay': 5.26670124411113e-07, 'warmup_epochs': 723}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 867
Validation loss: [6.287963390350342, 2.80173397064209, 1.874274492263794, 3.3468875885009766, 1.2191922664642334, 2.2812817096710205, 1.3279927968978882, 8.06800651550293, 1.2359061241149902, 1.5096129179000854]
Early stopping at epoch 804
Early stopping at epoch 804
Early stopping at epoch 804
Early stopping at epoch 804
Early stopping at epoch 973
Early stopping at epoch 804
Early stopping at epoch 981
Early stopping at epoch 804
Early stopping at epoch 1104


[I 2024-10-30 21:48:37,957] Trial 37 finished with value: 33.3797945857048 and parameters: {'num_layer': 3, 'num_neuron': 5, 'batch_size': 43, 'lr': 0.0006829263203664288, 'weight_decay': 1.767595687343601e-07, 'warmup_epochs': 692}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 943
Validation loss: [3.1616859436035156, 314.4872741699219, 1.5014514923095703, 2.0608761310577393, 1.5076338052749634, 5.139105796813965, 1.3289176225662231, 1.984518051147461, 1.3257877826690674, 1.3006950616836548]
Early stopping at epoch 627
Early stopping at epoch 627
Early stopping at epoch 627
Early stopping at epoch 627
Early stopping at epoch 627
Early stopping at epoch 627
Early stopping at epoch 810
Early stopping at epoch 627
Early stopping at epoch 1248


[I 2024-10-30 21:48:57,465] Trial 38 finished with value: 2.351237678527832 and parameters: {'num_layer': 4, 'num_neuron': 9, 'batch_size': 41, 'lr': 0.0006935390519732802, 'weight_decay': 3.185475576021876e-08, 'warmup_epochs': 515}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 629
Validation loss: [2.7791337966918945, 4.684900760650635, 2.701572895050049, 3.089765787124634, 1.691279411315918, 1.6694422960281372, 1.466521143913269, 2.6966488361358643, 1.0199424028396606, 1.7131694555282593]
Early stopping at epoch 782
Early stopping at epoch 791
Early stopping at epoch 780
Early stopping at epoch 789
Early stopping at epoch 783
Early stopping at epoch 783
Early stopping at epoch 785
Early stopping at epoch 796
Early stopping at epoch 782


[I 2024-10-30 21:49:15,442] Trial 39 finished with value: 10.606049585342408 and parameters: {'num_layer': 3, 'num_neuron': 6, 'batch_size': 45, 'lr': 0.0176127921798861, 'weight_decay': 2.4602307418366513e-07, 'warmup_epochs': 667}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 864
Validation loss: [3.647447347640991, 3.0135748386383057, 2.098081111907959, 2.330118179321289, 6.119406223297119, 60.887393951416016, 6.102048873901367, 3.5381040573120117, 6.229271411895752, 12.095049858093262]
Early stopping at epoch 779
Early stopping at epoch 768
Early stopping at epoch 783
Early stopping at epoch 771
Early stopping at epoch 770
Early stopping at epoch 769
Early stopping at epoch 772
Early stopping at epoch 768
Early stopping at epoch 809


[I 2024-10-30 21:49:31,955] Trial 40 finished with value: 11.869680345058441 and parameters: {'num_layer': 2, 'num_neuron': 7, 'batch_size': 44, 'lr': 0.03681805508420043, 'weight_decay': 9.958679628424277e-08, 'warmup_epochs': 654}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 767
Validation loss: [3.039391279220581, 3.751966953277588, 1.9068611860275269, 5.090364933013916, 83.03450012207031, 6.426702499389648, 2.855694055557251, 3.0895538330078125, 3.43796968460083, 6.063798904418945]
Early stopping at epoch 805
Early stopping at epoch 849
Early stopping at epoch 1026
Early stopping at epoch 805
Early stopping at epoch 827
Early stopping at epoch 806
Early stopping at epoch 1032
Early stopping at epoch 805
Early stopping at epoch 1260


[I 2024-10-30 21:50:04,415] Trial 41 finished with value: 2.0709542274475097 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 37, 'lr': 0.0006167569298124421, 'weight_decay': 6.630506658035215e-08, 'warmup_epochs': 693}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 1010
Validation loss: [2.768022060394287, 1.7611624002456665, 1.1501795053482056, 2.012138843536377, 1.6140601634979248, 1.7912285327911377, 1.201965093612671, 5.597176551818848, 1.234701156616211, 1.5789079666137695]
Early stopping at epoch 831
Early stopping at epoch 831
Early stopping at epoch 831
Early stopping at epoch 831
Early stopping at epoch 842
Early stopping at epoch 831
Early stopping at epoch 968
Early stopping at epoch 838
Early stopping at epoch 831


[I 2024-10-30 21:50:33,789] Trial 42 finished with value: 2.32921359539032 and parameters: {'num_layer': 5, 'num_neuron': 8, 'batch_size': 36, 'lr': 0.000503977249453131, 'weight_decay': 4.330512763238845e-08, 'warmup_epochs': 719}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 1041
Validation loss: [3.6145782470703125, 5.13544225692749, 2.0099306106567383, 2.4517629146575928, 1.7613015174865723, 2.9076576232910156, 1.244571328163147, 1.600304126739502, 1.5010364055633545, 1.0655509233474731]
Early stopping at epoch 810
Early stopping at epoch 954
Early stopping at epoch 812
Early stopping at epoch 810
Early stopping at epoch 810
Early stopping at epoch 810
Early stopping at epoch 872
Early stopping at epoch 822
Early stopping at epoch 817


[I 2024-10-30 21:51:02,261] Trial 43 finished with value: 2.17194344997406 and parameters: {'num_layer': 5, 'num_neuron': 7, 'batch_size': 39, 'lr': 0.000910213642410022, 'weight_decay': 1.2733860322964777e-07, 'warmup_epochs': 698}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 810
Validation loss: [4.8741888999938965, 2.437579393386841, 1.6925013065338135, 3.1888020038604736, 1.7573367357254028, 1.7929259538650513, 1.4514870643615723, 1.437082052230835, 1.572710633277893, 1.5148204565048218]
Early stopping at epoch 748
Early stopping at epoch 747
Early stopping at epoch 747
Early stopping at epoch 779
Early stopping at epoch 781
Early stopping at epoch 747
Early stopping at epoch 934
Early stopping at epoch 747
Early stopping at epoch 905


[I 2024-10-30 21:51:29,460] Trial 44 finished with value: 1.7654189467430115 and parameters: {'num_layer': 5, 'num_neuron': 6, 'batch_size': 40, 'lr': 0.0011778765704150004, 'weight_decay': 9.29353320962666e-07, 'warmup_epochs': 635}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 747
Validation loss: [2.328446865081787, 1.9397014379501343, 1.5939279794692993, 2.705760955810547, 1.5443949699401855, 1.9633712768554688, 1.4595600366592407, 1.3475892543792725, 1.0923302173614502, 1.6791064739227295]
Early stopping at epoch 743
Early stopping at epoch 918
Early stopping at epoch 743
Early stopping at epoch 743
Early stopping at epoch 743
Early stopping at epoch 748
Early stopping at epoch 770
Early stopping at epoch 752
Early stopping at epoch 798


[I 2024-10-30 21:51:55,071] Trial 45 finished with value: 1.973783028125763 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 40, 'lr': 0.0011535512844887542, 'weight_decay': 9.923983747934685e-07, 'warmup_epochs': 631}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 805
Validation loss: [3.2976832389831543, 1.6099762916564941, 1.549251914024353, 3.616166830062866, 1.7024991512298584, 2.3257062435150146, 1.260523796081543, 1.4840245246887207, 1.4993805885314941, 1.3926177024841309]
Early stopping at epoch 735
Early stopping at epoch 724
Early stopping at epoch 724
Early stopping at epoch 724
Early stopping at epoch 724
Early stopping at epoch 731
Early stopping at epoch 786
Early stopping at epoch 724
Early stopping at epoch 724


[I 2024-10-30 21:52:16,779] Trial 46 finished with value: 2.6541247844696043 and parameters: {'num_layer': 5, 'num_neuron': 5, 'batch_size': 41, 'lr': 0.0017916033639855309, 'weight_decay': 7.069234553288865e-07, 'warmup_epochs': 612}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 828
Validation loss: [4.411935806274414, 5.6447858810424805, 1.2157642841339111, 3.964144229888916, 2.290989398956299, 1.66295325756073, 1.3178616762161255, 1.5764994621276855, 1.700002908706665, 2.7563109397888184]
Early stopping at epoch 765
Early stopping at epoch 765
Early stopping at epoch 765
Early stopping at epoch 801
Early stopping at epoch 765
Early stopping at epoch 765
Early stopping at epoch 783
Early stopping at epoch 765
Early stopping at epoch 765


[I 2024-10-30 21:52:37,197] Trial 47 finished with value: 613896.6507045984 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 42, 'lr': 0.0013511201888522573, 'weight_decay': 4.515033487719981e-07, 'warmup_epochs': 653}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 765
Validation loss: [2.55228328704834, 6138947.5, 2.5385477542877197, 1.903812050819397, 1.727954626083374, 3.548551559448242, 1.4167394638061523, 1.9315441846847534, 1.5205374956130981, 1.8670755624771118]
Early stopping at epoch 771
Early stopping at epoch 783
Early stopping at epoch 771
Early stopping at epoch 773
Early stopping at epoch 771
Early stopping at epoch 771
Early stopping at epoch 771
Early stopping at epoch 771
Early stopping at epoch 775


[I 2024-10-30 21:53:03,484] Trial 48 finished with value: 75912.64843539 and parameters: {'num_layer': 5, 'num_neuron': 5, 'batch_size': 40, 'lr': 0.004789444071084909, 'weight_decay': 7.282281486411192e-07, 'warmup_epochs': 659}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 777
Validation loss: [3.019744634628296, 759094.5, 1.223127841949463, 3.4810173511505127, 2.444221258163452, 9.436936378479004, 7.002040386199951, 1.4709240198135376, 2.1856231689453125, 1.7207188606262207]
Early stopping at epoch 705
Early stopping at epoch 705
Early stopping at epoch 705
Early stopping at epoch 724
Early stopping at epoch 1125
Early stopping at epoch 705
Early stopping at epoch 705
Early stopping at epoch 927
Early stopping at epoch 770


[I 2024-10-30 21:53:22,869] Trial 49 finished with value: 1.774815607070923 and parameters: {'num_layer': 4, 'num_neuron': 6, 'batch_size': 43, 'lr': 0.0009027238512416587, 'weight_decay': 1.861176803145822e-08, 'warmup_epochs': 593}. Best is trial 20 with value: 1.6530466556549073.


Early stopping at epoch 790
Validation loss: [2.1141748428344727, 2.304706573486328, 1.4957414865493774, 2.028783082962036, 1.3187745809555054, 2.9198496341705322, 1.237215280532837, 1.5420382022857666, 1.3971918821334839, 1.3896805047988892]
Best hyperparameters: {'num_layer': 4, 'num_neuron': 5, 'batch_size': 43, 'lr': 0.0010279980160927279, 'weight_decay': 3.5722029462803684e-07, 'warmup_epochs': 667}


# Case 1: Single model for each fold

In [None]:
# Case 1 evaluation: for every cross-validation fold, load that fold's trained
# network, predict the empirical-model parameters for the training set and both
# test sets, convert them to capacity curves, and pickle all results.
best_model_dict = torch.load(best_model_path)
hidden_sizes = [best_model_dict['num_neuron']]*best_model_dict['num_layer']

input_size = 10
output_size = 4
# Training and test features as tensors
X_train_PCA_tensor = torch.tensor(X_train_PCA,dtype=torch.float32)
X_test_in_PCA_tensor = torch.tensor(X_test_in_PCA,dtype=torch.float32)
X_test_out_PCA_tensor = torch.tensor(X_test_out_PCA,dtype=torch.float32)


def _capacity_and_cov_from_outputs(b_pred, N_cycles, num_cells):
    """Convert raw network outputs into clipped capacity curves and cov values.

    Relies on the notebook-level names ``empirical_model``, ``a`` and ``b_weight``.

    Parameters
    ----------
    b_pred : np.ndarray
        Network output with one row per cell. Columns 0-2 are unpacked into
        ``empirical_model``; column 3 is returned unchanged as ``cov``.
    N_cycles : indexable
        ``N_cycles[i]`` is forwarded to ``empirical_model`` for cell ``i``.
    num_cells : int
        Number of leading rows of ``b_pred`` to process.

    Returns
    -------
    tuple of np.ndarray
        ``(Q_pred, cov_pred)``: the per-cell ``empirical_model`` outputs clipped
        to [0, 1], and the per-cell values of output column 3.
    """
    Q_pred = []
    cov_pred = []
    for i in range(num_cells):
        b = b_pred[i,:3]
        Q_pred.append(np.clip(empirical_model(a,*b,N_cycles[i],b_weight),0,1))
        cov_pred.append(b_pred[i,3])
    return np.array(Q_pred), np.array(cov_pred)


# One list per quantity and split; each list receives one array per fold
b_train_pred = []
b_test_in_pred = []
b_test_out_pred = []

Q_train_pred = []
Q_test_in_pred = []
Q_test_out_pred = []

cov_train_pred = []
cov_test_in_pred = []
cov_test_out_pred = []

# (features, cycle numbers, number of cells, b results, Q results, cov results) per split
splits = (
    (X_train_PCA_tensor, N_train, num_training_cells, b_train_pred, Q_train_pred, cov_train_pred),
    (X_test_in_PCA_tensor, N_test_in, num_test_in_cells, b_test_in_pred, Q_test_in_pred, cov_test_in_pred),
    (X_test_out_PCA_tensor, N_test_out, num_test_out_cells, b_test_out_pred, Q_test_out_pred, cov_test_out_pred),
)

for fold in range(10):
    # Load the model for the current fold
    model = Network_cof_variation(input_size,hidden_sizes,output_size)
    model.load_state_dict(best_model_dict['model_state'][fold])

    # Evaluate the model with the complete training set and the test sets
    model.eval()
    for X_tensor, N_cycles, num_cells, b_results, Q_results, cov_results in splits:
        with torch.no_grad():
            b_pred_fold = model(X_tensor).numpy()
        Q_pred_fold, cov_pred_fold = _capacity_and_cov_from_outputs(b_pred_fold, N_cycles, num_cells)

        # Save the results from the current fold
        b_results.append(np.array(b_pred_fold))
        Q_results.append(Q_pred_fold)
        cov_results.append(cov_pred_fold)


# Write the results to files (one pickle per quantity and split)
results_dir = 'Empirical_parameter_results/E2E_NNE_V2'
results_to_save = {
    'b_train_pred_1': b_train_pred,
    'b_test_in_pred_1': b_test_in_pred,
    'b_test_out_pred_1': b_test_out_pred,
    'Q_train_pred_1': Q_train_pred,
    'Q_test_in_pred_1': Q_test_in_pred,
    'Q_test_out_pred_1': Q_test_out_pred,
    'cov_train_pred_1': cov_train_pred,
    'cov_test_in_pred_1': cov_test_in_pred,
    'cov_test_out_pred_1': cov_test_out_pred,
}
for result_name, result in results_to_save.items():
    with open(f'{results_dir}/{result_name}.pkl','wb') as f:
        pickle.dump(result,f)

In [None]:
# Per-fold MAE / RMSE between the reference capacity curves and the curves
# predicted by each fold's model, averaged over the cells of a split and
# scaled by 100.
MAE_training, MAE_test_in, MAE_test_out = [], [], []
RMSE_training, RMSE_test_in, RMSE_test_out = [], [], []

for fold in range(10):
    # Predictions made by this fold's model for each split
    Q_train_fold = Q_train_pred[fold]
    Q_test_in_fold = Q_test_in_pred[fold]
    Q_test_out_fold = Q_test_out_pred[fold]

    MAE_training.append(np.mean([mean_absolute_error(Q_train[i],Q_train_fold[i])
                                 for i in range(num_training_cells)])*100)
    MAE_test_in.append(np.mean([mean_absolute_error(Q_test_in[i],Q_test_in_fold[i])
                                for i in range(num_test_in_cells)])*100)
    MAE_test_out.append(np.mean([mean_absolute_error(Q_test_out[i],Q_test_out_fold[i])
                                 for i in range(num_test_out_cells)])*100)

    RMSE_training.append(np.mean([root_mean_squared_error(Q_train[i],Q_train_fold[i])
                                  for i in range(num_training_cells)])*100)
    RMSE_test_in.append(np.mean([root_mean_squared_error(Q_test_in[i],Q_test_in_fold[i])
                                 for i in range(num_test_in_cells)])*100)
    RMSE_test_out.append(np.mean([root_mean_squared_error(Q_test_out[i],Q_test_out_fold[i])
                                  for i in range(num_test_out_cells)])*100)

# Persist the per-fold metrics
result_dict = dict(MAE_training=MAE_training,
                   MAE_test_in=MAE_test_in,
                   MAE_test_out=MAE_test_out,
                   RMSE_training=RMSE_training,
                   RMSE_test_in=RMSE_test_in,
                   RMSE_test_out=RMSE_test_out)

with open('Empirical_parameter_results/E2E_NNE_V2/result_dict.pkl','wb') as f:
    pickle.dump(result_dict,f)

# Average each metric over the folds
mean_MAE_training, mean_MAE_test_in, mean_MAE_test_out = (
    np.mean(MAE_training), np.mean(MAE_test_in), np.mean(MAE_test_out))
mean_RMSE_training, mean_RMSE_test_in, mean_RMSE_test_out = (
    np.mean(RMSE_training), np.mean(RMSE_test_in), np.mean(RMSE_test_out))

print(f"Mean MAE training: {mean_MAE_training:.2f}",
      f"Mean MAE test in: {mean_MAE_test_in:.2f}",
      f"Mean MAE test out: {mean_MAE_test_out:.2f}",
      sep="\n")

print(f"Mean RMSE training: {mean_RMSE_training:.2f}",
      f"Mean RMSE test in: {mean_RMSE_test_in:.2f}",
      f"Mean RMSE test out: {mean_RMSE_test_out:.2f}",
      sep="\n")

Mean MAE training: 1.85
Mean MAE test in: 2.43
Mean MAE test out: 7.84
Mean RMSE training: 2.24
Mean RMSE test in: 2.97
Mean RMSE test out: 9.96


In [13]:
# Show the individual per-fold MAE values for each split, one line per split
print(
    f"MAE training: {MAE_training}",
    f"MAE test in: {MAE_test_in}",
    f"MAE test out: {MAE_test_out}",
    sep="\n",
)

MAE training: [1.643545350505334, 2.09967585249802, 1.7676023752392962, 1.974062259304656, 1.8837080791359464, 1.8206665489696234, 1.7522265953285927, 1.8638849341687598, 1.9714080606586553, 1.7470637235898319]
MAE test in: [2.3389947311835124, 2.4640858057345683, 2.363188725121361, 2.564984971141379, 3.4211871588080895, 2.102207843626132, 2.2165959025676902, 1.986713193687822, 2.4934434410148967, 2.3846918264725514]
MAE test out: [12.384958694318204, 7.712989836741765, 4.418280810455669, 13.134221757590508, 6.314898340426757, 3.045295932947437, 4.014358531770969, 5.434284287702664, 11.997625725366785, 9.924846708968296]


# Case 2: Ensemble of 5 models for each fold

In [None]:
# Case 2 training: for each of the 10 cross-validation folds, train M independent
# networks with the best hyperparameters and collect their weights, then save
# everything to a single checkpoint file.
# Load the hyperparameters for the best model
# hidden_sizes has one entry (num_neuron) per layer (num_layer entries in total)
hidden_sizes = [best_model_dict['num_neuron']]*best_model_dict['num_layer']
lr = best_model_dict['lr']
batch_size = best_model_dict['batch_size']
weight_decay = best_model_dict['weight_decay']
warmup_epochs = best_model_dict['warmup_epochs']
# Define the warm-up epochs for early stopping
# (early stopping is only evaluated for epochs after warmup_epochs + 100)
warmup_epochs_early_stopping = warmup_epochs + 100
num_epochs = 3000
# Number of models to train for ensemble
M = 5
# Beta value for Beta-NLL loss
beta = 0.5
# Gradient clipping value
max_grad_norm = 1.0

# Maps fold index -> list of M state dicts (one per ensemble member)
model_state_dict_5 = {}

for fold in range(10):
    model_state_dict_5[fold] = []
    # Final validation loss of each ensemble member of this fold
    # NOTE(review): collected but not used anywhere else in this cell
    val_loss_fold = []
    for m in range(M):
        # Fresh model, optimizer and early-stopping state for every ensemble member
        model = Network_cof_variation(input_size, hidden_sizes, output_size)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        # criterion drives back-propagation; criterion_val is only used for
        # monitoring (train_loss) and for the validation / early-stopping loss
        criterion = end_to_end_loss_NN_betaNLL
        criterion_val = end_to_end_loss_NN_NLL_cov
        early_stopping = EarlyStopping(patience=10, min_delta=1e-4)
        # Freeze weights for coefficient of variation layer before warm-up epochs
        # `warmup` is also forwarded to both loss functions
        # (presumably they treat the warm-up phase differently — confirm in their definitions)
        warmup = True
        for param in model.coe_var_layer.parameters():
            param.requires_grad = False

        train_dataset = train_dataset_CV[fold]
        val_dataset = val_dataset_CV[fold]

        # Training batches are reshuffled each epoch; validation uses a fixed batch size of 16
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

        for epoch in range(num_epochs):
            # End of warm-up: unfreeze the coefficient of variation layer
            if epoch == warmup_epochs:
                warmup = False
                for param in model.coe_var_layer.parameters():
                    param.requires_grad = True

            model.train()
            train_loss = 0.0
            for batch in train_loader:
                inputs, labels = batch
                optimizer.zero_grad()
                outputs = model(inputs)
                # Optimized loss (Beta-NLL) and the monitoring loss on the same outputs
                loss = criterion(outputs, labels, beta, a, warmup, b_weight)
                loss_val = criterion_val(outputs, labels, a, warmup, b_weight)
                loss.backward()
                # Gradient clipping is applied only once the warm-up phase is over
                if epoch >= warmup_epochs:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
                optimizer.step()
                train_loss += loss_val.item()
            # Mean monitoring loss over batches
            # NOTE(review): train_loss is not used anywhere else in this cell
            train_loss /= len(train_loader)

            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for inputs, targets in val_loader:
                    outputs = model(inputs)
                    loss = criterion_val(outputs, targets, a, warmup, b_weight)
                    val_loss += loss.item()
            # Mean over validation batches (each batch weighted equally)
            val_loss /= len(val_loader)

            # Only start checking for early stopping after the extended warm-up
            if epoch > warmup_epochs_early_stopping:
                early_stopping(val_loss)
                if early_stopping.early_stop:
                    print(f"Early stopping at epoch {epoch + 1}")
                    break
        
        # NOTE(review): the stored weights are those of the last trained epoch;
        # early_stopping only receives the loss value, so no best-epoch weights
        # are restored here
        model_state_dict_5[fold].append(model.state_dict())
        val_loss_fold.append(val_loss)
        print(f"Fold {fold + 1}, Model {m + 1}, Validation loss: {val_loss}")
    
# Save all ensemble weights together with the hyperparameters used to train them
with open('Best_network/best_NN_NLL_cov_V2_5.pth','wb') as f:
    torch.save({'model_state':model_state_dict_5,
                'lr':lr,
                'num_neuron':best_model_dict['num_neuron'],
                'num_layer':best_model_dict['num_layer'],
                'batch_size':batch_size,
                'weight_decay':weight_decay,
                'warmup_epochs':warmup_epochs},f)

        
        

Early stopping at epoch 779
Fold 1, Model 1, Validation loss: 3.8215935230255127
Early stopping at epoch 779
Fold 1, Model 2, Validation loss: 2.4502148628234863
Early stopping at epoch 779
Fold 1, Model 3, Validation loss: 3.09334397315979
Early stopping at epoch 779
Fold 1, Model 4, Validation loss: 3.1151938438415527
Early stopping at epoch 779
Fold 1, Model 5, Validation loss: 2.4279656410217285
Early stopping at epoch 779
Fold 2, Model 1, Validation loss: 2.0148041248321533
Early stopping at epoch 779
Fold 2, Model 2, Validation loss: 3.1719396114349365
Early stopping at epoch 779
Fold 2, Model 3, Validation loss: 1.9256962537765503
Early stopping at epoch 852
Fold 2, Model 4, Validation loss: 3.7551651000976562
Early stopping at epoch 779
Fold 2, Model 5, Validation loss: 12.877677917480469
Early stopping at epoch 779
Fold 3, Model 1, Validation loss: 1.2635449171066284
Early stopping at epoch 779
Fold 3, Model 2, Validation loss: 1.644436001777649
Early stopping at epoch 779
Fol

In [29]:
# Run every stored Case 2 ensemble member of every fold on the training,
# in-distribution test and out-of-distribution test inputs.
# Result: fold index -> array stacking the raw network outputs of all members
# along the first axis.
b_train_pred_5_all = {}
b_test_in_pred_5_all = {}
b_test_out_pred_5_all = {}

for fold in range(10):
    b_train_pred_5 = []
    b_test_in_pred_5 = []
    b_test_out_pred_5 = []

    # Iterate over the state dicts that were actually stored for this fold
    # rather than over range(M): M is a notebook-level global that the Case 3
    # training cell rebinds to 10, so re-running this cell after Case 3 would
    # otherwise index past the 5 stored members and raise IndexError.
    for m, member_state in enumerate(model_state_dict_5[fold]):
        model = Network_cof_variation(input_size, hidden_sizes, output_size)
        model.load_state_dict(member_state)

        # Inference only: eval mode and no autograd bookkeeping
        model.eval()
        with torch.no_grad():
            b_train_pred_fold_m = model(X_train_PCA_tensor).numpy()
            b_test_in_pred_fold_m = model(X_test_in_PCA_tensor).numpy()
            b_test_out_pred_fold_m = model(X_test_out_PCA_tensor).numpy()

        b_train_pred_5.append(b_train_pred_fold_m)
        b_test_in_pred_5.append(b_test_in_pred_fold_m)
        b_test_out_pred_5.append(b_test_out_pred_fold_m)

    b_train_pred_5_all[fold] = np.array(b_train_pred_5)
    b_test_in_pred_5_all[fold] = np.array(b_test_in_pred_5)
    b_test_out_pred_5_all[fold] = np.array(b_test_out_pred_5)

In [None]:
# Persist the raw Case 2 network outputs (one pickle per data split).
for pkl_path, pkl_payload in (
    ('Empirical_parameter_results/E2E_NNE_V2/b_train_pred_5_all.pkl', b_train_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/b_test_in_pred_5_all.pkl', b_test_in_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/b_test_out_pred_5_all.pkl', b_test_out_pred_5_all),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(pkl_payload, f)

In [None]:
# Case 2: turn the raw network outputs into capacity curves (scaled by 100) and
# standard deviations, both per ensemble member and combined over the ensemble.
# Every dict below is keyed by fold index.
Q_train_pred_5_all, Q_test_in_pred_5_all, Q_test_out_pred_5_all = {}, {}, {}
std_train_pred_5_all, std_test_in_pred_5_all, std_test_out_pred_5_all = {}, {}, {}
Q_train_pred_5_ensemble, Q_test_in_pred_5_ensemble, Q_test_out_pred_5_ensemble = {}, {}, {}
std_train_pred_5_ensemble, std_test_in_pred_5_ensemble, std_test_out_pred_5_ensemble = {}, {}, {}

for fold in range(10):
    # (raw outputs, cycle numbers, points per curve, four destination dicts)
    split_specs = (
        (b_train_pred_5_all[fold], N_train, Q_train.shape[1],
         Q_train_pred_5_all, std_train_pred_5_all,
         Q_train_pred_5_ensemble, std_train_pred_5_ensemble),
        (b_test_in_pred_5_all[fold], N_test_in, Q_test_in.shape[1],
         Q_test_in_pred_5_all, std_test_in_pred_5_all,
         Q_test_in_pred_5_ensemble, std_test_in_pred_5_ensemble),
        (b_test_out_pred_5_all[fold], N_test_out, Q_test_out.shape[1],
         Q_test_out_pred_5_all, std_test_out_pred_5_all,
         Q_test_out_pred_5_ensemble, std_test_out_pred_5_ensemble),
    )

    for b_members, N_split, n_points, Q_all_out, std_all_out, Q_ens_out, std_ens_out in split_specs:
        # Capacity curve of every cell for every member: the first three
        # network outputs feed the empirical model, the fourth is ignored here.
        Q_members = np.array([
            [empirical_model(a, b1, b2, b3, N_split[i], b_weight) * 100
             for i, (b1, b2, b3, _) in enumerate(b_member)]
            for b_member in b_members
        ])

        # Fourth network output (coefficient of variation), repeated along
        # the curve axis so it lines up with Q_members.
        cov_members = np.array([
            np.repeat(b_member[:, 3][:, np.newaxis], n_points, axis=1)
            for b_member in b_members
        ])

        # Per-member standard deviation: coefficient of variation times (100 - Q)
        std_members = cov_members * (100 - Q_members)
        var_members = std_members ** 2

        # Ensemble mean over members, and the variance of the combined prediction
        Q_combined = np.mean(Q_members, axis=0)
        var_combined = np.mean(var_members + Q_members**2 - Q_combined**2, axis=0)

        Q_all_out[fold] = Q_members
        std_all_out[fold] = std_members
        Q_ens_out[fold] = Q_combined
        std_ens_out[fold] = np.sqrt(var_combined)

# Persist the Case 2 per-member and ensemble predictions / standard deviations.
# All files go to the same results directory. The path for
# std_train_pred_5_all previously read 'Empirical-parameter_results' (hyphen
# instead of underscore), which targets a different directory than every other
# output of this notebook.
for pkl_path, pkl_payload in (
    ('Empirical_parameter_results/E2E_NNE_V2/Q_train_pred_5_all.pkl', Q_train_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_in_pred_5_all.pkl', Q_test_in_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_out_pred_5_all.pkl', Q_test_out_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/std_train_pred_5_all.pkl', std_train_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_in_pred_5_all.pkl', std_test_in_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_out_pred_5_all.pkl', std_test_out_pred_5_all),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_train_pred_5_ensemble.pkl', Q_train_pred_5_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_in_pred_5_ensemble.pkl', Q_test_in_pred_5_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_out_pred_5_ensemble.pkl', Q_test_out_pred_5_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/std_train_pred_5_ensemble.pkl', std_train_pred_5_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_in_pred_5_ensemble.pkl', std_test_in_pred_5_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_out_pred_5_ensemble.pkl', std_test_out_pred_5_ensemble),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(pkl_payload, f)


In [None]:
# Case 2 error metrics: for every fold, compute MAE and RMSE per cell between
# the reference curve (Q * 100) and the ensemble-mean prediction, then average
# over the cells of each split. Each list ends up with one entry per fold.
MAE_training, MAE_test_in, MAE_test_out = [], [], []
RMSE_training, RMSE_test_in, RMSE_test_out = [], [], []

for fold in range(10):
    # (reference curves, ensemble prediction, number of cells, MAE sink, RMSE sink)
    split_specs = (
        (Q_train, Q_train_pred_5_ensemble[fold], num_training_cells, MAE_training, RMSE_training),
        (Q_test_in, Q_test_in_pred_5_ensemble[fold], num_test_in_cells, MAE_test_in, RMSE_test_in),
        (Q_test_out, Q_test_out_pred_5_ensemble[fold], num_test_out_cells, MAE_test_out, RMSE_test_out),
    )
    for Q_ref, Q_hat, n_cells, mae_sink, rmse_sink in split_specs:
        mae_per_cell = [mean_absolute_error(Q_ref[i]*100, Q_hat[i]) for i in range(n_cells)]
        rmse_per_cell = [root_mean_squared_error(Q_ref[i]*100, Q_hat[i]) for i in range(n_cells)]
        mae_sink.append(np.mean(mae_per_cell))
        rmse_sink.append(np.mean(rmse_per_cell))

# Per-fold metrics, saved for later comparison
result_dict = {
    'MAE_training': MAE_training,
    'MAE_test_in': MAE_test_in,
    'MAE_test_out': MAE_test_out,
    'RMSE_training': RMSE_training,
    'RMSE_test_in': RMSE_test_in,
    'RMSE_test_out': RMSE_test_out,
}

with open('Empirical_parameter_results/E2E_NNE_V2/result_dict_5.pkl','wb') as f:
    pickle.dump(result_dict, f)

# Averages over the 10 folds
mean_MAE_training, mean_MAE_test_in, mean_MAE_test_out = (
    np.mean(MAE_training), np.mean(MAE_test_in), np.mean(MAE_test_out)
)
mean_RMSE_training, mean_RMSE_test_in, mean_RMSE_test_out = (
    np.mean(RMSE_training), np.mean(RMSE_test_in), np.mean(RMSE_test_out)
)

print(f"Mean MAE training: {mean_MAE_training:.2f}")
print(f"Mean MAE test in: {mean_MAE_test_in:.2f}")
print(f"Mean MAE test out: {mean_MAE_test_out:.2f}")

print(f"Mean RMSE training: {mean_RMSE_training:.2f}")
print(f"Mean RMSE test in: {mean_RMSE_test_in:.2f}")
print(f"Mean RMSE test out: {mean_RMSE_test_out:.2f}")

Mean MAE training: 1.70
Mean MAE test in: 2.00
Mean MAE test out: 6.44
Mean RMSE training: 2.10
Mean RMSE test in: 2.45
Mean RMSE test out: 8.05


In [33]:
# Show the per-fold MAE values of the Case 2 ensemble, one split per line.
print(
    f"MAE training: {MAE_training}",
    f"MAE test in: {MAE_test_in}",
    f"MAE test out: {MAE_test_out}",
    sep="\n",
)

MAE training: [1.78373181841315, 1.6457707954245309, 1.6905657949088835, 1.6914115805063543, 1.6920497789455553, 1.7432345106643212, 1.689637178767997, 1.6798989018940038, 1.6583976309139477, 1.7403471134382769]
MAE test in: [2.0343837807806238, 1.9400271014985158, 2.2426159246530264, 2.0262238336288414, 2.0114062041676566, 1.9822969096112968, 2.001414718506352, 2.0507026018673056, 1.9423711434600974, 1.7585559089400231]
MAE test out: [4.658718441482848, 7.166958282987388, 9.293466977805746, 4.6115828530037035, 5.957440617093438, 4.959447638833681, 6.003882856250859, 8.961618578398028, 7.003241850328285, 5.793628848759678]


# Case 3: Ensemble of 10 models for each fold

In [None]:
# Case 3 training cell: for each of the 10 cross-validation folds, train M
# networks with the tuned hyperparameters, keep the state_dict of every
# trained network, and finally save all of them together with the
# hyperparameters.
# NOTE(review): no random seed is set in this cell, so a re-run is not expected
# to reproduce the printed validation losses exactly - confirm whether seeding
# happens in an earlier cell.
# Load the hyperparameters for the best model
hidden_sizes = [best_model_dict['num_neuron']]*best_model_dict['num_layer']
lr = best_model_dict['lr']
batch_size = best_model_dict['batch_size']
weight_decay = best_model_dict['weight_decay']
warmup_epochs = best_model_dict['warmup_epochs']
# Define the warm-up epochs for early stopping
# (early stopping is only consulted for epochs strictly greater than this value)
warmup_epochs_early_stopping = warmup_epochs + 100
# Upper bound on the number of epochs per model
num_epochs = 3000
# Number of models to train for ensemble
M = 10
# Beta value for Beta-NLL loss
beta = 0.5
# Gradient clipping value
max_grad_norm = 1.0

# fold index -> list of M state_dicts (one per ensemble member)
model_state_dict_10 = {}

for fold in range(10):
    model_state_dict_10[fold] = []
    # Final validation loss of each member; collected but not read again in this cell
    val_loss_fold = []
    for m in range(M):
        # A fresh network, optimizer and early-stopping tracker for every ensemble member
        model = Network_cof_variation(input_size, hidden_sizes, output_size)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        # criterion drives the optimisation; criterion_val is used for the
        # reported training/validation losses
        criterion = end_to_end_loss_NN_betaNLL
        criterion_val = end_to_end_loss_NN_NLL_cov
        early_stopping = EarlyStopping(patience=10, min_delta=1e-4)
        # Freeze weights for coefficient of variation layer before warm-up epochs
        warmup = True
        for param in model.coe_var_layer.parameters():
            param.requires_grad = False

        train_dataset = train_dataset_CV[fold]
        val_dataset = val_dataset_CV[fold]

        # Training batches are reshuffled every epoch; validation uses a fixed
        # batch size of 16 without shuffling
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

        for epoch in range(num_epochs):
            # End of warm-up: clear the warmup flag passed to the loss functions
            # and unfreeze the coefficient-of-variation layer
            if epoch == warmup_epochs:
                warmup = False
                for param in model.coe_var_layer.parameters():
                    param.requires_grad = True

            model.train()
            train_loss = 0.0
            for batch in train_loader:
                inputs, labels = batch
                optimizer.zero_grad()
                outputs = model(inputs)
                # Only `loss` is back-propagated; `loss_val` is evaluated on the
                # same forward pass purely to accumulate train_loss
                loss = criterion(outputs, labels, beta, a, warmup, b_weight)
                loss_val = criterion_val(outputs, labels, a, warmup, b_weight)
                loss.backward()
                # Gradient clipping is applied only once the warm-up phase is over
                if epoch >= warmup_epochs:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
                optimizer.step()
                train_loss += loss_val.item()
            # Mean of the per-batch losses; not read again in this cell
            train_loss /= len(train_loader)

            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for inputs, targets in val_loader:
                    outputs = model(inputs)
                    loss = criterion_val(outputs, targets, a, warmup, b_weight)
                    val_loss += loss.item()
            # Mean of the per-batch validation losses
            val_loss /= len(val_loader)

            if epoch > warmup_epochs_early_stopping:
                early_stopping(val_loss)
                if early_stopping.early_stop:
                    print(f"Early stopping at epoch {epoch + 1}")
                    break
        
        # The stored weights are those of the last epoch that ran.
        # NOTE(review): whether EarlyStopping keeps/restores the best weights is
        # not visible here - confirm against its definition.
        model_state_dict_10[fold].append(model.state_dict())
        val_loss_fold.append(val_loss)
        print(f"Fold {fold + 1}, Model {m + 1}, Validation loss: {val_loss}")
    
# Persist all fold/member weights together with the hyperparameters used
with open('Best_network/best_NN_NLL_cov_V2_10.pth','wb') as f:
    torch.save({'model_state':model_state_dict_10,
                'lr':lr,
                'num_neuron':best_model_dict['num_neuron'],
                'num_layer':best_model_dict['num_layer'],
                'batch_size':batch_size,
                'weight_decay':weight_decay,
                'warmup_epochs':warmup_epochs},f)


Early stopping at epoch 779
Fold 1, Model 1, Validation loss: 3.361151933670044
Early stopping at epoch 779
Fold 1, Model 2, Validation loss: 2.5899298191070557
Early stopping at epoch 779
Fold 1, Model 3, Validation loss: 2.2884302139282227
Early stopping at epoch 780
Fold 1, Model 4, Validation loss: 2.742144823074341
Early stopping at epoch 779
Fold 1, Model 5, Validation loss: 6.782578468322754
Early stopping at epoch 779
Fold 1, Model 6, Validation loss: 2.1262478828430176
Early stopping at epoch 779
Fold 1, Model 7, Validation loss: 3.5219531059265137
Early stopping at epoch 779
Fold 1, Model 8, Validation loss: 2.6193976402282715
Early stopping at epoch 790
Fold 1, Model 9, Validation loss: 2.7740678787231445
Early stopping at epoch 779
Fold 1, Model 10, Validation loss: 8.166481018066406
Early stopping at epoch 876
Fold 2, Model 1, Validation loss: 3.3756256103515625
Early stopping at epoch 1114
Fold 2, Model 2, Validation loss: 3.0240554809570312
Early stopping at epoch 779
Fo

In [24]:
# Run every stored Case 3 ensemble member of every fold on the training,
# in-distribution test and out-of-distribution test inputs.
# Result: fold index -> array stacking the raw network outputs of all members
# along the first axis.
b_train_pred_10_all = {}
b_test_in_pred_10_all = {}
b_test_out_pred_10_all = {}

for fold in range(10):
    b_train_pred_10 = []
    b_test_in_pred_10 = []
    b_test_out_pred_10 = []

    # Iterate over the state dicts that were actually stored for this fold
    # rather than over range(M): M is a notebook-level global that the Case 2
    # training cell rebinds to 5, so re-running this cell after Case 2 would
    # otherwise silently evaluate only the first 5 of the 10 stored members.
    for m, member_state in enumerate(model_state_dict_10[fold]):
        model = Network_cof_variation(input_size, hidden_sizes, output_size)
        model.load_state_dict(member_state)

        # Inference only: eval mode and no autograd bookkeeping
        model.eval()
        with torch.no_grad():
            b_train_pred_fold_m = model(X_train_PCA_tensor).numpy()
            b_test_in_pred_fold_m = model(X_test_in_PCA_tensor).numpy()
            b_test_out_pred_fold_m = model(X_test_out_PCA_tensor).numpy()

        b_train_pred_10.append(b_train_pred_fold_m)
        b_test_in_pred_10.append(b_test_in_pred_fold_m)
        b_test_out_pred_10.append(b_test_out_pred_fold_m)

    b_train_pred_10_all[fold] = np.array(b_train_pred_10)
    b_test_in_pred_10_all[fold] = np.array(b_test_in_pred_10)
    b_test_out_pred_10_all[fold] = np.array(b_test_out_pred_10)


In [None]:

# Persist the raw Case 3 network outputs (one pickle per data split).
for pkl_path, pkl_payload in (
    ('Empirical_parameter_results/E2E_NNE_V2/b_train_pred_10_all.pkl', b_train_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/b_test_in_pred_10_all.pkl', b_test_in_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/b_test_out_pred_10_all.pkl', b_test_out_pred_10_all),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(pkl_payload, f)

In [None]:
# Case 3: turn the raw network outputs into capacity curves (scaled by 100) and
# standard deviations, both per ensemble member and combined over the ensemble.
# Every dict below is keyed by fold index.
Q_train_pred_10_all, Q_test_in_pred_10_all, Q_test_out_pred_10_all = {}, {}, {}
std_train_pred_10_all, std_test_in_pred_10_all, std_test_out_pred_10_all = {}, {}, {}
Q_train_pred_10_ensemble, Q_test_in_pred_10_ensemble, Q_test_out_pred_10_ensemble = {}, {}, {}
std_train_pred_10_ensemble, std_test_in_pred_10_ensemble, std_test_out_pred_10_ensemble = {}, {}, {}

for fold in range(10):
    # (raw outputs, cycle numbers, points per curve, four destination dicts)
    split_specs = (
        (b_train_pred_10_all[fold], N_train, Q_train.shape[1],
         Q_train_pred_10_all, std_train_pred_10_all,
         Q_train_pred_10_ensemble, std_train_pred_10_ensemble),
        (b_test_in_pred_10_all[fold], N_test_in, Q_test_in.shape[1],
         Q_test_in_pred_10_all, std_test_in_pred_10_all,
         Q_test_in_pred_10_ensemble, std_test_in_pred_10_ensemble),
        (b_test_out_pred_10_all[fold], N_test_out, Q_test_out.shape[1],
         Q_test_out_pred_10_all, std_test_out_pred_10_all,
         Q_test_out_pred_10_ensemble, std_test_out_pred_10_ensemble),
    )

    for b_members, N_split, n_points, Q_all_out, std_all_out, Q_ens_out, std_ens_out in split_specs:
        # Capacity curve of every cell for every member: the first three
        # network outputs feed the empirical model, the fourth is ignored here.
        Q_members = np.array([
            [empirical_model(a, b1, b2, b3, N_split[i], b_weight) * 100
             for i, (b1, b2, b3, _) in enumerate(b_member)]
            for b_member in b_members
        ])

        # Fourth network output (coefficient of variation), repeated along
        # the curve axis so it lines up with Q_members.
        cov_members = np.array([
            np.repeat(b_member[:, 3][:, np.newaxis], n_points, axis=1)
            for b_member in b_members
        ])

        # Per-member standard deviation: coefficient of variation times (100 - Q)
        std_members = cov_members * (100 - Q_members)
        var_members = std_members ** 2

        # Ensemble mean over members, and the variance of the combined prediction
        Q_combined = np.mean(Q_members, axis=0)
        var_combined = np.mean(var_members + Q_members**2 - Q_combined**2, axis=0)

        Q_all_out[fold] = Q_members
        std_all_out[fold] = std_members
        Q_ens_out[fold] = Q_combined
        std_ens_out[fold] = np.sqrt(var_combined)

# Persist the Case 3 per-member and ensemble predictions / standard deviations,
# one pickle file per object.
for pkl_path, pkl_payload in (
    ('Empirical_parameter_results/E2E_NNE_V2/Q_train_pred_10_all.pkl', Q_train_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_in_pred_10_all.pkl', Q_test_in_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_out_pred_10_all.pkl', Q_test_out_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/std_train_pred_10_all.pkl', std_train_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_in_pred_10_all.pkl', std_test_in_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_out_pred_10_all.pkl', std_test_out_pred_10_all),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_train_pred_10_ensemble.pkl', Q_train_pred_10_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_in_pred_10_ensemble.pkl', Q_test_in_pred_10_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/Q_test_out_pred_10_ensemble.pkl', Q_test_out_pred_10_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/std_train_pred_10_ensemble.pkl', std_train_pred_10_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_in_pred_10_ensemble.pkl', std_test_in_pred_10_ensemble),
    ('Empirical_parameter_results/E2E_NNE_V2/std_test_out_pred_10_ensemble.pkl', std_test_out_pred_10_ensemble),
):
    with open(pkl_path, 'wb') as f:
        pickle.dump(pkl_payload, f)

In [None]:
# Case 3 error metrics: for every fold, compute MAE and RMSE per cell between
# the reference curve (Q * 100) and the ensemble-mean prediction, then average
# over the cells of each split. Each list ends up with one entry per fold.
MAE_training, MAE_test_in, MAE_test_out = [], [], []
RMSE_training, RMSE_test_in, RMSE_test_out = [], [], []

for fold in range(10):
    # (reference curves, ensemble prediction, number of cells, MAE sink, RMSE sink)
    split_specs = (
        (Q_train, Q_train_pred_10_ensemble[fold], num_training_cells, MAE_training, RMSE_training),
        (Q_test_in, Q_test_in_pred_10_ensemble[fold], num_test_in_cells, MAE_test_in, RMSE_test_in),
        (Q_test_out, Q_test_out_pred_10_ensemble[fold], num_test_out_cells, MAE_test_out, RMSE_test_out),
    )
    for Q_ref, Q_hat, n_cells, mae_sink, rmse_sink in split_specs:
        mae_per_cell = [mean_absolute_error(Q_ref[i]*100, Q_hat[i]) for i in range(n_cells)]
        rmse_per_cell = [root_mean_squared_error(Q_ref[i]*100, Q_hat[i]) for i in range(n_cells)]
        mae_sink.append(np.mean(mae_per_cell))
        rmse_sink.append(np.mean(rmse_per_cell))

# Per-fold metrics, saved for later comparison
result_dict = {
    'MAE_training': MAE_training,
    'MAE_test_in': MAE_test_in,
    'MAE_test_out': MAE_test_out,
    'RMSE_training': RMSE_training,
    'RMSE_test_in': RMSE_test_in,
    'RMSE_test_out': RMSE_test_out,
}

with open('Empirical_parameter_results/E2E_NNE_V2/result_dict_10.pkl','wb') as f:
    pickle.dump(result_dict, f)

# Averages over the 10 folds
mean_MAE_training, mean_MAE_test_in, mean_MAE_test_out = (
    np.mean(MAE_training), np.mean(MAE_test_in), np.mean(MAE_test_out)
)
mean_RMSE_training, mean_RMSE_test_in, mean_RMSE_test_out = (
    np.mean(RMSE_training), np.mean(RMSE_test_in), np.mean(RMSE_test_out)
)

print(f"Mean MAE training: {mean_MAE_training:.2f}")
print(f"Mean MAE test in: {mean_MAE_test_in:.2f}")
print(f"Mean MAE test out: {mean_MAE_test_out:.2f}")

print(f"Mean RMSE training: {mean_RMSE_training:.2f}")
print(f"Mean RMSE test in: {mean_RMSE_test_in:.2f}")
print(f"Mean RMSE test out: {mean_RMSE_test_out:.2f}")

Mean MAE training: 1.73
Mean MAE test in: 2.05
Mean MAE test out: 7.14
Mean RMSE training: 2.11
Mean RMSE test in: 2.47
Mean RMSE test out: 8.82


In [27]:
# Show the per-fold MAE values of the Case 3 ensemble, one split per line.
print(
    f"MAE training: {MAE_training}",
    f"MAE test in: {MAE_test_in}",
    f"MAE test out: {MAE_test_out}",
    sep="\n",
)

MAE training: [1.784060336859118, 1.8386336529389424, 1.632692972460559, 1.626906289616892, 1.7307085151023, 1.7469161020868753, 1.6925760692281258, 1.703419231528423, 1.7322012733345071, 1.7724808515131016]
MAE test in: [2.2784796480191947, 2.1662178337546334, 1.9765103565248399, 1.9790225691551302, 1.7690244412170895, 2.2759907825362307, 1.7942676720690807, 1.9604396249642178, 2.277860275059515, 2.043930906690721]
MAE test out: [8.528357093958984, 7.5104533379726846, 8.198342718556477, 6.460058254895792, 3.9085260366471757, 8.791309163180296, 5.630577073762923, 6.4752537960282845, 8.233896754263895, 7.631773181479316]
