In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

import lightgbm as lgb

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import optuna
from optuna.pruners import MedianPruner

from regression_net import RegressionNet
from data import normalize_columns

%load_ext autoreload
%autoreload 2

random_state = 42
np.random.seed(random_state)
torch.manual_seed(random_state)

<torch._C.Generator at 0x7ae6ad1e0d30>

Parse input and create dataframe

MaterialConfigurations --> generated samples of parameter-configurations; 
First line = name of each parameter
ParameterBouunds --> ranges used for the different parameters during sampling;
First line = name of each parameter
IDP_Results and ROM_ Results --> files with the output generated by model simulations; 
First line = applied moment


Train/Val dataset

In [2]:
ds_path = "datasets/Large/"
placeholder = ""

LoadCases = ['Flexion', 'AxialRotation', 'Extension', 'LateralBending']

output_files = [
    {'Name': 'Flexion', 'ROM': f"{ds_path}ROM_Results_Flexion_{placeholder}wMoment.csv", 'IDP': f"{ds_path}IDP_Results_Flexion_{placeholder}wMoment.csv"},
    {'Name': 'AxialRotation', 'ROM': f"{ds_path}ROM_Results_AxialRotation_{placeholder}wMoment.csv", 'IDP': f"{ds_path}IDP_Results_AxialRotation_{placeholder}wMoment.csv"},
    {'Name': 'Extension', 'ROM': f"{ds_path}ROM_Results_Extension_{placeholder}wMoment.csv", 'IDP': f"{ds_path}IDP_Results_Extension_{placeholder}wMoment.csv"},
    {'Name': 'LateralBending', 'ROM': f"{ds_path}ROM_Results_LateralBending_{placeholder}wMoment.csv", 'IDP': f"{ds_path}IDP_Results_LateralBending_{placeholder}wMoment.csv"},
]

input_file = ds_path + f"MaterialConfigurations{"_" + placeholder[:-1] if len(placeholder) else ""}.csv"
bounds_file = ds_path + "ParameterBounds.csv"

train_column_bounds = pd.read_csv(bounds_file)
df = load_df(input_file, output_files)

# normalize both input and output columns
df, train_column_bounds = normalize_columns(df, column_bounds=train_column_bounds, train=True)

df

Unnamed: 0,config_id,LoadCase,Moment,C10Nucleus,C01Nucleus,C10Annulus,K1Annulus,K2Annulus,Kappa,K1Circ,K2Circ,K1Rad,K2Rad,FiberAngle,FiberAngleCirc,FiberAngleRad,y_ROM,y_IDP
0,0,0.0,0.0,0.803711,0.372070,0.012695,0.454102,0.940430,0.883789,0.448242,0.026367,0.420898,0.493164,0.526367,0.653320,0.317383,0.126046,0.530391
1,1,0.0,0.0,0.385742,0.579102,0.416992,0.981445,0.227539,0.487305,0.602539,0.022461,0.307617,0.088867,0.215820,0.506836,0.532227,0.068406,0.522663
2,2,0.0,0.0,0.088867,0.211914,0.788086,0.172852,0.954102,0.172852,0.858398,0.348633,0.016602,0.600586,0.395508,0.745117,0.860352,0.041758,0.524259
3,3,0.0,0.0,0.190430,0.098633,0.016602,0.065430,0.827148,0.661133,0.961914,0.901367,0.641602,0.733398,0.237305,0.750977,0.319336,0.186156,0.526288
4,4,0.0,0.0,0.741211,0.545898,0.682617,0.670898,0.233398,0.625977,0.411133,0.469727,0.825195,0.588867,0.645508,0.725586,0.334961,0.037950,0.527357
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20475,1019,1.0,1.0,0.166016,0.900391,0.257812,0.212891,0.921875,0.050781,0.902344,0.275391,0.746094,0.535156,0.431641,0.216797,0.921875,0.120541,0.632732
20476,1020,1.0,1.0,0.037109,0.291016,0.820312,0.220703,0.228516,0.140625,0.726562,0.410156,0.634766,0.816406,0.480469,0.199219,0.822266,0.102200,0.586695
20477,1021,1.0,1.0,0.716797,0.064453,0.230469,0.828125,0.949219,0.939453,0.890625,0.347656,0.410156,0.972656,0.357422,0.187500,0.193359,0.144379,0.467184
20478,1022,1.0,1.0,0.765625,0.220703,0.050781,0.945312,0.212891,0.107422,0.929688,0.552734,0.400391,0.230469,0.890625,0.984375,0.335937,0.059934,0.653710


Test set

In [3]:
ds_path = "datasets/Test/"
output_files = [
    {'Name': 'Flexion', 'ROM': ds_path + "ROM_Results_Flexion_TestData_64_wMoment.csv", 'IDP': ds_path + "IDP_Results_Flexion_TestData_64_wMoment.csv"},
    {'Name': 'AxialRotation', 'ROM': ds_path + "ROM_Results_AxialRotation_TestData_64_wMoment.csv", 'IDP': ds_path + "IDP_Results_AxialRotation_TestData_64_wMoment.csv"},
    {'Name': 'Extension', 'ROM': ds_path + "ROM_Results_Extension_TestData_64_wMoment.csv", 'IDP': ds_path + "IDP_Results_Extension_TestData_64_wMoment.csv"},
    {'Name': 'LateralBending', 'ROM': ds_path + "ROM_Results_LateralBending_TestData_64_wMoment.csv", 'IDP': ds_path + "IDP_Results_LateralBending_TestData_64_wMoment.csv"},
]
input_file = ds_path + "MaterialConfigurations_TestData_64.csv"

test_df = load_df(input_file, output_files)
test_df = normalize_columns(test_df, column_bounds=train_column_bounds, train=False, keep_target=True)

test_df

Unnamed: 0,config_id,LoadCase,Moment,C10Nucleus,C01Nucleus,C10Annulus,K1Annulus,K2Annulus,Kappa,K1Circ,K2Circ,K1Rad,K2Rad,FiberAngle,FiberAngleCirc,FiberAngleRad,y_ROM,y_IDP
0,0,0.0,0.0,0.029725,0.942940,0.664148,0.921854,0.092080,0.259683,0.663973,0.062818,0.020170,0.489396,0.239752,0.374661,0.971191,0.049952,0.523323
1,1,0.0,0.0,0.247972,0.942209,0.018898,0.157741,0.202646,0.135846,0.026879,0.858016,0.591501,0.994841,0.630346,0.474571,0.689127,0.093415,0.539389
2,2,0.0,0.0,0.635258,0.026542,0.809374,0.424060,0.456829,0.003396,0.467026,0.574693,0.328489,0.009960,0.021167,0.189954,0.710109,0.066601,0.513135
3,3,0.0,0.0,0.952170,0.654910,0.704036,0.679229,0.608636,0.926893,0.823352,0.712242,0.841573,0.131671,0.627868,0.832815,0.686986,0.060443,0.519654
4,4,0.0,0.0,0.089696,0.389177,0.680306,0.043599,0.828607,0.000181,0.726765,0.897064,0.251119,0.632284,0.301170,0.700509,0.519569,0.068396,0.517184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,59,1.0,1.0,0.212410,0.577395,0.553552,0.950991,0.366432,0.461563,0.216497,0.901203,0.310985,0.848445,0.805681,0.514956,0.925940,0.065768,0.603970
1276,60,1.0,1.0,0.991926,0.131097,0.621420,0.931875,0.745443,0.830001,0.827799,0.266717,0.953118,0.392787,0.768022,0.878251,0.530897,0.089262,0.573318
1277,61,1.0,1.0,0.581443,0.122382,0.503752,0.636114,0.917489,0.946015,0.747485,0.329838,0.152910,0.391322,0.132333,0.505382,0.916703,0.150653,0.462616
1278,62,1.0,1.0,0.710095,0.058459,0.116247,0.230498,0.118430,0.438265,0.172031,0.072344,0.136880,0.182430,0.804260,0.812411,0.201419,0.132869,0.594048


Try linear and non-linear ML models

In [10]:
target_columns = ['y_ROM']
eval_validation = False
'''
# Small dataset
gp_kernel = C(constant_value=1e-05,constant_value_bounds="fixed") * RBF(length_scale=1.4270972012234615, length_scale_bounds="fixed" )
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(max_depth=14, n_estimators= 121, random_state=random_state),
    "SVR": SVR(C=96.81294212057983, gamma='scale', kernel= 'rbf'),
    #"Gradient Boosting": GradientBoostingRegressor(n_estimators=185, learning_rate=0.3214585218102217, max_depth=5, random_state=random_state),
    #"XGBoost": xgb.XGBRegressor(n_estimators=198, max_depth=5, learning_rate=0.3165429507603652, random_state=random_state),
    "LightGBM": lgb.LGBMRegressor(random_state=random_state, verbose=-1),
    "GP": GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=0, normalize_y=True, random_state=random_state),
}

'''
# Medium dataset
gp_kernel = C(constant_value=1e-05,constant_value_bounds="fixed") * RBF(length_scale=1.4270972012234615, length_scale_bounds="fixed" )

models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(max_depth=26, n_estimators=146, random_state=random_state),
    "SVR": SVR(C=13.171289753772092, gamma="scale", kernel="rbf"),
    #"Gradient Boosting": GradientBoostingRegressor(n_estimators= 141, learning_rate=0.14233612312010724, max_depth=6, random_state=random_state),
    #"XGBoost": xgb.XGBRegressor(n_estimators=173, max_depth=6, learning_rate=0.11597899680228448, random_state=random_state),
    "LightGBM": lgb.LGBMRegressor(num_leaves=31, max_depth=8, learning_rate=0.11036437743275099, n_estimators=173, random_state=random_state, verbose=-1),
    "GP": GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=0, normalize_y=True, random_state=random_state),
}
'''
# Large dataset
gp_kernel = C(constant_value=1.3535458162698054e-05,constant_value_bounds="fixed") * RBF(length_scale=0.9739157609228299, length_scale_bounds="fixed" )
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(max_depth=31, n_estimators=51, random_state=random_state),
    "SVR": SVR(C=2.479459357922161, gamma="auto", kernel="rbf"),
    #"Gradient Boosting": GradientBoostingRegressor(n_estimators= 84, learning_rate= 0.17251713060020465, max_depth=7, random_state=random_state),
    #"XGBoost": xgb.XGBRegressor(n_estimators=179, max_depth=6, learning_rate= 0.11678452806022886, random_state=random_state),
    "LightGBM": lgb.LGBMRegressor(num_leaves=71, max_depth=24, learning_rate=0.0959080973897064, n_estimators=183, random_state=random_state, verbose=-1),
    "GP": GaussianProcessRegressor(kernel=gp_kernel, n_restarts_optimizer=0, normalize_y=True, random_state=random_state),
}
'''


# Function to train and evaluate models
def train_and_evaluate(model, X_train, X_val, y_train, y_val, bounds=None):
    model.fit(X_train, y_train.values.ravel())
    predictions = model.predict(X_val)

    if bounds is not None:
        predictions = predictions * (bounds['y_ROM']['max'] - bounds['y_ROM']['min']) + bounds['y_ROM']['min']
        y_val = y_val * (bounds['y_ROM']['max'] - bounds['y_ROM']['min']) + bounds['y_ROM']['min']

    #mse = mean_squared_error(y_val, predictions)
    #mae = mean_absolute_error(y_val, predictions)
    #r2 = r2_score(y_val, predictions)

    lc_r2 = []
    lc_mae = []
    for load_case in test_df['LoadCase'].unique():
        mask = test_df['LoadCase'] == load_case
        lc_r2.append(r2_score(y_val[mask], predictions[mask]))
        lc_mae.append(mean_absolute_error(y_val[mask], predictions[mask]))

    return lc_r2, lc_mae

# KFold cross-validation
kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
for name, model in models.items():
    print(f"Evaluating {name}")
    scores = {score: list() for score in ['mse', 'mae', 'r2']}

    for fold, (train_index, val_index) in enumerate(kf.split(df['config_id'].unique())):
        # get values by id
        df_train = df[df['config_id'].isin(train_index)]
        df_val = df[df['config_id'].isin(val_index)]

        X_train, X_val, X_test = df_train.drop(['config_id', 'y_ROM', 'y_IDP'], axis=1), df_val.drop(['config_id', 'y_ROM', 'y_IDP'], axis=1), test_df.drop(['config_id', 'y_ROM', 'y_IDP'], axis=1),
        y_train, y_val, y_test = df_train[target_columns] , df_val[target_columns], test_df[target_columns]

        if eval_validation:
            lc_r2, lc_mae = train_and_evaluate(model, X_train, X_val, y_train, y_val)
        else:
            lc_r2, lc_mae = train_and_evaluate(model, X_train, X_test, y_train, y_test, bounds=train_column_bounds)
            

        scores['mae'].append(lc_mae)
        scores['r2'].append(lc_r2)

    # compute average and std per load case for R2 and MAE scores
    scores['mae'] = np.array(scores['mae'])
    scores['r2'] = np.array(scores['r2'])
    lc_mae_means = list()
    lc_r2_means = list()
    for i, lc in enumerate(LoadCases):
        lc_mae_mean = np.mean(scores['mae'][:, i])
        lc_r2_mean = np.mean(scores['r2'][:, i])
        
        lc_mae_means.append(lc_mae_mean)
        lc_r2_means.append(lc_r2_mean)
        
        print(f"Load Case {lc}: Average MAE: {lc_mae_mean:.4f} (Std: {np.std(scores['mae'][:, i]):.4f}), Average R²: {lc_r2_mean:.4f} (Std: {np.std(scores['r2'][:, i]):.4f})")

    # Compute average and std acroos and load cases for R2 and MAE scores
    mae_mean = np.mean(lc_mae_means)
    mae_std = np.std(lc_mae_means)
    r2_mean = np.mean(lc_r2_means)
    r2_std = np.std(lc_r2_means)
    print(f"Average MAE: {mae_mean:.4f} (Std: {mae_std:.4f}), Average R²: {r2_mean:.4f} (Std: {r2_std:.4f})")
    

Evaluating Linear Regression
Load Case Flexion: Average MAE: 0.7231 (Std: 0.0179), Average R²: 0.7951 (Std: 0.0035)
Load Case AxialRotation: Average MAE: 1.3170 (Std: 0.0218), Average R²: 0.4683 (Std: 0.0184)
Load Case Extension: Average MAE: 1.2896 (Std: 0.0109), Average R²: 0.5070 (Std: 0.0134)
Load Case LateralBending: Average MAE: 0.8772 (Std: 0.0254), Average R²: 0.4008 (Std: 0.0315)
Average MAE: 1.0517 (Std: 0.2576), Average R²: 0.5428 (Std: 0.1505)
Evaluating Random Forest
Load Case Flexion: Average MAE: 0.4524 (Std: 0.0116), Average R²: 0.8984 (Std: 0.0068)
Load Case AxialRotation: Average MAE: 0.6941 (Std: 0.0209), Average R²: 0.7855 (Std: 0.0229)
Load Case Extension: Average MAE: 0.4761 (Std: 0.0329), Average R²: 0.9438 (Std: 0.0138)
Load Case LateralBending: Average MAE: 0.2319 (Std: 0.0109), Average R²: 0.9408 (Std: 0.0043)
Average MAE: 0.4636 (Std: 0.1636), Average R²: 0.8922 (Std: 0.0641)
Evaluating SVR
Load Case Flexion: Average MAE: 0.8748 (Std: 0.0193), Average R²: 0.7

NN Training

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [5]:
target_columns = ['y_ROM']


# Small dataset
hparams = {
    'input_dim': 15,
    'output_dim': len(target_columns),
    'activation': 'relu',
    'num_units': 128, 
    'num_layers': 5, 
    'batch_size': 14, 
    'num_epochs': 231, 
    'lr': 0.0005598762669544375, 
    'weight_decay': 0.0005164420694702161, 
    'early_stopping_patience': 26,
    'dropout_p': 0.01, 
}
# Large and Medium dataset
hparams.update({'num_units': 256, 
                'num_layers': 5, 
                'batch_size': 20, 
                'num_epochs': 300, 
                'lr': 0.0005555051127271821, 
                'weight_decay': 5.162935106110616e-06, 
                'early_stopping_patience': 23,
                })

kf = KFold(n_splits=4, shuffle=True, random_state=random_state)

fold = 0
scores = {score: list() for score in ['mae', 'r2']}
trained_models = list()
for train_index, val_index in kf.split(df['config_id'].unique()):
    df_train = df[df['config_id'].isin(train_index)]
    df_val = df[df['config_id'].isin(val_index)]

    X_train, X_val, X_test = df_train.drop(['config_id', 'y_ROM', 'y_IDP'], axis=1), df_val.drop(['config_id', 'y_ROM', 'y_IDP'], axis=1), test_df.drop(['config_id', 'y_ROM', 'y_IDP'], axis=1)
    y_train, y_val, y_test = df_train[target_columns], df_val[target_columns], test_df[target_columns]

    # Converting to PyTorch tensors
    X_train_torch = torch.tensor(X_train.values, dtype=torch.float32)
    X_val_torch = torch.tensor(X_val.values, dtype=torch.float32, device=device)
    X_test_torch = torch.tensor(X_test.values, dtype=torch.float32, device=device)
    y_train_torch = torch.tensor(y_train.values, dtype=torch.float32)

    # Creating datasets and dataloaders
    train_dataset = TensorDataset(X_train_torch, y_train_torch)
    train_loader = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True)

    # Model, loss function, and optimizer
    nn_model = RegressionNet(hparams)
    nn_model.to(device)
    criterion = nn.MSELoss() #nn.L1Loss()
    optimizer = optim.Adam(nn_model.parameters(), lr=hparams['lr'], weight_decay=hparams['weight_decay'])

    # Early stopping parameters
    best_mae = float('inf')
    epochs_no_improve = 0
    
    fold_train_loss, fold_val_loss = list(), list()
    for epoch in tqdm(range(hparams['num_epochs'])):
        total_loss = 0
        num_batches = 0
        # Training the model
        nn_model.train()
        for data in train_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = nn_model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item() * inputs.size(0)  # Multiply by batch size
            num_batches += inputs.size(0)
        
        average_loss = total_loss / num_batches
        fold_train_loss.append(average_loss)
        
        # Evaluating the model
        nn_model.eval()
        with torch.no_grad():
            predictions = nn_model(X_val_torch)
            mae = mean_absolute_error(y_val, predictions.cpu().numpy())
            fold_val_loss.append(mae)

        # Check for improvement
        if mae < best_mae:
            best_mae = mae
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
        
        if epochs_no_improve >= hparams['early_stopping_patience']:
            print("Early stopping, fold %d epoch %d" % (fold, epoch))
            break  # Early stopping

        nn_model.train()
    
    # Evaluating the model
    nn_model.eval()
    with torch.no_grad():  
        predictions = nn_model(X_test_torch).cpu().numpy()
        predictions = predictions * (train_column_bounds['y_ROM']['max'] - train_column_bounds['y_ROM']['min']) + train_column_bounds['y_ROM']['min']
        y_test = y_test * (train_column_bounds['y_ROM']['max'] - train_column_bounds['y_ROM']['min']) + train_column_bounds['y_ROM']['min']

    lc_r2 = []
    lc_mae = []
    for load_case in test_df['LoadCase'].unique():
        mask = test_df['LoadCase'] == load_case
        lc_r2.append(r2_score(y_test[mask], predictions[mask]))
        lc_mae.append(mean_absolute_error(y_test[mask], predictions[mask]))

    scores['mae'].append(lc_mae)
    scores['r2'].append(lc_r2)
    trained_models.append(nn_model)
    fold += 1 
    break

scores['mae'] = np.array(scores['mae'])
scores['r2'] = np.array(scores['r2'])
lc_mae_means = list()
lc_r2_means = list()
for i, lc in enumerate(LoadCases):
    lc_mae_mean = np.mean(scores['mae'][:, i])
    lc_r2_mean = np.mean(scores['r2'][:, i])
    
    lc_mae_means.append(lc_mae_mean)
    lc_r2_means.append(lc_r2_mean)
    
    print(f"Load Case {lc}: Average MAE: {lc_mae_mean:.4f} (Std: {np.std(scores['mae'][:, i]):.4f}), Average R²: {lc_r2_mean:.4f} (Std: {np.std(scores['r2'][:, i]):.4f})")

# Compute average and std acroos and load cases for R2 and MAE scores
mae_mean = np.mean(lc_mae_means)
mae_std = np.std(lc_mae_means)
r2_mean = np.mean(lc_r2_means)
r2_std = np.std(lc_r2_means)
print(f"Average MAE: {mae_mean:.4f} (Std: {mae_std:.4f}), Average R²: {r2_mean:.4f} (Std: {r2_std:.4f})")
    

 27%|██▋       | 80/300 [00:38<01:46,  2.07it/s]

Early stopping, fold 0 epoch 80
Load Case Flexion: Average MAE: 0.1443 (Std: 0.0000), Average R²: 0.9898 (Std: 0.0000)
Load Case AxialRotation: Average MAE: 0.1918 (Std: 0.0000), Average R²: 0.9847 (Std: 0.0000)
Load Case Extension: Average MAE: 0.1553 (Std: 0.0000), Average R²: 0.9906 (Std: 0.0000)
Load Case LateralBending: Average MAE: 0.0888 (Std: 0.0000), Average R²: 0.9895 (Std: 0.0000)
Average MAE: 0.1451 (Std: 0.0370), Average R²: 0.9886 (Std: 0.0023)





NN Hyperparam search

In [None]:
target_columns = ['y_ROM']

kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
criterion = nn.L1Loss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def search_objective(trial):
    # Hyperparameters to tune  
    num_units = trial.suggest_categorical('num_units', [128, 256, 512, 1024])
    num_layers = trial.suggest_categorical('num_layers', [4, 5, 6, 7, 8, ])
    batch_size = trial.suggest_int('batch_size', 12, 64)
    num_epochs = trial.suggest_int('num_epochs', 150, 500)
    lr = trial.suggest_float('lr', 1e-6, 1e-3, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    dropout_p = 0 #trial.suggest_categorical('dropout_p', [0, 0.1, 0.2, 0.3, 0.4, 0.5])
    early_stopping_patience = trial.suggest_int('early_stopping_patience', 5, 30)

    hparams = {
        'input_dim': len(df.columns) - len(target_columns) - 1,
        'output_dim': len(target_columns),
        'num_units': num_units, 
        'num_layers': num_layers, 
        'batch_size': batch_size, 
        'num_epochs': num_epochs, 
        'lr': lr, 
        'weight_decay': weight_decay, 
        'dropout_p': dropout_p, 
        'early_stopping_patience': early_stopping_patience,
    }
    
    mae_scores = list()
    fold_id = 0
    for train_index, test_index in kf.split(df['config_id'].unique()):
        df_train = df[df['config_id'].isin(train_index)]
        df_test = df[df['config_id'].isin(test_index)]

        X_train, X_test = df_train.drop(['config_id'] + target_columns, axis=1), df_test.drop(['config_id'] + target_columns, axis=1)
        y_train, y_test = df_train[target_columns], df_test[target_columns]

        # Converting to PyTorch tensors
        X_train_torch = torch.tensor(X_train.values, dtype=torch.float32)
        y_train_torch = torch.tensor(y_train.values, dtype=torch.float32)
        X_test_torch = torch.tensor(X_test.values, dtype=torch.float32, device=device)

        # Creating datasets and dataloaders
        train_dataset = TensorDataset(X_train_torch, y_train_torch)
        train_loader = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True, num_workers=4, pin_memory=True)

        # Update the model with new hyperparameters
        nn_model = RegressionNet(hparams)
        nn_model.to(device)
        optimizer = optim.Adam(nn_model.parameters(), lr=hparams['lr'], weight_decay=hparams['weight_decay'])

        # Early stopping parameters
        best_mae = float('inf')
        epochs_no_improve = 0

        # Training the model
        for epoch in tqdm(range(hparams['num_epochs'])):
            epoch_loss = 0.0
            nn_model.train()
            for data in train_loader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = nn_model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            # Average loss for the epoch
            epoch_loss /= len(train_loader)

            # Evaluating the model
            nn_model.eval()
            with torch.no_grad():
                predictions = nn_model(X_test_torch)
                mae = mean_absolute_error(y_test, predictions.cpu().numpy())

            # Check for improvement
            if mae < best_mae:
                best_mae = mae
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1
            
            if epochs_no_improve >= hparams['early_stopping_patience']:
                break  # Early stopping
            
        mae_scores.append(best_mae)
        #fold_id += 1
        # a single fold to speed up
        break

    avg_mae = np.mean(mae_scores)
    return avg_mae # Optuna min the objective

pruner = MedianPruner()
study = optuna.create_study(direction='minimize', pruner=pruner)

# Execute an optimization
study.optimize(search_objective, n_trials=80, show_progress_bar=True)

print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.values, study.best_trial.params)

GP Search Params

In [8]:
target_columns = ['y_ROM']

kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
for train_index, test_index in kf.split(df['config_id'].unique()):
    df_train = df[df['config_id'].isin(train_index)]
    df_test = df[df['config_id'].isin(test_index)]
    X_train, X_test = df_train.drop(['config_id'] + target_columns, axis=1), df_test.drop(['config_id'] + target_columns, axis=1)
    y_train, y_test = df_train[target_columns], df_test[target_columns]
    break

# Define the space of hyperparameters to optimize
space  = [
    Real(1e-5, 1e3, "log-uniform", name='kernel__k1__constant_value'),
    Real(1e-3, 1e3, "log-uniform", name='kernel__k2__length_scale')
]

# Define the kernel using hyperparameters
def create_model(params):
    kernel = C(constant_value=params["kernel__k1__constant_value"], constant_value_bounds="fixed") \
        * RBF(length_scale=params['kernel__k2__length_scale'], length_scale_bounds="fixed")

    return GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=0, normalize_y=True)

# Define the objective function to minimize
@use_named_args(space)
def objective(**params):
    model = create_model(params)
    model.fit(X_train, y_train)  # Assuming X_train and y_train are predefined
    # Calculate the mean squared error on the test set
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    return mse  # We minimize MSE directly

# Perform optimization
result = gp_minimize(objective, space, n_calls=50, random_state=random_state)

# Print the best found parameters and the corresponding value of the objective function
print("Best parameters:", result.x)
print("Best mean squared error:", result.fun)


Best parameters: [429.5514931978272, 0.6554197303387509]
Best mean squared error: 0.0010438637464197948


Other models search

In [7]:
import optuna
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import xgboost as xgb
import lightgbm as lgb

target_columns = ['y_ROM']

kf = KFold(n_splits=4, shuffle=True, random_state=random_state)
for train_index, test_index in kf.split(df['config_id'].unique()):
    df_train = df[df['config_id'].isin(train_index)]
    df_test = df[df['config_id'].isin(test_index)]
    X_train, X_test = df_train.drop(['config_id'] + target_columns, axis=1), df_test.drop(['config_id'] + target_columns, axis=1)
    y_train, y_test = df_train[target_columns], df_test[target_columns]
    break

# Define the objective function for each model
def objective(trial, model_name):
    if model_name == "Random Forest":
        max_depth = trial.suggest_int("max_depth", 2, 32)
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        model = RandomForestRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state=42)

    elif model_name == "SVR":
        C = trial.suggest_loguniform("C", 1e-1, 1e2)
        gamma = trial.suggest_categorical("gamma", ["scale", "auto"])
        kernel = trial.suggest_categorical("kernel", ["rbf", "linear", "poly"])
        model = SVR(C=C, gamma=gamma, kernel=kernel)

    elif model_name == "Gradient Boosting":
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.5)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        model = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth, random_state=42)

    elif model_name == "XGBoost":
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.5)
        model = xgb.XGBRegressor(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate, random_state=42)

    elif model_name == "LightGBM":
        num_leaves = trial.suggest_int("num_leaves", 2, 256)
        max_depth = trial.suggest_int("max_depth", 2, 32)
        learning_rate = trial.suggest_float("learning_rate", 0.01, 0.5)
        n_estimators = trial.suggest_int("n_estimators", 10, 200)
        model = lgb.LGBMRegressor(num_leaves=num_leaves, max_depth=max_depth, learning_rate=learning_rate, n_estimators=n_estimators, verbose=-1, random_state=42)

    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test.values, preds.ravel()))
    return rmse

models = {
    "Random Forest": RandomForestRegressor(random_state=random_state),
    "SVR": SVR(),
    "Gradient Boosting": GradientBoostingRegressor(random_state=random_state),
    "XGBoost": xgb.XGBRegressor(random_state=random_state),
    "LightGBM": lgb.LGBMRegressor(random_state=random_state),
}

# Run optimization
res = dict()
for model_name in models.keys():
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: objective(trial, model_name), n_trials=50)
    res[model_name] = study.best_params

for model_name, best_params in res.items():
    print(f"Best parameters for {model_name}: {best_params}")


[I 2024-05-13 13:45:06,696] A new study created in memory with name: no-name-772a9c2b-a5e8-4857-8134-771639cb88ff


  return fit_method(estimator, *args, **kwargs)
[I 2024-05-13 13:45:10,759] Trial 0 finished with value: 0.029189352853743626 and parameters: {'max_depth': 14, 'n_estimators': 63}. Best is trial 0 with value: 0.029189352853743626.
  return fit_method(estimator, *args, **kwargs)
[I 2024-05-13 13:45:15,654] Trial 1 finished with value: 0.02873982077103532 and parameters: {'max_depth': 23, 'n_estimators': 68}. Best is trial 1 with value: 0.02873982077103532.
  return fit_method(estimator, *args, **kwargs)
[I 2024-05-13 13:45:16,063] Trial 2 finished with value: 0.04503602713349913 and parameters: {'max_depth': 7, 'n_estimators': 12}. Best is trial 1 with value: 0.02873982077103532.
  return fit_method(estimator, *args, **kwargs)
[I 2024-05-13 13:45:17,205] Trial 3 finished with value: 0.07951072126978105 and parameters: {'max_depth': 2, 'n_estimators': 127}. Best is trial 1 with value: 0.02873982077103532.
  return fit_method(estimator, *args, **kwargs)
[I 2024-05-13 13:45:18,566] Trial 4

Best parameters for Random Forest: {'max_depth': 31, 'n_estimators': 51}
Best parameters for SVR: {'C': 2.479459357922161, 'gamma': 'auto', 'kernel': 'rbf'}
Best parameters for Gradient Boosting: {'n_estimators': 84, 'learning_rate': 0.17251713060020465, 'max_depth': 7}
Best parameters for XGBoost: {'n_estimators': 179, 'max_depth': 6, 'learning_rate': 0.11678452806022886}
Best parameters for LightGBM: {'num_leaves': 71, 'max_depth': 24, 'learning_rate': 0.0959080973897064, 'n_estimators': 183}


Save a model and dataset to file

In [6]:
# Parameters to save along with the model
model_params = {
    'hparams': hparams,
    'state_dict': trained_models[0].state_dict(),
    'feature_names': df.columns,
    'feature_bounds': train_column_bounds
}

# Save the model and parameters to a file
torch.save(model_params, 'model_large_mse.pth')

In [4]:
kf = KFold(n_splits=4, shuffle=True, random_state=random_state)

for train_index, val_index in kf.split(df['config_id'].unique()):
    df_train = df[df['config_id'].isin(train_index)]
    df_val = df[df['config_id'].isin(val_index)]

    df_train.to_csv('datasets/Large/train_df_large_rom_norm.csv', index=False)
    df_val.to_csv('datasets/Large/val_df_large_rom_norm.csv', index=False)

    break