In [2]:
import os
import numpy as np
import pandas as pd
import sys
import importlib
import ipynbname
%matplotlib inline

In [3]:
base_path = os.path.dirname(ipynbname.path())
parent_path = os.path.dirname(base_path)  
src_path = parent_path + '/src'
filtered_data_path = parent_path + '/data/filtered'
saved_models_path = src_path + '/config'

print(f"parent path: {parent_path}")
print(f"base path: {base_path}")
print(f"filtered data path: {filtered_data_path}")
print(f"saved models path: {saved_models_path}")

parent path: /home/sid/coding/mass_recon
base path: /home/sid/coding/mass_recon/notebooks
filtered data path: /home/sid/coding/mass_recon/data/filtered
saved models path: /home/sid/coding/mass_recon/src/config


In [174]:
# import ml model file
models_path = os.path.join(src_path, "models")
utils_path = os.path.join(parent_path, "utils")

sys.path.append(models_path)
sys.path.append(utils_path)

import ml_model, data_utils
importlib.reload(ml_model)
importlib.reload(data_utils)
from ml_model import *
from data_utils import *

In [175]:
side = 'electron'

## 1. Without Scattering

In [176]:
path_train_n, path_test_n = get_data_path(side = side, 
                                          scat = 'n', 
                                          data_path = filtered_data_path)
print(f"Train:\t{path_train_n},\nTest:\t{path_test_n}")

Train:	/home/sid/coding/mass_recon/data/filtered/ElectronCoords_no_scat.dat,
Test:	/home/sid/coding/mass_recon/data/filtered/ElectronSort_no_scat.dat


In [177]:
for target in ['P', 'ip', 'oop']:
    electron = Analysis(side=side, 
                        scat=scat,
                        train_path = path_train_n,
                        test_path = path_test_n,
                        target = target,
                        n_jobs = 12)
    electron.run_optimization(n_trials=50, 
                              base_path=saved_models_path)

[I 2025-03-14 10:10:27,009] A new study created in memory with name: no-name-6fa7b26c-fe4b-4d5a-ad95-be4c659e8fde


Starting hyperparameter tuning...


[I 2025-03-14 10:10:43,192] Trial 0 finished with value: 0.04173457815181232 and parameters: {'n_estimators': 605, 'learning_rate': 0.06694607642658897, 'max_depth': 17, 'min_child_weight': 10, 'gamma': 2.4495265728069135, 'reg_alpha': 5.268378654949011, 'reg_lambda': 5.405091566330986, 'subsample': 0.715784025350994, 'colsample_bytree': 0.6829330335849366}. Best is trial 0 with value: 0.04173457815181232.
[I 2025-03-14 10:10:56,582] Trial 1 finished with value: 0.03014646520977156 and parameters: {'n_estimators': 730, 'learning_rate': 0.029855074330871728, 'max_depth': 19, 'min_child_weight': 3, 'gamma': 3.969625660082023, 'reg_alpha': 0.41689916129108157, 'reg_lambda': 0.3163820630291074, 'subsample': 0.6626672289712126, 'colsample_bytree': 0.7997495715332017}. Best is trial 1 with value: 0.03014646520977156.
[I 2025-03-14 10:11:25,794] Trial 2 finished with value: 0.023373417766078593 and parameters: {'n_estimators': 1241, 'learning_rate': 0.017178894143763154, 'max_depth': 9, 'min_

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/electron_P_no_scat_params_dict.json
Best Parameters: {'n_estimators': 664, 'learning_rate': 0.03467788802847974, 'max_depth': 7, 'min_child_weight': 4, 'gamma': 0.03287037455982368, 'reg_alpha': 0.10947107079411166, 'reg_lambda': 0.0054080930578136325, 'subsample': 0.9652755093059073, 'colsample_bytree': 0.9305013590566352}
Best RMSE: 0.012749378907992196
Model optimized in : 757.26 seconds


[I 2025-03-14 10:23:05,837] A new study created in memory with name: no-name-ae73f035-4b12-450c-8bca-80fb8c287981


Starting hyperparameter tuning...


[I 2025-03-14 10:23:15,510] Trial 0 finished with value: 0.08662753836502854 and parameters: {'n_estimators': 278, 'learning_rate': 0.01710626093716342, 'max_depth': 3, 'min_child_weight': 10, 'gamma': 0.6779457737776834, 'reg_alpha': 0.24067715266610284, 'reg_lambda': 1.6052814998587053, 'subsample': 0.648205557551761, 'colsample_bytree': 0.6808547203998269}. Best is trial 0 with value: 0.08662753836502854.
[I 2025-03-14 10:23:22,125] Trial 1 finished with value: 0.01984372730797304 and parameters: {'n_estimators': 1072, 'learning_rate': 0.06657694962624086, 'max_depth': 9, 'min_child_weight': 10, 'gamma': 1.4766404726032134, 'reg_alpha': 0.003288828047007816, 'reg_lambda': 0.005937359615455376, 'subsample': 0.6790799478660227, 'colsample_bytree': 0.6744530039206235}. Best is trial 1 with value: 0.01984372730797304.
[I 2025-03-14 10:23:28,629] Trial 2 finished with value: 0.01913960712655251 and parameters: {'n_estimators': 1292, 'learning_rate': 0.04687656642765787, 'max_depth': 7, '

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/electron_ip_no_scat_params_dict.json
Best Parameters: {'n_estimators': 409, 'learning_rate': 0.04303454898730122, 'max_depth': 13, 'min_child_weight': 10, 'gamma': 0.008263731487506476, 'reg_alpha': 0.08267840022331176, 'reg_lambda': 0.004024501395117737, 'subsample': 0.7886358219188255, 'colsample_bytree': 0.8381807652063727}
Best RMSE: 0.006425080122676773
Model optimized in : 590.72 seconds


[I 2025-03-14 10:32:57,825] A new study created in memory with name: no-name-9ef8e6e6-de85-4401-af2c-c314c2414b93


Starting hyperparameter tuning...


[I 2025-03-14 10:33:08,960] Trial 0 finished with value: 0.09328282676121977 and parameters: {'n_estimators': 1014, 'learning_rate': 0.06798972710609714, 'max_depth': 20, 'min_child_weight': 3, 'gamma': 4.165523654113288, 'reg_alpha': 4.771787702469212, 'reg_lambda': 0.0018611154012834771, 'subsample': 0.6065212260458617, 'colsample_bytree': 0.7294941454084783}. Best is trial 0 with value: 0.09328282676121977.
[I 2025-03-14 10:33:13,553] Trial 1 finished with value: 0.1048631099247342 and parameters: {'n_estimators': 700, 'learning_rate': 0.1344910497518922, 'max_depth': 6, 'min_child_weight': 8, 'gamma': 2.2573299775320748, 'reg_alpha': 0.9238578010172218, 'reg_lambda': 8.53312560988887, 'subsample': 0.6513595262427269, 'colsample_bytree': 0.8957133303563826}. Best is trial 0 with value: 0.09328282676121977.
[I 2025-03-14 10:33:30,495] Trial 2 finished with value: 0.06030519138153317 and parameters: {'n_estimators': 965, 'learning_rate': 0.05920881463966908, 'max_depth': 14, 'min_chil

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/electron_oop_no_scat_params_dict.json
Best Parameters: {'n_estimators': 334, 'learning_rate': 0.057246667871517914, 'max_depth': 15, 'min_child_weight': 5, 'gamma': 0.00039431834968052526, 'reg_alpha': 0.17177571969097258, 'reg_lambda': 0.1059823734956776, 'subsample': 0.8725688202590988, 'colsample_bytree': 0.9860545655397677}
Best RMSE: 0.044782585062724145
Model optimized in : 1154.53 seconds


# 2. With Scattering

In [181]:
scat = 'y'

In [178]:
path_train_n, path_test_n = get_data_path(side = side, 
                                          scat = 'y', 
                                          data_path = filtered_data_path)
print(f"Train:\t{path_train_n},\nTest:\t{path_test_n}")

Train:	/home/sid/coding/mass_recon/data/filtered/ElectronCoords_wide_acp.dat,
Test:	/home/sid/coding/mass_recon/data/filtered/ElectronSort_signal.dat


In [180]:
for target in ['P', 'ip', 'oop']:
    electron = Analysis(side=side, 
                        scat=scat,
                        train_path = path_train_n,
                        test_path = path_test_n,
                        target = target,
                        n_jobs = 12)
    electron.run_optimization(n_trials=50, 
                              base_path=saved_models_path)

[I 2025-03-14 10:53:39,278] A new study created in memory with name: no-name-076b3dc5-c84a-4493-92c9-a6758610b2a7


Starting hyperparameter tuning...


[I 2025-03-14 10:54:15,246] Trial 0 finished with value: 0.026512716247052086 and parameters: {'n_estimators': 1315, 'learning_rate': 0.011522048749281729, 'max_depth': 8, 'min_child_weight': 7, 'gamma': 1.6380341357834705, 'reg_alpha': 2.65111856481889, 'reg_lambda': 1.3353554208483003, 'subsample': 0.926706510641798, 'colsample_bytree': 0.8616443521095049}. Best is trial 0 with value: 0.026512716247052086.
[I 2025-03-14 10:54:44,048] Trial 1 finished with value: 0.04714244556559367 and parameters: {'n_estimators': 543, 'learning_rate': 0.03937575196375745, 'max_depth': 15, 'min_child_weight': 8, 'gamma': 0.06353341277187119, 'reg_alpha': 0.005226020573713201, 'reg_lambda': 4.937244857521577, 'subsample': 0.9946051866760469, 'colsample_bytree': 0.7099968707450119}. Best is trial 0 with value: 0.026512716247052086.
[I 2025-03-14 10:54:49,651] Trial 2 finished with value: 0.031300159271706395 and parameters: {'n_estimators': 1358, 'learning_rate': 0.13395660229504014, 'max_depth': 7, 'm

Best Parameters saved to: /home/sid/coding/mass_recon/notebooks/electron_P_no_scat_params_dict.json
Best Parameters: {'n_estimators': 740, 'learning_rate': 0.03887104113303092, 'max_depth': 4, 'min_child_weight': 2, 'gamma': 0.0075110709849699865, 'reg_alpha': 1.7788713363754776, 'reg_lambda': 0.024914619948148014, 'subsample': 0.660177929764001, 'colsample_bytree': 0.9262664588902263}
Best RMSE: 0.020927869339880435
Model optimized in : 748.78 seconds


[I 2025-03-14 11:06:09,253] A new study created in memory with name: no-name-d4cabba3-4c5e-4738-a25d-538aeae76bde


Starting hyperparameter tuning...


[I 2025-03-14 11:06:18,507] Trial 0 finished with value: 0.07643860598325089 and parameters: {'n_estimators': 882, 'learning_rate': 0.08490038307860538, 'max_depth': 19, 'min_child_weight': 4, 'gamma': 3.3452697796411686, 'reg_alpha': 0.0030341122764589367, 'reg_lambda': 0.059698016608436835, 'subsample': 0.9046977564349061, 'colsample_bytree': 0.6301661329143787}. Best is trial 0 with value: 0.07643860598325089.
[I 2025-03-14 11:06:27,897] Trial 1 finished with value: 0.07594176421485578 and parameters: {'n_estimators': 670, 'learning_rate': 0.04641453601372751, 'max_depth': 4, 'min_child_weight': 6, 'gamma': 2.813328255018936, 'reg_alpha': 0.012233753649043605, 'reg_lambda': 0.00234342351856658, 'subsample': 0.8705331397486609, 'colsample_bytree': 0.8285332042216252}. Best is trial 1 with value: 0.07594176421485578.
[I 2025-03-14 11:07:02,074] Trial 2 finished with value: 0.0989105307002038 and parameters: {'n_estimators': 701, 'learning_rate': 0.010659153181104665, 'max_depth': 7, '

Best Parameters saved to: /home/sid/coding/mass_recon/notebooks/electron_ip_no_scat_params_dict.json
Best Parameters: {'n_estimators': 904, 'learning_rate': 0.04384397737526142, 'max_depth': 6, 'min_child_weight': 5, 'gamma': 0.0018908190660060914, 'reg_alpha': 0.005204715665456473, 'reg_lambda': 0.25868181050924066, 'subsample': 0.9327637785751924, 'colsample_bytree': 0.8730413535565283}
Best RMSE: 0.0693550716293269
Model optimized in : 765.53 seconds


[I 2025-03-14 11:18:56,037] A new study created in memory with name: no-name-6423a9b2-0677-4fdc-a3b0-34eb21a24b34


Starting hyperparameter tuning...


[I 2025-03-14 11:19:05,035] Trial 0 finished with value: 1.433066757318215 and parameters: {'n_estimators': 386, 'learning_rate': 0.06576204306700428, 'max_depth': 20, 'min_child_weight': 8, 'gamma': 0.4210529574433114, 'reg_alpha': 0.3628173548805886, 'reg_lambda': 7.374526426421487, 'subsample': 0.6299336998615789, 'colsample_bytree': 0.9678001711038043}. Best is trial 0 with value: 1.433066757318215.
[I 2025-03-14 11:19:07,592] Trial 1 finished with value: 1.4294790912439845 and parameters: {'n_estimators': 826, 'learning_rate': 0.1614460625136562, 'max_depth': 14, 'min_child_weight': 2, 'gamma': 4.272096778205958, 'reg_alpha': 0.1476396229172357, 'reg_lambda': 0.06384856122531869, 'subsample': 0.6001237188580902, 'colsample_bytree': 0.7583153192487996}. Best is trial 1 with value: 1.4294790912439845.
[I 2025-03-14 11:19:53,895] Trial 2 finished with value: 1.4153785989078262 and parameters: {'n_estimators': 1351, 'learning_rate': 0.020042653800441363, 'max_depth': 3, 'min_child_wei

Best Parameters saved to: /home/sid/coding/mass_recon/notebooks/electron_oop_no_scat_params_dict.json
Best Parameters: {'n_estimators': 1369, 'learning_rate': 0.02150918875847171, 'max_depth': 7, 'min_child_weight': 5, 'gamma': 1.3342730923700203, 'reg_alpha': 0.4212180295310984, 'reg_lambda': 1.2618809060478595, 'subsample': 0.9294825243957562, 'colsample_bytree': 0.7897022795064867}
Best RMSE: 1.4021943754620954
Model optimized in : 1301.44 seconds
