In [1]:
import os
import numpy as np
import pandas as pd
import sys
import importlib
import ipynbname
%matplotlib inline

In [2]:
# Derive the project directories from this notebook's own path (via ipynbname).
base_path = os.path.dirname(ipynbname.path())  # directory that contains this notebook
parent_path = os.path.dirname(base_path)       # one level above the notebook directory

# Build sub-paths with os.path.join (as the models/utils cell does) rather than
# concatenating hard-coded '/' separators.
src_path = os.path.join(parent_path, 'src')
filtered_data_path = os.path.join(parent_path, 'data', 'filtered')
# NOTE(review): the stored output of this cell reports
# ".../results/trained_models" for this path, which does not match the value
# assigned here -- the output looks stale. Re-run the cell and confirm which
# location is intended.
saved_models_path = os.path.join(src_path, 'config')

print(f"parent path: {parent_path}")
print(f"base path: {base_path}")
print(f"filtered data path: {filtered_data_path}")
print(f"saved models path: {saved_models_path}")

parent path: /home/sid/coding/mass_recon
base path: /home/sid/coding/mass_recon/notebooks
filtered data path: /home/sid/coding/mass_recon/data/filtered
saved models path: /home/sid/coding/mass_recon/results/trained_models


In [3]:
# Make the project's model and utility modules importable from this notebook.
models_path = os.path.join(src_path, "models")
# NOTE(review): utils is resolved under parent_path while models is resolved
# under src_path -- confirm this asymmetry is intentional.
utils_path = os.path.join(parent_path, "utils")

# Only append directories that are not already on sys.path, so re-running this
# cell does not keep adding duplicate entries.
for _module_dir in (models_path, utils_path):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

import ml_model, data_utils
# Reload so that edits made to the modules on disk are picked up without
# restarting the kernel.
importlib.reload(ml_model)
importlib.reload(data_utils)
# The star imports run after the reload so the notebook namespace receives the
# refreshed definitions.
from ml_model import *
from data_utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Which side's data to process; passed as `side` to get_data_path and Analysis
# in the cells below.
side = 'positron'

## 1. Without Scattering

In [6]:
# Look up the train/test file paths for the scat='n' (no scattering) sample.
path_train_n, path_test_n = get_data_path(
    side=side,
    scat='n',
    data_path=filtered_data_path,
)
print(f"Train:\t{path_train_n},\nTest:\t{path_test_n}")

Train:	/home/sid/coding/mass_recon/data/filtered/PositronCoords_no_scat.dat,
Test:	/home/sid/coding/mass_recon/data/filtered/PositronSort_no_scat.dat


In [7]:
# Run a 50-trial optimization per target on the scat='n' files, passing
# saved_models_path as the base path for run_optimization.
# NOTE(review): the instance is named `electron` although side is 'positron';
# consider renaming once any downstream use of the name has been checked.
for target in ('P', 'ip', 'oop'):
    electron = Analysis(
        side=side,
        scat='n',
        train_path=path_train_n,
        test_path=path_test_n,
        target=target,
        n_jobs=12,
    )
    electron.run_optimization(n_trials=50, base_path=saved_models_path)

[I 2025-03-14 16:56:15,581] A new study created in memory with name: no-name-3ead2723-3e22-41e4-976b-2b98b7599540


Starting hyperparameter tuning...


[I 2025-03-14 16:56:16,986] Trial 0 finished with value: 0.01602645660291672 and parameters: {'n_estimators': 745, 'learning_rate': 0.1781758533979484, 'max_depth': 9, 'min_child_weight': 7, 'gamma': 2.2961865672313575, 'reg_alpha': 0.12371846237221219, 'reg_lambda': 0.40760273211676945, 'subsample': 0.9582141911710548, 'colsample_bytree': 0.7978284934220502}. Best is trial 0 with value: 0.01602645660291672.
[I 2025-03-14 16:56:21,346] Trial 1 finished with value: 0.01789875504530638 and parameters: {'n_estimators': 1435, 'learning_rate': 0.0547229581025589, 'max_depth': 10, 'min_child_weight': 3, 'gamma': 3.51380158618133, 'reg_alpha': 0.2590373946326673, 'reg_lambda': 7.9638744219507, 'subsample': 0.871843928152049, 'colsample_bytree': 0.931648230226789}. Best is trial 0 with value: 0.01602645660291672.
[I 2025-03-14 16:56:24,548] Trial 2 finished with value: 0.014445741531772522 and parameters: {'n_estimators': 636, 'learning_rate': 0.12487045709711887, 'max_depth': 13, 'min_child_w

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/positron_ip_no_scat_params_dict.json
Best Parameters: {'n_estimators': 714, 'learning_rate': 0.014024764426503303, 'max_depth': 16, 'min_child_weight': 3, 'gamma': 0.0002678988568274865, 'reg_alpha': 0.0012138695158967325, 'reg_lambda': 0.021703613858353902, 'subsample': 0.9006943387997323, 'colsample_bytree': 0.8689038684929457}
Best RMSE: 0.005590815491631888
Model optimized in : 1045.09 seconds


[I 2025-03-14 17:13:41,700] A new study created in memory with name: no-name-92a88e4f-130e-40b3-85da-4f97ceeca14e


Starting hyperparameter tuning...


[I 2025-03-14 17:13:52,310] Trial 0 finished with value: 0.06024588671066792 and parameters: {'n_estimators': 934, 'learning_rate': 0.04661155171262619, 'max_depth': 9, 'min_child_weight': 8, 'gamma': 4.47309271971824, 'reg_alpha': 0.23278459726342066, 'reg_lambda': 0.2136982203135945, 'subsample': 0.9754633508396454, 'colsample_bytree': 0.9840362702893128}. Best is trial 0 with value: 0.06024588671066792.
[I 2025-03-14 17:14:04,799] Trial 1 finished with value: 0.06705058284663207 and parameters: {'n_estimators': 1316, 'learning_rate': 0.043973311241939914, 'max_depth': 15, 'min_child_weight': 10, 'gamma': 3.99595719715594, 'reg_alpha': 8.003463139289936, 'reg_lambda': 3.258069387451166, 'subsample': 0.9294759283510418, 'colsample_bytree': 0.7837669758926022}. Best is trial 0 with value: 0.06024588671066792.
[I 2025-03-14 17:14:13,662] Trial 2 finished with value: 0.0642409583941609 and parameters: {'n_estimators': 863, 'learning_rate': 0.06369133585425768, 'max_depth': 20, 'min_child

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/positron_oop_no_scat_params_dict.json
Best Parameters: {'n_estimators': 1142, 'learning_rate': 0.0103996853532653, 'max_depth': 11, 'min_child_weight': 2, 'gamma': 0.060715696051840906, 'reg_alpha': 2.1637628853355957, 'reg_lambda': 0.8137914444549483, 'subsample': 0.6480032634255093, 'colsample_bytree': 0.9095046438791202}
Best RMSE: 0.04644109327553908
Model optimized in : 2185.86 seconds


## 2. With Scattering

In [8]:
# Look up the train/test file paths for the scat='y' (with scattering) sample.
# NOTE(review): this overwrites path_train_n / path_test_n, whose `_n` suffix
# was used for the scat='n' files earlier; the next cell reads these names, so
# rename both cells together if the names are changed.
path_train_n, path_test_n = get_data_path(
    side=side,
    scat='y',
    data_path=filtered_data_path,
)
print(f"Train:\t{path_train_n},\nTest:\t{path_test_n}")

Train:	/home/sid/coding/mass_recon/data/filtered/PositronCoords_wide_acp.dat,
Test:	/home/sid/coding/mass_recon/data/filtered/PositronSort_signal.dat


In [9]:
# Run a 50-trial optimization per target on the scat='y' files, passing
# saved_models_path as the base path for run_optimization.
# NOTE(review): path_train_n / path_test_n hold the scat='y' paths at this
# point (reassigned in the preceding cell), and the instance is named
# `electron` although side is 'positron'.
for target in ('P', 'ip', 'oop'):
    electron = Analysis(
        side=side,
        scat='y',
        train_path=path_train_n,
        test_path=path_test_n,
        target=target,
        n_jobs=12,
    )
    electron.run_optimization(n_trials=50, base_path=saved_models_path)

[I 2025-03-14 17:58:17,704] A new study created in memory with name: no-name-c9b10d26-1139-42d4-bff8-c900b4fdc848


Starting hyperparameter tuning...


[I 2025-03-14 17:58:33,444] Trial 0 finished with value: 0.043325691394828926 and parameters: {'n_estimators': 757, 'learning_rate': 0.049056749375541456, 'max_depth': 19, 'min_child_weight': 2, 'gamma': 2.462817088449242, 'reg_alpha': 9.639547715932878, 'reg_lambda': 0.9897280431264396, 'subsample': 0.7207536311653983, 'colsample_bytree': 0.7691941712956435}. Best is trial 0 with value: 0.043325691394828926.
[I 2025-03-14 17:58:41,515] Trial 1 finished with value: 0.04236015271286943 and parameters: {'n_estimators': 1087, 'learning_rate': 0.09728114818120137, 'max_depth': 19, 'min_child_weight': 7, 'gamma': 1.101825848262568, 'reg_alpha': 3.5134872105631056, 'reg_lambda': 0.4094520448957108, 'subsample': 0.9004983915794054, 'colsample_bytree': 0.8739871463030557}. Best is trial 1 with value: 0.04236015271286943.
[I 2025-03-14 17:58:45,535] Trial 2 finished with value: 0.03167814123223041 and parameters: {'n_estimators': 1354, 'learning_rate': 0.16006954447104507, 'max_depth': 5, 'min_

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/positron_P_scat_params_dict.json
Best Parameters: {'n_estimators': 1113, 'learning_rate': 0.07510231352690135, 'max_depth': 4, 'min_child_weight': 7, 'gamma': 0.0034279650666254845, 'reg_alpha': 1.8863322563933784, 'reg_lambda': 0.6423909434950223, 'subsample': 0.7518947266700299, 'colsample_bytree': 0.8520690427602549}
Best RMSE: 0.02626649470247872
Model optimized in : 705.18 seconds


[I 2025-03-14 18:10:03,919] A new study created in memory with name: no-name-aaca9a77-2016-493f-a5d3-58b4b9b4244d


Starting hyperparameter tuning...


[I 2025-03-14 18:10:46,756] Trial 0 finished with value: 0.07044410123492373 and parameters: {'n_estimators': 1437, 'learning_rate': 0.029879846600157717, 'max_depth': 3, 'min_child_weight': 4, 'gamma': 3.859176939243434, 'reg_alpha': 0.0032511119679240206, 'reg_lambda': 1.4392834470404463, 'subsample': 0.9916807753496762, 'colsample_bytree': 0.6524045983039045}. Best is trial 0 with value: 0.07044410123492373.
[I 2025-03-14 18:11:17,562] Trial 1 finished with value: 0.055295815695932046 and parameters: {'n_estimators': 1218, 'learning_rate': 0.02094241393873653, 'max_depth': 13, 'min_child_weight': 9, 'gamma': 0.2761479664169242, 'reg_alpha': 0.0011691348282536563, 'reg_lambda': 0.007169019230766591, 'subsample': 0.9389756532190813, 'colsample_bytree': 0.7929156773184153}. Best is trial 1 with value: 0.055295815695932046.
[I 2025-03-14 18:11:23,753] Trial 2 finished with value: 0.05880509977662995 and parameters: {'n_estimators': 338, 'learning_rate': 0.06599212685306403, 'max_depth':

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/positron_ip_scat_params_dict.json
Best Parameters: {'n_estimators': 1060, 'learning_rate': 0.010049293178018186, 'max_depth': 9, 'min_child_weight': 10, 'gamma': 0.02318558829352202, 'reg_alpha': 0.018389461914403424, 'reg_lambda': 0.01151748622595998, 'subsample': 0.6006707594626928, 'colsample_bytree': 0.9847448296581085}
Best RMSE: 0.05185364764818315
Model optimized in : 1522.35 seconds


[I 2025-03-14 18:35:27,331] A new study created in memory with name: no-name-48913905-9284-418b-aed8-1237ea35d4c1


Starting hyperparameter tuning...


[I 2025-03-14 18:35:39,910] Trial 0 finished with value: 1.0080014723580424 and parameters: {'n_estimators': 714, 'learning_rate': 0.07800000818404419, 'max_depth': 6, 'min_child_weight': 7, 'gamma': 2.765016269509135, 'reg_alpha': 1.5830206698839762, 'reg_lambda': 0.5655672580531138, 'subsample': 0.7190285386037314, 'colsample_bytree': 0.7525259930933635}. Best is trial 0 with value: 1.0080014723580424.
[I 2025-03-14 18:35:43,591] Trial 1 finished with value: 1.0111861922087366 and parameters: {'n_estimators': 597, 'learning_rate': 0.16392654507489815, 'max_depth': 8, 'min_child_weight': 9, 'gamma': 4.714168019920716, 'reg_alpha': 0.0035520956069690405, 'reg_lambda': 1.1638610955502842, 'subsample': 0.9069507792871542, 'colsample_bytree': 0.9354020292506785}. Best is trial 0 with value: 1.0080014723580424.
[I 2025-03-14 18:35:50,952] Trial 2 finished with value: 1.0136145435728792 and parameters: {'n_estimators': 970, 'learning_rate': 0.09899701310580802, 'max_depth': 11, 'min_child_w

Best Parameters saved to: /home/sid/coding/mass_recon/results/trained_models/positron_oop_scat_params_dict.json
Best Parameters: {'n_estimators': 986, 'learning_rate': 0.025254148402036707, 'max_depth': 6, 'min_child_weight': 5, 'gamma': 0.7469046051511374, 'reg_alpha': 3.103560336213422, 'reg_lambda': 0.04412665978470143, 'subsample': 0.7913230925955044, 'colsample_bytree': 0.926165689430549}
Best RMSE: 1.0070579504564547
Model optimized in : 1611.41 seconds
