In [1]:
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scienceplots
import shap

import events_package.utils as utils
from events_package.Experiment import Experiment
from events_package.config import FIVE_LAYERS
from events_package.input_getters import get_Y_1, get_X_3, get_X_4, get_X_5

In [2]:
# Display the version string of the Experiment class this notebook was run with.
Experiment.__version__

'5.0'

# 1. Importing Single Particle Data

## 1.1 Electrons

In [3]:
# Load the electron sample and wrap it in an Experiment (FIVE_LAYERS config).
# The parquet frame is handed straight to the constructor, so no temporary
# DataFrame name is left behind in the notebook namespace.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
electrons = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\Electron\Parquet\1m_electron_pq_3"
    ),
    config=FIVE_LAYERS,
)
# Show electrons.length as the cell output.
electrons.length

444840

In [None]:
# Run the Experiment's standard_procedure() on the electron sample.
# NOTE(review): the steps it performs are defined in events_package and are not visible here.
electrons.standard_procedure()

## 1.2 Photons

In [None]:
# Load the photon sample and wrap it in an Experiment (FIVE_LAYERS config).
# The parquet frame is handed straight to the constructor, so no temporary
# DataFrame name is left behind in the notebook namespace.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
photons = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\Photon\Parquet\1m_photon_pq"
    ),
    config=FIVE_LAYERS,
)
# Show photons.length as the cell output.
photons.length

In [None]:
# Run the Experiment's standard_procedure() on the photon sample.
# NOTE(review): the steps it performs are defined in events_package and are not visible here.
photons.standard_procedure()

## 1.3 Neutral Pions

In [None]:
# Load the neutral pion sample and wrap it in an Experiment (FIVE_LAYERS config).
# The parquet frame is handed straight to the constructor, so no temporary
# DataFrame name is left behind in the notebook namespace.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
pi0 = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\PiZero\Parquet\pq_pi0_2"
    ),
    config=FIVE_LAYERS,
)
# Show pi0.length as the cell output.
pi0.length

In [None]:
# Run the Experiment's standard_procedure() on the neutral pion sample.
# NOTE(review): the steps it performs are defined in events_package and are not visible here.
pi0.standard_procedure()

## 1.4 Charged Pions

In [None]:
# Load the charged pion sample and wrap it in an Experiment (FIVE_LAYERS config).
# The parquet frame is handed straight to the constructor, so no temporary
# DataFrame name is left behind in the notebook namespace.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
pi_char = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\PiPlusMinus\Parquet\pq_piplusminus_2"
    ),
    config=FIVE_LAYERS,
)
# Show pi_char.length as the cell output.
pi_char.length

In [None]:
# Run the Experiment's standard_procedure() on the charged pion sample.
# NOTE(review): the steps it performs are defined in events_package and are not visible here.
pi_char.standard_procedure()

# 2. Model With Input Features Version 3

The new idea here is to normalise the energy inputs, which proved to improve accuracy slightly.

In [None]:
# Hyperparameters shared by every model trained in this notebook.
# NOTE(review): this dict is passed to Experiment.train_xgboost_model() in each
# training cell; how it is forwarded to XGBoost is not visible here - confirm.
params = {
    "objective": "reg:squarederror",  # squared-error regression objective
    "max_depth": 6,
    "learning_rate": 0.18,
    "colsample_bytree": 0.8,
    "eval_metric": "rmse",  # root-mean-square error
    "n_estimators": 600,
}

In [None]:
# Print the docstring of get_X_3, the input getter used for the models in this section.
print(get_X_3.__doc__)

In [None]:
# Electrons with get_X_3 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
electrons.train_test_split(
    get_X=get_X_3,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(electrons.X_test.shape)

electrons.train_xgboost_model(params)

In [None]:
# Photons with get_X_3 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
photons.train_test_split(
    get_X=get_X_3,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(photons.X_test.shape)

photons.train_xgboost_model(params)

In [None]:
# Neutral pions with get_X_3 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
pi0.train_test_split(
    get_X=get_X_3,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(pi0.X_test.shape)

pi0.train_xgboost_model(params)

In [None]:
# Charged pions with get_X_3 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
pi_char.train_test_split(
    get_X=get_X_3,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(pi_char.X_test.shape)

pi_char.train_xgboost_model(params)

## 2.1 SHAP for get_X_3 Model

In [None]:
# Explain the electron model with SHAP's tree explainer.
# NOTE(review): this reads electrons.model and electrons.X_train, so it assumes
# they still belong to the get_X_3 training above - run this cell before any
# later section re-splits and retrains `electrons`.
model = electrons.model
# check_additivity is an option of shap_values(), not of the TreeExplainer
# constructor, so it is passed only where it takes effect.
explainer = shap.TreeExplainer(model)

# SHAP values for the electron training inputs, with the additivity check disabled.
shap_values = explainer.shap_values(electrons.X_train, check_additivity=False)

In [None]:
# Display labels for the SHAP summary plot, generated instead of typed out by hand.
# NOTE(review): there must be one label per column of electrons.X_train
# (29 labels here) - confirm the order against get_X_3.
layer_labels = ["psb", "emb1", "emb2", "emb3", "hab1"]

feature_names = (
    # "<layer> eta" for each of the five layers
    [f"{layer} eta" for layer in layer_labels]
    # "<layer> - <next layer> eta" for each pair of consecutive layers
    + [f"{first} - {second} eta" for first, second in zip(layer_labels, layer_labels[1:])]
    # "psb (0)" ... "psb (8)"
    + [f"psb ({index})" for index in range(9)]
    # "emb1 (0)" ... "emb1 (10)"
    + [f"emb1 ({index})" for index in range(11)]
)

# Summary plot of the SHAP values over the electron training inputs.
shap.summary_plot(shap_values, electrons.X_train, feature_names=feature_names)

# 3. Model With Input Features Version 4

In [None]:
# Print the docstring of get_X_4, the input getter used for the models in this section.
print(get_X_4.__doc__)

In [None]:
# Electrons with get_X_4 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
electrons.train_test_split(
    get_X=get_X_4,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(electrons.X_test.shape)

electrons.train_xgboost_model(params)

In [None]:
# Photons with get_X_4 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
photons.train_test_split(
    get_X=get_X_4,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(photons.X_test.shape)

photons.train_xgboost_model(params)

In [None]:
# Neutral pions with get_X_4 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
pi0.train_test_split(
    get_X=get_X_4,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(pi0.X_test.shape)

pi0.train_xgboost_model(params)

In [None]:
# Charged pions with get_X_4 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
pi_char.train_test_split(
    get_X=get_X_4,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(pi_char.X_test.shape)

pi_char.train_xgboost_model(params)

# 4. Model With Input Features Version 5

In [None]:
# Electrons with get_X_5 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
electrons.train_test_split(
    get_X=get_X_5,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(electrons.X_test.shape)

electrons.train_xgboost_model(params)

In [None]:
# Photons with get_X_5 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
photons.train_test_split(
    get_X=get_X_5,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(photons.X_test.shape)

photons.train_xgboost_model(params)

In [None]:
# Neutral pions with get_X_5 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
pi0.train_test_split(
    get_X=get_X_5,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(pi0.X_test.shape)

pi0.train_xgboost_model(params)

In [None]:
# Charged pions with get_X_5 / get_Y_1: make the train/test split (test_size=0.2),
# print the shape of the test inputs, then train the XGBoost model.
pi_char.train_test_split(
    get_X=get_X_5,
    get_Y=get_Y_1,
    test_size=0.2,
)
print(pi_char.X_test.shape)

pi_char.train_xgboost_model(params)