In [1]:
from autogluon.tabular import TabularDataset, TabularPredictor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scienceplots
import shap

import events_package.utils as utils
from events_package.Experiment import Experiment
from events_package.config import FIVE_LAYERS
from events_package.input_getters import get_Y_1, get_X_3, get_X_4, get_X_5

In [2]:
# Display the version string of the imported Experiment class, so the run
# records which events_package version produced the results below.
Experiment.__version__

'5.0'

# 1. Importing Single Particle Data

## 1.1 Electrons

In [3]:
# Read the electron sample from parquet and hand it straight to Experiment,
# so no temporary DataFrame name is left behind in the kernel.
# NOTE(review): absolute, machine-specific path — consider a configurable data root.
electrons = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\Electron\Parquet\1m_electron_pq_3"
    ),
    config=FIVE_LAYERS,
)

# Show the length of the loaded sample.
electrons.length

444840

In [None]:
# Run Experiment's standard_procedure on the electron sample.
# NOTE(review): its steps are not visible in this notebook; the comment in the
# AutoGluon section says the per-particle samples were already denoised and
# de-duplicated, which presumably happens here — confirm in events_package.
electrons.standard_procedure()

## 1.2 Photons

In [None]:
# Read the photon sample from parquet and hand it straight to Experiment,
# so no temporary DataFrame name is left behind in the kernel.
# NOTE(review): absolute, machine-specific path — consider a configurable data root.
photons = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\Photon\Parquet\1m_photon_pq"
    ),
    config=FIVE_LAYERS,
)

# Show the length of the loaded sample.
photons.length

In [None]:
# Run Experiment's standard_procedure on the photon sample.
# NOTE(review): its steps are not visible in this notebook; presumably the
# denoising / duplicate removal mentioned in the AutoGluon section — confirm.
photons.standard_procedure()

## 1.3 Neutral Pions

In [None]:
# Read the neutral-pion sample from parquet and hand it straight to Experiment,
# so no temporary DataFrame name is left behind in the kernel.
# NOTE(review): absolute, machine-specific path — consider a configurable data root.
pi0 = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\PiZero\Parquet\pq_pi0_2"
    ),
    config=FIVE_LAYERS,
)

# Show the length of the loaded sample.
pi0.length

In [None]:
# Run Experiment's standard_procedure on the neutral-pion sample.
# NOTE(review): its steps are not visible in this notebook; presumably the
# denoising / duplicate removal mentioned in the AutoGluon section — confirm.
pi0.standard_procedure()

## 1.4 Charged Pions

In [None]:
# Read the charged-pion sample from parquet and hand it straight to Experiment,
# so no temporary DataFrame name is left behind in the kernel.
# NOTE(review): absolute, machine-specific path — consider a configurable data root.
pi_char = Experiment(
    pd.read_parquet(
        r"C:\Users\User1\Desktop\MSci_Project\Data\6_data\PiPlusMinus\Parquet\pq_piplusminus_2"
    ),
    config=FIVE_LAYERS,
)

# Show the length of the loaded sample.
pi_char.length

In [None]:
# Run Experiment's standard_procedure on the charged-pion sample.
# NOTE(review): its steps are not visible in this notebook; presumably the
# denoising / duplicate removal mentioned in the AutoGluon section — confirm.
pi_char.standard_procedure()

# 2. AutoGluon Model on the Combined Dataset

In [None]:
# add types to allow for identification later
electrons.add_physics_object_type(typ="electron")
photons.add_physics_object_type(typ="photon")
pi0.add_physics_object_type(typ="pi0")
pi_char.add_physics_object_type(typ="pi_char")

experiment = electrons + photons + pi0 + pi_char

# all previous datasets have already been denoisified, duplicates were removed, no need to do it now
# in fact, doing it would delete some good events
experiment.shuffle_dataset(repeats=11)
print(experiment.length)

In [None]:
# Split the merged dataset, using get_X_5 to build the features and get_Y_1 to
# build the target; test_size=0.2 is presumably the held-out fraction (20%).
# NOTE(review): no seed is passed here or to shuffle_dataset above, so the split
# may differ between runs — confirm whether Experiment seeds internally.
experiment.train_test_split(get_X=get_X_5, get_Y=get_Y_1, test_size=0.2)
# Show (rows, columns) of the test features; the column count should match the
# number of entries in feature_names defined in the next cell.
experiment.X_test.shape

In [None]:
# Human-readable column names for the feature matrix, generated from the three
# naming patterns they follow. Only comprehension-local names are used, so
# nothing besides `feature_names` is added to the kernel namespace.
# NOTE(review): the order must match the columns produced by get_X_5 — that
# mapping is not visible in this notebook; confirm against events_package.
feature_names = [
    # 1) one "<layer> eta" entry per layer
    *(f"{layer} eta" for layer in ("psb", "emb1", "emb2", "emb3", "hab1")),
    # 2) "<layer> - <next layer> eta" for each pair of consecutive layers
    *(
        f"{first} - {second} eta"
        for first, second in (
            ("psb", "emb1"),
            ("emb1", "emb2"),
            ("emb2", "emb3"),
            ("emb3", "hab1"),
        )
    ),
    # 3) per layer: "<layer>(c)" followed by "<layer>(c+k) - <layer>(c-k)"
    #    for k = 1..width, where c is the index listed for that layer
    *(
        name
        for layer, centre, width in (
            ("psb", 4, 1),
            ("emb1", 8, 4),
            ("emb2", 4, 1),
            ("emb3", 4, 1),
            ("hab1", 4, 1),
        )
        for name in (
            f"{layer}({centre})",
            *(
                f"{layer}({centre + k}) - {layer}({centre - k})"
                for k in range(1, width + 1)
            ),
        )
    ),
]

In [None]:
# Wrap the training features in a DataFrame with readable column names.
# NOTE(review): assumes experiment.X_train is array-like with one column per
# entry of feature_names; if it is already a DataFrame, `columns=` selects by
# name instead of renaming — confirm.
X_train_df = pd.DataFrame(experiment.X_train, columns=feature_names)

# Build the target frame from the raw values (y_train is presumably a pandas
# object, given `.values`), so it gets a fresh default index.
y_train_df = pd.DataFrame({"target z": experiment.y_train.values})


# Same construction for the held-out test split.
X_test_df = pd.DataFrame(experiment.X_test, columns=feature_names)
y_test_df = pd.DataFrame({"target z": experiment.y_test.values})

In [None]:
# Place the target column first and the features after it, side by side,
# giving one frame per split with the label and features together.
# NOTE(review): column-wise concat aligns rows on the index — this assumes the
# target and feature frames share the same index; confirm.
df_train = pd.concat([y_train_df, X_train_df], axis="columns")
df_test = pd.concat([y_test_df, X_test_df], axis="columns")

# Display the assembled training frame.
df_train

In [None]:
# Name of the column AutoGluon should learn to predict; it matches the target
# column created when the train/test frames were assembled.
label_column = "target z"

# Only the label is specified: no problem_type, eval_metric or output path is
# passed, so AutoGluon's defaults apply (see the TabularPredictor docs).
predictor = TabularPredictor(label=label_column)

In [None]:
# Train on the combined label + feature training frame.
predictor.fit(train_data=df_train)

# Predict on the test features only (label column removed first).
predictions = predictor.predict(df_test.drop(columns=label_column))

# Score on the full test frame (label included) and print the result.
performance = predictor.evaluate(df_test)
print(performance)