In [8]:
import numpy as np
import pandas as pd

# ---------------------------------------------
# 1. Load Phase 2 features (ΔR, width, pT)
# ---------------------------------------------

# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open until it is closed. Read the arrays inside a `with` block so the
# handle is released, and keep them in a plain dict so that
# phase2_data["key"] lookups keep working after the archive is closed.
with np.load("../processed/engineered_features.npz") as npz_archive:
    phase2_data = {key: npz_archive[key] for key in npz_archive.files}

# Output column -> (quark array key, gluon array key).
# Quark entries are always stacked first, gluon entries second.
phase2_sources = {
    "deltaR_mean": ("deltaR_quark_mean", "deltaR_gluon_mean"),
    "deltaR_max": ("deltaR_quark_max", "deltaR_gluon_max"),
    "eta_width": ("eta_width_quark", "eta_width_gluon"),
    "phi_width": ("phi_width_quark", "phi_width_gluon"),
    "pt_sum": ("pt_quark", "pt_gluon"),
}

phase2_columns = {
    column: np.concatenate([phase2_data[quark_key], phase2_data[gluon_key]])
    for column, (quark_key, gluon_key) in phase2_sources.items()
}

# Class label: 1 for every quark entry, 0 for every gluon entry, in the same
# quark-then-gluon order as the feature columns. ones_like / zeros_like give
# the labels the same dtype as the deltaR arrays they are sized from.
phase2_columns["label"] = np.concatenate([
    np.ones_like(phase2_data["deltaR_quark_mean"]),
    np.zeros_like(phase2_data["deltaR_gluon_mean"]),
])

# Create Phase 2 DataFrame (column order follows insertion order above)
phase2_df = pd.DataFrame(phase2_columns)

print(f"Loaded Phase 2 features: {phase2_df.shape}")

# ---------------------------------------------
# 2. Load tau (τ1, τ2, τ3) features
# ---------------------------------------------

# Read the quark table first, then the gluon table, so the stacked frame
# keeps the quark-then-gluon row order.
tau_quark, tau_gluon = (
    pd.read_csv(csv_path)
    for csv_path in ("../processed/tau_quark.csv", "../processed/tau_gluon.csv")
)

# Stack both classes into one frame with a fresh 0..N-1 index.
tau_df = pd.concat([tau_quark, tau_gluon], ignore_index=True)

print(f"Loaded tau features: {tau_df.shape}")

# ---------------------------------------------
# 3. Load entropy feature
# ---------------------------------------------

# Passing `names` makes pandas treat the files as header-less and label the
# data column "entropy". The trailing [["entropy"]] selection keeps only
# that column in each per-class frame.
entropy_quark, entropy_gluon = (
    pd.read_csv(csv_path, names=["entropy"])[["entropy"]]
    for csv_path in (
        "../processed/entropy_quark.csv",
        "../processed/entropy_gluon.csv",
    )
)

# Quark rows first, gluon rows second, re-indexed from 0.
entropy_df = pd.concat([entropy_quark, entropy_gluon], ignore_index=True)

print(f"Loaded entropy features: {entropy_df.shape}")

# ---------------------------------------------
# 4. Assemble Full Feature Set
# ---------------------------------------------

# Reset all indices to ensure alignment
phase2_df = phase2_df.reset_index(drop=True)
tau_df = tau_df.reset_index(drop=True)
entropy_df = entropy_df.reset_index(drop=True)

# pd.concat(axis=1) aligns on the index with an outer join, so tables of
# different length would be silently padded with NaN instead of raising.
# Fail loudly here rather than writing a misaligned feature matrix.
row_counts = {
    "phase2_df": len(phase2_df),
    "tau_df": len(tau_df),
    "entropy_df": len(entropy_df),
}
if len(set(row_counts.values())) != 1:
    raise ValueError(f"Feature tables have mismatched row counts: {row_counts}")

# Combine everything: Phase 2 columns (including label), then the three tau
# columns, then entropy. Any other columns in tau_df are dropped.
full_df = pd.concat(
    [phase2_df, tau_df[["tau1", "tau2", "tau3"]], entropy_df[["entropy"]]],
    axis=1,
)

print(f"Final Assembled Feature Shape: {full_df.shape}")

# ---------------------------------------------
# 5. Display a preview
# ---------------------------------------------

# Show the first rows of the assembled matrix as plain text.
preview_rows = full_df.head()
print(preview_rows)

# ---------------------------------------------
# 6. Save Assembled Feature Matrix
# ---------------------------------------------

# Write the matrix as CSV; index=False leaves the row index out of the file.
full_df.to_csv(
    "../processed/full_features.csv",
    index=False,
)
print("Saved full_features.csv to processed/")

Loaded Phase 2 features: (100000, 6)
Loaded tau features: (100000, 9)
Loaded entropy features: (100000, 1)
Final Assembled Feature Shape: (100000, 10)
   deltaR_mean  deltaR_max  eta_width  phi_width      pt_sum  label      tau1  \
0     0.113489    0.357406   0.036857   0.031937  513.989348    1.0  0.031192   
1     0.104943    0.372912   0.025278   0.024072  538.222313    1.0  0.017082   
2     0.178022    0.392665   0.022716   0.038971  529.846951    1.0  0.017394   
3     0.099346    0.399456   0.048627   0.026427  506.544815    1.0  0.039010   
4     0.102658    0.347468   0.038306   0.039376  518.627759    1.0  0.059382   

       tau2      tau3   entropy  
0  0.024029  0.018583  2.039020  
1  0.014936  0.012220  1.947566  
2  0.012072  0.009343  1.549192  
3  0.031932  0.023717  2.812852  
4  0.036577  0.029405  3.208436  
Saved full_features.csv to processed/
