In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
import numpy as np
import json
import os

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Directory that holds the per-sample event CSV parts.
datasets_dir = "./reconstructed_events"

# (sample name, number of CSV parts) in the order the parts are loaded.
parts_per_sample = [
    ("Run2012B_DoubleElectron", 5),
    ("Run2012B_DoubleMuParked", 6),
    ("Run2012C_DoubleElectron", 7),
    ("Run2012C_DoubleMuParked", 8),
]

# Expand the table into the full list of paths: part 0..n-1 for every sample.
filenames = [
    os.path.join(datasets_dir, f"higgs_events_{sample}_{part}.csv")
    for sample, n_parts in parts_per_sample
    for part in range(n_parts)
]

# Read every part, then stack them into one frame with a fresh 0..N-1 index.
dfs = [pd.read_csv(path) for path in filenames]
final_df = pd.concat(dfs, ignore_index=True)

# Last expression of the cell: show the combined frame.
final_df

Unnamed: 0,mass_Z_1,mass_Z_2,mass_H,type_1_1,px_1_1,py_1_1,pz_1_1,energy_1_1,charge_1_1,isolation_1_1,...,isolation_2_1,sip3d_2_1,type_2_2,px_2_2,py_2_2,pz_2_2,energy_2_2,charge_2_2,isolation_2_2,sip3d_2_2
0,71.733536,32.191714,116.159368,ELECTRON,-27.181263,-36.977588,7.003694,46.424291,-1.0,0.095300,...,2.138484,1.651393,ELECTRON,-7.037700,4.637808,-11.158640,13.984053,-1.0,5.041755,1.357776
1,116.013905,13.166483,134.354673,ELECTRON,-7.229550,68.012015,60.216271,91.125737,1.0,0.012135,...,4.448079,5.814342,MUON,-3.265677,-2.465458,4.320326,5.950491,1.0,0.163214,133.130912
2,116.013905,3.578640,124.095767,ELECTRON,-7.229550,68.012015,60.216271,91.125737,1.0,0.012135,...,0.163214,133.130912,MUON,0.284896,-3.276870,2.933774,4.407502,-1.0,26.368921,1.000000
3,0.989200,1.975247,43.243424,ELECTRON,18.672989,4.421923,15.438558,24.628906,1.0,0.850321,...,0.827168,4.697393,MUON,-5.005772,0.050801,-3.181809,5.931631,-1.0,1.492350,4.434170
4,3.770940,0.563736,84.740369,ELECTRON,-49.819395,18.389213,17.981005,56.066495,1.0,0.644394,...,3.316199,45.562217,ELECTRON,4.773779,-5.947916,10.732182,13.166108,1.0,5.028063,3.685065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135355,48.124147,3.054985,84.229960,ELECTRON,0.371960,-25.515981,32.112459,41.017236,-1.0,1.989740,...,0.546417,7.886516,MUON,-7.396218,-11.167912,7.899953,15.551062,-1.0,0.942265,1.151532
135356,1.265345,6.603060,19.386642,MUON,1.585027,12.640417,-18.626678,22.566470,-1.0,2.635561,...,0.500870,2354.517745,MUON,-3.365565,-2.827652,-5.550590,7.080374,-1.0,0.507240,999.353633
135357,26.984351,10.412528,38.393351,MUON,-16.880096,3.472844,14.898764,22.780945,1.0,1.004402,...,3.230597,9.441448,MUON,3.031912,-1.217236,-3.413170,4.724816,1.0,0.000000,1.000000
135358,3.215874,6.819736,19.654590,MUON,13.837444,-5.343954,45.219783,47.590560,1.0,0.087100,...,1.091033,2.398851,MUON,-3.326302,1.066065,9.911967,10.509418,-1.0,6.194585,0.359058


In [3]:
# Read the stored cut ranges from disk; they provide the initial slider values.
with open('filter_settings.json', 'r') as settings_handle:
    filter_settings = json.load(settings_handle)

# Last expression of the cell: show the parsed settings.
filter_settings

{'mass_Z_1': {'min': 70, 'max': 110},
 'mass_Z_2': {'min': 0.4, 'max': 40},
 'isolation': {'min': 0, 'max': 0.44},
 'energy_1_1': {'min': 44, 'max': 140},
 'energy_1_2': {'min': 38.01, 'max': 100},
 'energy_2_1': {'min': 10, 'max': 200},
 'energy_2_2': {'min': 3.98, 'max': 99.98},
 'sip3d': {'min': 0, 'max': 5}}

In [12]:
# Function to create histograms and filter data
def plot_histograms(min_val_mass_Z_1, max_val_mass_Z_1, min_val_mass_Z_2, max_val_mass_Z_2,

                    min_val_isolation, max_val_isolation,

                    min_val_sip3d, max_val_sip3d,

                    min_val_energy_1_1, max_val_energy_1_1,
                    min_val_energy_1_2, max_val_energy_1_2,
                    min_val_energy_2_1, max_val_energy_2_1,
                    min_val_energy_2_2, max_val_energy_2_2,

                    bins_mass_Z_1, bins_mass_Z_2, bins_mass_H):
    """Apply the selected cuts to ``final_df`` and draw three mass histograms.

    Every cut is an inclusive ``min <= value <= max`` range. The isolation and
    sip3d ranges are applied to all four leptons (column suffixes ``1_1``,
    ``1_2``, ``2_1``, ``2_2``); each lepton has its own energy range.

    Parameters
    ----------
    min_val_mass_Z_1, max_val_mass_Z_1 : float
        Accepted range of the ``mass_Z_1`` column.
    min_val_mass_Z_2, max_val_mass_Z_2 : float
        Accepted range of the ``mass_Z_2`` column.
    min_val_isolation, max_val_isolation : float
        Accepted range shared by all four ``isolation_*`` columns.
    min_val_sip3d, max_val_sip3d : float
        Accepted range shared by all four ``sip3d_*`` columns.
    min_val_energy_1_1 ... max_val_energy_2_2 : float
        Accepted range of the matching ``energy_*`` column.
    bins_mass_Z_1, bins_mass_Z_2, bins_mass_H : int
        Number of bins of the ``mass_Z_1``, ``mass_Z_2`` and ``mass_H``
        histograms.

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.

    Notes
    -----
    Reads the module-level ``final_df``. The legend of the third panel
    reports how many selected events have ``mass_H`` strictly inside the
    120-130 window (Higgs) and the 86-96 window (Z); the grey guide lines
    mark exactly those windows.
    """
    z_peak = 91        # x position of the Z-boson marker
    higgs_peak = 125   # x position of the Higgs-boson marker
    half_window = 5    # half-width of the counting window around each marker

    # Build the event mask; Series.between is inclusive on both ends, i.e.
    # identical to (col >= lo) & (col <= hi).
    selected = (
        final_df['mass_Z_1'].between(min_val_mass_Z_1, max_val_mass_Z_1)
        & final_df['mass_Z_2'].between(min_val_mass_Z_2, max_val_mass_Z_2)
    )
    energy_ranges = {
        '1_1': (min_val_energy_1_1, max_val_energy_1_1),
        '1_2': (min_val_energy_1_2, max_val_energy_1_2),
        '2_1': (min_val_energy_2_1, max_val_energy_2_1),
        '2_2': (min_val_energy_2_2, max_val_energy_2_2),
    }
    for lepton, (min_energy, max_energy) in energy_ranges.items():
        selected = (
            selected
            & final_df[f'isolation_{lepton}'].between(min_val_isolation, max_val_isolation)
            & final_df[f'energy_{lepton}'].between(min_energy, max_energy)
            & final_df[f'sip3d_{lepton}'].between(min_val_sip3d, max_val_sip3d)
        )
    filtered_data = final_df[selected]
    mass_4l = filtered_data['mass_H']

    def count_around(centre):
        """Number of selected events with mass_H strictly within the window."""
        inside = (mass_4l > centre - half_window) & (mass_4l < centre + half_window)
        return int(inside.sum())

    run_title = 'CERN Open Data CMS Run 2012 B, C'
    _, axs = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: mass_Z_1. The y label quotes the bin width, derived from the
    # span of the selected data because the histogram uses its default range.
    m_2l_range_1 = (filtered_data['mass_Z_1'].min(), filtered_data['mass_Z_1'].max())
    axs[0].hist(filtered_data['mass_Z_1'], bins=bins_mass_Z_1, color='skyblue')
    axs[0].set_title(run_title, loc="left", fontweight="bold")
    axs[0].set_xlabel('$m_{Z_1}$ [GeV]', fontsize=16)
    axs[0].set_ylabel('$N_{Events}$' + f' / {round((m_2l_range_1[1] - m_2l_range_1[0])/bins_mass_Z_1, 2)} GeV', fontsize=16)
    # Raw string: "\," is a mathtext thin space, not a Python escape sequence.
    axs[0].axvline(z_peak, color='red', linestyle='dashed', linewidth=2, label=r'Z-Boson  ($91 \, GeV$)')
    axs[0].legend(fontsize=12)

    # Panel 2: mass_Z_2.
    m_2l_range_2 = (filtered_data['mass_Z_2'].min(), filtered_data['mass_Z_2'].max())
    axs[1].hist(filtered_data['mass_Z_2'], bins=bins_mass_Z_2, color='salmon')
    axs[1].set_title(run_title, loc="left", fontweight="bold")
    axs[1].set_xlabel('$m_{Z_2}$ [GeV]', fontsize=16)
    axs[1].set_ylabel('$N_{Events}$' + f' / {round((m_2l_range_2[1] - m_2l_range_2[0])/bins_mass_Z_2, 2)} GeV', fontsize=16)

    # Panel 3: mass_H over a fixed range, so the bin width is known up front.
    m_4l_range = (70, 180)
    axs[2].hist(mass_4l, bins=bins_mass_H, color='lightgreen', range=m_4l_range)
    axs[2].set_title(run_title, loc="left", fontweight="bold")
    # NOTE(review): fixed annotation; it is not recomputed from the current selection.
    axs[2].set_title('$S=3.1$', loc="right")
    axs[2].set_xlabel('$m_{4l}$ [GeV]', fontsize=16)
    axs[2].set_ylabel('$N_{Events}$' + f' / {round((m_4l_range[1] - m_4l_range[0])/bins_mass_H, 2)} GeV', fontsize=16)
    axs[2].axvline(higgs_peak, color='yellow', linestyle='dashed', linewidth=2, label=f'Higgs-Boson ({count_around(higgs_peak)})', alpha=0.3)
    axs[2].axvline(higgs_peak - half_window, color='grey', linestyle='dashed', linewidth=1, alpha=0.3)
    axs[2].axvline(higgs_peak + half_window, color='grey', linestyle='dashed', linewidth=1, alpha=0.3)
    # The Z count and its guide lines share one window (previously the count
    # ran up to 130 and the lower guide sat at 87).
    axs[2].axvline(z_peak, color='purple', linestyle='dashed', linewidth=2, label=f'Z-Boson ({count_around(z_peak)})', alpha=0.3)
    axs[2].axvline(z_peak + half_window, color='grey', linestyle='dashed', linewidth=1)
    axs[2].axvline(z_peak - half_window, color='grey', linestyle='dashed', linewidth=1)
    axs[2].legend(loc="upper right", fontsize=12)

    plt.tight_layout()
    plt.show()

# Define sliders for min and max values
def _range_sliders(name, lower, upper):
    """Build the (min, max) FloatSlider pair for one cut.

    Both sliders span ``lower``..``upper`` in steps of 0.01 and start at the
    'min' / 'max' value stored under ``filter_settings[name]``.
    """
    pair = []
    for bound in ('min', 'max'):
        pair.append(widgets.FloatSlider(
            value=filter_settings[name][bound], min=lower, max=upper, step=0.01,
            description=f'{bound.capitalize()} of {name}:',
            style={'description_width': 'initial'}))
    return tuple(pair)


def _bins_slider(name, initial):
    """Build the IntSlider (5..250, step 1) choosing a histogram's bin count."""
    return widgets.IntSlider(value=initial, min=5, max=250, step=1,
                             description=f'Bins for {name}:',
                             style={'description_width': 'initial'})


lepton_suffixes = ('1_1', '1_2', '2_1', '2_2')

# Mass cuts: slider limits follow the observed range of each column.
min_slider_mass_Z_1, max_slider_mass_Z_1 = _range_sliders(
    'mass_Z_1', final_df['mass_Z_1'].min(), final_df['mass_Z_1'].max())
min_slider_mass_Z_2, max_slider_mass_Z_2 = _range_sliders(
    'mass_Z_2', final_df['mass_Z_2'].min(), final_df['mass_Z_2'].max())

# Isolation: one slider pair for all four leptons, from 0 up to the largest value seen.
max_iso = max(final_df[f'isolation_{suffix}'].max() for suffix in lepton_suffixes)
min_slider_iso, max_slider_iso = _range_sliders('isolation', 0, max_iso)

# sip3d: same scheme as isolation.
max_sip3d = max(final_df[f'sip3d_{suffix}'].max() for suffix in lepton_suffixes)
min_slider_sip3d, max_slider_sip3d = _range_sliders('sip3d', 0, max_sip3d)

# Energy cuts: one slider pair per lepton, limited to that column's observed range.
min_slider_energy_1_1, max_slider_energy_1_1 = _range_sliders(
    'energy_1_1', final_df['energy_1_1'].min(), final_df['energy_1_1'].max())
min_slider_energy_1_2, max_slider_energy_1_2 = _range_sliders(
    'energy_1_2', final_df['energy_1_2'].min(), final_df['energy_1_2'].max())
min_slider_energy_2_1, max_slider_energy_2_1 = _range_sliders(
    'energy_2_1', final_df['energy_2_1'].min(), final_df['energy_2_1'].max())
min_slider_energy_2_2, max_slider_energy_2_2 = _range_sliders(
    'energy_2_2', final_df['energy_2_2'].min(), final_df['energy_2_2'].max())

# Define sliders for number of bins
bins_slider_mass_Z_1 = _bins_slider('mass_Z_1', 15)
bins_slider_mass_Z_2 = _bins_slider('mass_Z_2', 15)
bins_slider_mass_H = _bins_slider('mass_H', 55)

# Interactively filter histograms using sliders
interact(
    plot_histograms,
    min_val_mass_Z_1=min_slider_mass_Z_1,
    max_val_mass_Z_1=max_slider_mass_Z_1,
    min_val_mass_Z_2=min_slider_mass_Z_2,
    max_val_mass_Z_2=max_slider_mass_Z_2,
    min_val_isolation=min_slider_iso,
    max_val_isolation=max_slider_iso,
    min_val_sip3d=min_slider_sip3d,
    max_val_sip3d=max_slider_sip3d,
    min_val_energy_1_1=min_slider_energy_1_1,
    max_val_energy_1_1=max_slider_energy_1_1,
    min_val_energy_1_2=min_slider_energy_1_2,
    max_val_energy_1_2=max_slider_energy_1_2,
    min_val_energy_2_1=min_slider_energy_2_1,
    max_val_energy_2_1=max_slider_energy_2_1,
    min_val_energy_2_2=min_slider_energy_2_2,
    max_val_energy_2_2=max_slider_energy_2_2,
    bins_mass_Z_1=bins_slider_mass_Z_1,
    bins_mass_Z_2=bins_slider_mass_Z_2,
    bins_mass_H=bins_slider_mass_H,
)

# No-op final statement so the cell does not echo the value returned by interact().
pass

interactive(children=(FloatSlider(value=70.0, description='Min of mass_Z_1:', max=757.3105883222205, min=0.003…