In [1]:
import pandas as pd
import numpy as np
import ast
import math
import random
from scipy import stats
import pickle

In [2]:
# Compute invariant masses from a dataframe holding a single event.

# Accepted invariant-mass windows (exclusive bounds) used by mbb() and maa().
# NOTE(review): values are presumably in GeV -- confirm against the input files.
Eminbb, Emaxbb = 100.0, 140.0   # window applied to the bb pair mass
Eminaa, Emaxaa = 120.0, 130.0   # window applied to the aa (di-photon) pair mass

def mbb(df):
    """Return the invariant mass of the two highest-btag objects of an event.

    Rows with ``type == 4`` and ``btag > 0`` are kept (presumably b-tagged
    jets -- confirm against the input format) and ordered by decreasing
    ``btag``. The mass of the first two rows is computed from their
    ``pt``, ``eta`` and ``phi`` with
    ``m^2 = 2 * pt0 * pt1 * (cosh(d_eta) - cos(d_phi))``.

    Returns the mass when it lies strictly inside ``(Eminbb, Emaxbb)``;
    returns ``-1`` when fewer than two rows qualify or the mass falls
    outside that window.
    """
    is_tagged = (df['type'] == 4) & (df['btag'] > 0)
    candidates = df[is_tagged].sort_values(by=['btag'], ascending=False)

    # Need a pair to build a mass.
    if len(candidates) < 2:
        return -1

    lead, sub = candidates.iloc[0], candidates.iloc[1]
    pt_term = 2 * lead['pt'] * sub['pt']
    d_eta = lead['eta'] - sub['eta']
    d_phi = lead['phi'] - sub['phi']
    mass = math.sqrt(pt_term * (math.cosh(d_eta) - math.cos(d_phi)))

    # Window bounds are exclusive on both sides.
    return mass if Eminbb < mass < Emaxbb else -1
    

def maa(df):
    """Return the invariant mass of the two highest-pt ``type == 0`` objects.

    Rows with ``type == 0`` are kept (presumably photons -- confirm against
    the input format) and ordered by decreasing ``pt``. The mass of the
    first two rows is computed from their ``pt``, ``eta`` and ``phi`` with
    ``m^2 = 2 * pt0 * pt1 * (cosh(d_eta) - cos(d_phi))``.

    Returns the mass when it lies strictly inside ``(Eminaa, Emaxaa)``;
    returns ``-1`` when fewer than two rows qualify or the mass falls
    outside that window.
    """
    candidates = df[df['type'] == 0].sort_values(by=['pt'], ascending=False)

    # Need a pair to build a mass.
    if len(candidates) < 2:
        return -1

    lead, sub = candidates.iloc[0], candidates.iloc[1]
    pt_term = 2 * lead['pt'] * sub['pt']
    d_eta = lead['eta'] - sub['eta']
    d_phi = lead['phi'] - sub['phi']
    mass = math.sqrt(pt_term * (math.cosh(d_eta) - math.cos(d_phi)))

    # Window bounds are exclusive on both sides.
    return mass if Eminaa < mass < Emaxaa else -1

In [3]:
# Import lhco Delphes data for the signal sample.
# The file content is parsed as one Python literal (a sequence of events,
# each event being a sequence of rows), not as tabular CSV.
# The context manager guarantees the handle is closed even if reading fails.
with open('signal.csv', "r") as my_file:
    data = ast.literal_eval(my_file.read())

# We create a list of dataframes, where in each dataframe we put an event
# There may be something more efficient, but for the time being this works fine
d = [
    pd.DataFrame(
        event,
        columns=['index', 'type', 'eta', 'phi', 'pt', 'jmas', 'ntrk', 'btag', 'had/em', 'dum1', 'dum2'],
    )
    for event in data
]

# Select events in the signal sample which have both bb and $\gamma\gamma$:
# maa()/mbb() return -1 when no pair passes their mass window.
signal_events = []
for event_df in d:
    aa = maa(event_df)
    bb = mbb(event_df)
    if aa > 0 and bb > 0:
        signal_events.append([bb, aa])
print('We have selected',len(signal_events),'events from a total of',len(d),'events passing through the detector')

We have selected 43 events from a total of 457 events passing through the detector


In [4]:
# Import lhco Delphes data for the background sample.
# The file content is parsed as one Python literal (a sequence of events,
# each event being a sequence of rows), not as tabular CSV.
# The context manager guarantees the handle is closed even if reading fails.
with open('background.csv', "r") as my_file:
    data = ast.literal_eval(my_file.read())

# We create a list of dataframes, where in each dataframe we put an event
# There may be something more efficient, but for the time being this works fine
d = [
    pd.DataFrame(
        event,
        columns=['index', 'type', 'eta', 'phi', 'pt', 'jmas', 'ntrk', 'btag', 'had/em', 'dum1', 'dum2'],
    )
    for event in data
]

# Select events in the background sample which have both bb and $\gamma\gamma$:
# maa()/mbb() return -1 when no pair passes their mass window.
background_events = []
for event_df in d:
    aa = maa(event_df)
    bb = mbb(event_df)
    if aa > 0 and bb > 0:
        background_events.append([bb, aa])
print('We have selected',len(background_events),'events from a total of',len(d),'events passing through the detector')

We have selected 2 events from a total of 885 events passing through the detector


In [5]:
# Bundle the selected [bb, aa] mass pairs of both samples under one mapping
# and persist it to disk for later use.
lhcdata = dict(
    signal=signal_events,
    background=background_events,
)

# Binary mode is required for pickle output.
with open('lhcdata_test.pkl', 'wb') as f:
    pickle.dump(lhcdata, f)