In [5]:
# Import all the necessary packages
#import uproot  # check out the documentation about the basics at https://uproot.readthedocs.io/en/latest/basic.html
import os
import glob
import matplotlib.pyplot as plt
import awkward as ak
import datetime as dt
import numpy as np
import vector
import itertools
vector.register_awkward()
import re

In [6]:
#path to parquet files
DATA_DIR = "."
PATHS = list(set(glob.glob(os.path.join(DATA_DIR, '*.parquet'))))

In [7]:
#filtering function
def createFilter(data,minMu,minEl):
    mu_isolation_cut = ak.sum(abs(data.Muon_pfRelIso04_all) < 0.4, axis=1) == minMu
    muon_pt_cut = ak.sum(data.Muon_pt > 5, axis=1) ==minMu
    muon_eta_cut = ak.sum(abs(data.Muon_eta) < 2.4, axis=1) == minMu
    muon_sip_cut = ak.sum(abs(data.Muon_sip3d) < 4, axis=1) == minMu
    muon_dxy_cut = ak.sum(abs(data.Muon_dxy) < 0.5, axis=1) == minMu
    muon_dz_cut = ak.sum(abs(data.Muon_dz) < 1.0, axis=1) == minMu
    mu_cuts = mu_isolation_cut * muon_pt_cut * muon_eta_cut * muon_sip_cut * muon_dxy_cut * muon_dz_cut

    el_isolation_cut = ak.sum(abs(data.Electron_pfRelIso03_all) < 0.4, axis=1) == minEl
    el_pt_cut = ak.sum(data.Electron_pt > 7, axis=1) ==minEl
    el_eta_cut = ak.sum(abs(data.Electron_eta) < 2.5, axis=1) == minEl
    el_sip_cut = ak.sum(abs(data.Electron_sip3d) < 4, axis=1) == minEl
    el_dxy_cut = ak.sum(abs(data.Electron_dxy) < 0.5, axis=1) == minEl
    el_dz_cut = ak.sum(abs(data.Electron_dz) < 1.0, axis=1) == minEl
    el_cuts = el_isolation_cut * el_pt_cut * el_eta_cut * el_sip_cut * el_dxy_cut * el_dz_cut

    return mu_cuts * el_cuts

In [8]:
variable_dic={}
for inf in PATHS:

    #reading the files
    data=ak.from_parquet(inf)

    # E and mu impact parameters and their significance
    data['Muon_ip3d']=np.sqrt(data.Muon_dxy*data.Muon_dxy + data.Muon_dz*data.Muon_dz)
    data['Muon_sip3d']=data.Muon_ip3d/np.sqrt(data.Muon_dxyErr*data.Muon_dxyErr + data.Muon_dzErr*data.Muon_dzErr)
    data['Electron_ip3d']=np.sqrt(data.Electron_dxy*data.Electron_dxy + data.Electron_dz*data.Electron_dz)
    data['Electron_sip3d']=data.Electron_ip3d/np.sqrt(data.Electron_dxyErr*data.Electron_dxyErr + data.Electron_dzErr*data.Electron_dzErr)

    name=re.search("(\w*).parquet", inf)
    
    namepath1="hzz4mu"
    namepath2="hzz2e2mu"
    namepath3="hzz4e"
    
    #using the filtering function
    fourMu=createFilter(data,4,0)
    twoMuTwoE=createFilter(data,2,2)
    fourEl=createFilter(data,0,4)
    
    variable_dic[namepath1+"."+name.group()]=data[fourMu]#.to_list()
    variable_dic[namepath2+"."+name.group()]=data[twoMuTwoE]#.to_list()
    variable_dic[namepath3+"."+name.group()]=data[fourEl]#.to_list()

    # Let's make arrays for the three final states    
    #hzz4mu=data[fourMu]#.to_list()
    #hzz2e2mu=data[twoMuTwoE]#.to_list()
    #hzz4e=data[fourEl]#.to_list()
    
    
    # Sample sizes:
    print(name.group()+ "Higgs to ZZ to 4 muons sample size: %i" % len(variable_dic[namepath1+"."+name.group()]))
    print(name.group()+ "Higgs to ZZ to 2 electrons and 2 muons sample size: %i" % len(variable_dic[namepath2+"."+name.group()]))
    print(name.group()+ "Higgs to ZZ to 4 electrons sample size: %i" % len(variable_dic[namepath3+"."+name.group()]))
    
    #sumhzz4mu=0
    #sumhzz2mu2e=0
    #sumhzz4e=0
   # sumhzz4mu+=len(hzz4mu)
    #sumhzz2mu2e+=len(hzz2e2mu)
    #sumhzz4e+=len(hzz4e)

ZZTo2e2mu.parquetHiggs to ZZ to 4 muons sample size: 2
ZZTo2e2mu.parquetHiggs to ZZ to 2 electrons and 2 muons sample size: 116065
ZZTo2e2mu.parquetHiggs to ZZ to 4 electrons sample size: 24
SMHiggsToZZTo4L.parquetHiggs to ZZ to 4 muons sample size: 12218
SMHiggsToZZTo4L.parquetHiggs to ZZ to 2 electrons and 2 muons sample size: 21224
SMHiggsToZZTo4L.parquetHiggs to ZZ to 4 electrons sample size: 11044
ZZTo4e.parquetHiggs to ZZ to 4 muons sample size: 0
ZZTo4e.parquetHiggs to ZZ to 2 electrons and 2 muons sample size: 4
ZZTo4e.parquetHiggs to ZZ to 4 electrons sample size: 115714
ZZTo4mu.parquetHiggs to ZZ to 4 muons sample size: 150493
ZZTo4mu.parquetHiggs to ZZ to 2 electrons and 2 muons sample size: 128
ZZTo4mu.parquetHiggs to ZZ to 4 electrons sample size: 0


In [9]:
print(variable_dic.keys())
#variable_dic['hzz2e4mu.parquet'][0] esimene element 

dict_keys(['hzz4mu.ZZTo2e2mu.parquet', 'hzz2e2mu.ZZTo2e2mu.parquet', 'hzz4e.ZZTo2e2mu.parquet', 'hzz4mu.SMHiggsToZZTo4L.parquet', 'hzz2e2mu.SMHiggsToZZTo4L.parquet', 'hzz4e.SMHiggsToZZTo4L.parquet', 'hzz4mu.ZZTo4e.parquet', 'hzz2e2mu.ZZTo4e.parquet', 'hzz4e.ZZTo4e.parquet', 'hzz4mu.ZZTo4mu.parquet', 'hzz2e2mu.ZZTo4mu.parquet', 'hzz4e.ZZTo4mu.parquet'])


In [75]:
#e=variable_dic['hzz4mu.ZZTo4mu.parquet'] #why do i have to take just the first element? 
#print(variable_dic['hzz2e2mu.ZZTo4e.parquet'])

data=variable_dic['hzz4mu.SMHiggsToZZTo4L.parquet']
data2=variable_dic['hzz4e.SMHiggsToZZTo4L.parquet']
data3=variable_dic['hzz2e2mu.SMHiggsToZZTo4L.parquet']

#data['mus']=vector.zip({'pt':data.Muon_pt,'eta':data.Muon_eta,'phi':data.Muon_phi,'mass':data.Muon_mass})

In [30]:
x0 = data[0]
x0

<Record ... eta: -0.864, tau: 0.106}]} type='{"run": int32, "luminosityBlock": u...'>

In [31]:
#for loop over all events
#for e in data:
 #   m1=vector.obj(pt=e['Muon_pt'][0], phi=e['Muon_phi'][0],eta=e["Muon_eta"][0],mass=e['Muon_mass'][0])
  #  m2=vector.obj(pt=e['Muon_pt'][1], phi=e['Muon_phi'][1],eta=e["Muon_eta"][1],mass=e['Muon_mass'][1])
   # m3=vector.obj(pt=e['Muon_pt'][2], phi=e['Muon_phi'][2],eta=e["Muon_eta"][2],mass=e['Muon_mass'][2])
    #m4=vector.obj(pt=e['Muon_pt'][3], phi=e['Muon_phi'][3],eta=e["Muon_eta"][3],mass=e['Muon_mass'][3])

In [80]:
mus=[]
for e in data:
    for i in range(e['nMuon']):
        mus.append(vector.obj(pt=e['Muon_pt'][i], phi=e['Muon_phi'][i],eta=e["Muon_eta"][i],mass=e['Muon_mass'][i]))

In [78]:
els=[]
for e in data2:
    for i in range(e['nElectron']):
        els.append(vector.obj(pt=e['Electron_pt'][i], phi=e['Electron_phi'][i],eta=e["Electron_eta"][i],mass=e['Electron_mass'][i]))

In [81]:
mus

[vector.obj(pt=12.480432510375977, phi=0.34655192494392395, eta=-2.2683401107788086, mass=0.10565836727619171),
 vector.obj(pt=9.652972221374512, phi=0.6081116795539856, eta=-1.6260325908660889, mass=0.10565836727619171),
 vector.obj(pt=55.84819412231445, phi=0.34228694438934326, eta=-0.7817119359970093, mass=0.10565836727619171),
 vector.obj(pt=45.67310333251953, phi=2.689340829849243, eta=-0.976482629776001, mass=0.10565836727619171),
 vector.obj(pt=46.23818588256836, phi=-1.952662706375122, eta=-1.3364412784576416, mass=0.10565836727619171),
 vector.obj(pt=9.839960098266602, phi=-2.9735004901885986, eta=0.7828898429870605, mass=0.10565836727619171),
 vector.obj(pt=46.10374450683594, phi=-1.385257601737976, eta=-0.47814276814460754, mass=0.10565836727619171),
 vector.obj(pt=15.51797103881836, phi=1.4022605419158936, eta=0.04414963722229004, mass=0.10565836727619171),
 vector.obj(pt=14.632739067077637, phi=-2.6377451419830322, eta=-0.045658405870199203, mass=0.10565836727619171),
 vec