Attention: dealing with jets symmetries (quark)

Special Loss function: dealing with particle symmetries (top and anti-top)

Tensor Attention: encoding the symmetries

Input: unordered jets
Output: one head per particle

In [2]:
import h5py
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
import awkward as ak
import numpy as np

### Data Construction

In [3]:
# Smoke test: open the SPANet example file and look at its group layout.
# NOTE(review): this is a machine-specific absolute path; adjust before re-running elsewhere.
filename = '/Users/con_np/Desktop/IRIS-HEP/SPANet/data/full_hadronic_ttbar/example.h5'

# The handle is left open (no `with` block) because the cells below keep
# reading from `f`; call f.close() once the exploration is finished.
f = h5py.File(filename, mode='r')
print(f.keys())
# Evaluated but not shown: a cell only echoes its last expression.
f['INPUTS'].keys(), f['TARGETS'].keys()
f['INPUTS/Source'].keys() # <KeysViewHDF5 ['MASK', 'btag', 'eta', 'mass', 'phi', 'pt']>

<KeysViewHDF5 ['INPUTS', 'TARGETS']>


<KeysViewHDF5 ['MASK', 'btag', 'eta', 'mass', 'phi', 'pt']>

In [4]:
# Names of the per-jet datasets stored under INPUTS/Source.
list(f['INPUTS/Source'])
# f['INPUTS']['Source']['pt'].shape

['MASK', 'btag', 'eta', 'mass', 'phi', 'pt']

In [75]:
# Mask and b-tag rows of the entry at index 2, shown side by side.
f['INPUTS/Source/MASK'][2], f['INPUTS/Source/btag'][2]

(array([ True,  True,  True,  True,  True,  True,  True,  True, False,
        False]),
 array([1., 0., 0., 0., 1., 1., 0., 0., 0., 0.], dtype=float32))

In [39]:
# Distinct values stored in TARGETS/t1/b, in order of first appearance.
# dict keys keep insertion order, so this de-duplicates without sorting.
un = list(dict.fromkeys(f['TARGETS']['t1']['b'][:]))

print(un)
# TODO: work out what each label represents

[2, 0, 1, 3, 4, 5, 6, -1]


In [59]:
# Print the (b, q1, q2) target entries of both tops for one event.
event = 2
for top in ('t1', 't2'):
    indices = [f['TARGETS'][top][parton][event] for parton in ('b', 'q1', 'q2')]
    print(top + ' (b,q1,q2):', *indices)

t1 (b,q1,q2): 0 -1 1
t2 (b,q1,q2): 4 6 3


In [46]:
# Open the ttbar NanoAOD ROOT file (remote, https URL) as coffea NanoEvents.
file_path = 'https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD/TT_TuneCUETP8M1_13TeV-powheg-pythia8/cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext3-v1_00000_0000.root'
tree_name = 'Events'
# Alternative call form (file -> tree mapping), kept for reference:
# events = NanoEventsFactory.from_root({file_path: tree_name}, schemaclass=NanoAODSchema).events()
events = NanoEventsFactory.from_root(
    file_path,
    treepath=tree_name,
    schemaclass=NanoAODSchema,
).events()



In [61]:
# Event selection, adapted from the jetassignment_training code.

# --- object-level selection -------------------------------------------------
electrons = events.Electron
electron_filter = (
    (electrons.pt > 30)
    & (np.abs(electrons.eta) < 2.1)
    & (electrons.cutBased == 4)
    & (electrons.sip3d < 4)
)
selected_electrons = electrons[electron_filter]

muons = events.Muon
muon_filter = (
    (muons.pt > 30)
    & (np.abs(muons.eta) < 2.1)
    & (muons.tightId)
    & (muons.sip3d < 4)
    & (muons.pfRelIso04_all < 0.15)
)
selected_muons = muons[muon_filter]

jets = events.Jet
jet_filter = (jets.pt > 30) & (np.abs(jets.eta) < 2.4) & (jets.isTightLeptonVeto)
selected_jets = jets[jet_filter]

selected_genpart = events.GenPart
# Flag for events whose event number is even.
even = (events.event % 2 == 0)

# --- event-level selection --------------------------------------------------
B_TAG_THRESHOLD = 0.5
n_leptons = ak.count(selected_electrons.pt, axis=1) + ak.count(selected_muons.pt, axis=1)
n_jets = ak.count(selected_jets.pt, axis=1)
n_btagged = ak.sum(selected_jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1)

# Exactly one selected lepton, at least four selected jets, and at least one
# jet at or above the b-tag threshold.
event_filters = (n_leptons == 1) & (n_jets >= 4) & (n_btagged >= 1)

# Apply the event mask to every collection so they stay aligned.
selected_electrons = selected_electrons[event_filters]
selected_muons = selected_muons[event_filters]
selected_jets = selected_jets[event_filters]
selected_genpart = selected_genpart[event_filters]
even = even[event_filters]

# --- 4j2b (signal) region only ----------------------------------------------
# NOTE(review): this comparison is strict (>) while the event filter above
# uses >=; confirm whether the difference is intentional.
region_filter = ak.sum(selected_jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 2  # at least two b-tagged jets

# Keep at most the first 10 jets of each event
# (presumably the leading ones, if the input is pt-ordered -- TODO confirm).
selected_jets_region = selected_jets[region_filter][:, :10]
selected_electrons_region = selected_electrons[region_filter]
selected_muons_region = selected_muons[region_filter]
selected_genpart_region = selected_genpart[region_filter]
even = even[region_filter]

In [197]:
def pad_array(array, max_jets=10, pad_value=0):
    """Pad each inner list of ``array`` up to ``max_jets`` entries.

    Missing slots are inserted as None along axis 1 and then replaced by
    ``pad_value``. ``ak.pad_none`` is called without ``clip``, so lists that
    are already longer than ``max_jets`` are not shortened here.

    Parameters:
        array: jagged awkward array, padded along axis 1.
        max_jets: minimum length of each inner list after padding.
        pad_value: value written into the padded slots.

    Returns:
        The padded awkward array with the None entries filled in.
    """
    return ak.fill_none(ak.pad_none(array, max_jets, axis=1), pad_value, axis=1)

In [198]:
# Per-jet features, each padded to a fixed length by pad_array.
(
    Jets_pt_padded,
    Jets_mass_padded,
    Jets_eta_padded,
    Jets_phi_padded,
    Jets_btag_padded,
    Jets_qgl_padded,
) = (
    pad_array(getattr(selected_jets_region, field))
    for field in ('pt', 'mass', 'eta', 'phi', 'btagCSVV2', 'qgl')
)

# Lepton features, taken from the selected electrons only.
# NOTE(review): pad_array's default length of 10 applies here too, and muons
# are not written out -- confirm both are intended.
(
    lep_pt_padded,
    lep_mass_padded,
    lep_eta_padded,
    lep_phi_padded,
) = (
    pad_array(getattr(selected_electrons_region, field))
    for field in ('pt', 'mass', 'eta', 'phi')
)

In [199]:
# Jet mask per event: True where a jet is present, False in padded slots.
pad_check = ak.pad_none(selected_jets_region.pt, 10, axis=1)
Jets_mask_padded = ~ak.is_none(pad_check, axis=1)


In [200]:
# Feature tables that the writer cell turns into h5 datasets
# (dataset name -> padded array).
jet_data = dict(
    mask=Jets_mask_padded,
    mass=Jets_mass_padded,
    pt=Jets_pt_padded,
    eta=Jets_eta_padded,
    phi=Jets_phi_padded,
    btag=Jets_btag_padded,
    qgl=Jets_qgl_padded,
)

lep_data = dict(
    mass=lep_mass_padded,
    pt=lep_pt_padded,
    eta=lep_eta_padded,
    phi=lep_phi_padded,
)

In [None]:
# t1 = {'b': ,'q1': ,'q2':}
# t2 = {'b': ,'mu': ,'nu':}

In [201]:
# Write the padded inputs to test.h5 and create the (still empty) target groups.
with h5py.File('test.h5', 'w') as h5file:
    # INPUTS: sequential (per-object) and global sections
    input_group = h5file.create_group('INPUTS')
    seq_subgroup = input_group.create_group('SEQUENTIAL')
    global_subgroup = input_group.create_group('GLOBAL')  # left empty for now

    jet_2subgroup = seq_subgroup.create_group('Jet')
    lept_2subgroup = seq_subgroup.create_group('Lepton')

    # One dataset per feature; awkward arrays are converted to numpy first.
    for subgroup, features in ((jet_2subgroup, jet_data), (lept_2subgroup, lep_data)):
        for key, value in features.items():
            subgroup.create_dataset(key, data=ak.to_numpy(value))

    # EVENT: one subgroup per top, no datasets written yet
    event_group = h5file.create_group('EVENT')
    t1_subgroup = event_group.create_group('t1')
    t2_subgroup = event_group.create_group('t2')

    # Placeholder groups, created empty
    perm_group = h5file.create_group('PERMUTATION')
    regression_group = h5file.create_group('REGRESSION')
    classification_group = h5file.create_group('CLASSIFICATION')

### Attention

### Loss

### Training

### Testing