In [56]:
import coffea
from git import Repo
import awkward as ak
import dask_awkward as dak
# Provenance of this notebook: who modified it and which branch of the local
# `coffea` checkout (resolved relative to the working directory) is active.
mod = "Prayag Yadav"
local_repo = Repo(path='coffea')
local_branch = local_repo.active_branch.name

# Banner: a title framed by rules, followed by one "label value" line per setting.
print("_______________________________________")
print("\tCurrent Configuration")
print("---------------------------------------")
for label, value in (
    ("Coffea Version: ", coffea.__version__),
    ("Branch: \t", local_branch),
    ("Modified by: \t", mod),
):
    print(label, value)
print("_______________________________________")

_______________________________________
	Current Configuration
---------------------------------------
Coffea Version:  0.1.dev3583+ge06c4b8
Branch: 	 master
Modified by: 	 Prayag Yadav
_______________________________________


## The real problem: Branches with mismatching offsets can't be zipped together

In [57]:
from coffea.nanoevents import NanoEventsFactory, BaseSchema
test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZH_ecm240/events_101027117.root'

# Open the "events" tree of the remote file with BaseSchema in delayed mode.
# NOTE(review): entry_stop=100 is passed here, yet the offsets printed further
# down in this notebook run into the millions -- confirm that the limit is
# actually honoured when delayed=True.
events = NanoEventsFactory.from_root(
    test_file + ":events",
    schemaclass=BaseSchema,
    entry_stop=100,
    delayed=True,
).events()

In [58]:
# List the field names of the events array built with BaseSchema.
events.fields

['Electron',
 'Electron#0',
 'Electron#0/Electron#0.index',
 'Electron#0/Electron#0.collectionID',
 'Muon',
 'Muon#0',
 'Muon#0/Muon#0.index',
 'Muon#0/Muon#0.collectionID',
 'AllMuon',
 'AllMuon#0',
 'AllMuon#0/AllMuon#0.index',
 'AllMuon#0/AllMuon#0.collectionID',
 'EFlowNeutralHadron',
 'EFlowNeutralHadron/EFlowNeutralHadron.type',
 'EFlowNeutralHadron/EFlowNeutralHadron.energy',
 'EFlowNeutralHadron/EFlowNeutralHadron.energyError',
 'EFlowNeutralHadron/EFlowNeutralHadron.position.x',
 'EFlowNeutralHadron/EFlowNeutralHadron.position.y',
 'EFlowNeutralHadron/EFlowNeutralHadron.position.z',
 'EFlowNeutralHadron/EFlowNeutralHadron.positionError[6]',
 'EFlowNeutralHadron/EFlowNeutralHadron.iTheta',
 'EFlowNeutralHadron/EFlowNeutralHadron.phi',
 'EFlowNeutralHadron/EFlowNeutralHadron.directionError.x',
 'EFlowNeutralHadron/EFlowNeutralHadron.directionError.y',
 'EFlowNeutralHadron/EFlowNeutralHadron.directionError.z',
 'EFlowNeutralHadron/EFlowNeutralHadron.shapeParameters_begin',
 'EFlo

### Some of the indexed branches are empty, with [0, 0, 0, 0, 0, ...] as their offsets, and the others do not all share the same offsets

#### Branches

In [59]:
# Materialise the `.index` branch of every ReconstructedParticles#n collection
# (n = 0..5). The branch names differ only in n, so they are generated instead
# of being spelled out six times; the branches are computed in order r0..r5.
r0, r1, r2, r3, r4, r5 = (
    events[f'ReconstructedParticles#{n}/ReconstructedParticles#{n}.index'].compute()
    for n in range(6)
)
print('r0 is ', r0)

r0 is  [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ..., 14, 15, 16, 17, 18, 19, 20, 21, 22], ...]


#### Offsets of the branches

In [60]:
# Print the offsets buffer of each index branch so that their list structures
# can be compared side by side.
for i, r in enumerate([r0, r1, r2, r3, r4, r5]):
    # ak.to_buffers returns (form, length, container); only the container of
    # named buffers is used here. 'node0-offsets' is presumably the offsets
    # buffer of the outermost list node -- confirm against the form if the
    # layout changes.
    _form, _length, data = ak.to_buffers(r)
    print(f"r{i} offset: {data['node0-offsets']}")

r0 offset: [      0      23      51 ... 2739402 2739425 2739462]
r1 offset: [      0      37      85 ... 3318360 3318378 3318424]
r2 offset: [0 0 0 ... 0 0 0]
r3 offset: [      0      37      85 ... 3318360 3318378 3318424]
r4 offset: [      0      60     136 ... 6057762 6057803 6057886]
r5 offset: [      0      60     136 ... 6057762 6057803 6057886]


#### Here, (r1 and r3) and (r4 and r5) have the same offsets, so each pair can be zipped together:

In [61]:
# r1 and r3 share identical offsets, so they can be zipped into one record array.
# The field names now match the arrays they hold (they were previously
# labelled 'r0' and 'r2', which are different branches).
ak.zip(
    {'r1': r1, 'r3': r3},
    with_name='test'
)

In [62]:
# r4 and r5 also share identical offsets, so zipping them succeeds.
ak.zip(dict(r4=r4, r5=r5), with_name='test')

#### r0 and r2 each have offsets that differ from every other branch, so they can't be zipped with any of them

In [63]:
# Zipping branches with different offsets is expected to fail. The error is the
# point of this cell, so it is caught and shown rather than left to abort a
# "Restart & Run All".
try:
    ak.zip(
        {'r0': r0, 'r1': r1},
        with_name='test'
    )
except ValueError as err:
    print(f"{type(err).__name__}: {err}")
else:
    print("ak.zip unexpectedly succeeded for r0 and r1")

ValueError: cannot broadcast nested list

#### Similarly, r0, r1, r2, r3, r4 and r5 (all the ReconstructedParticles#n branches) can't be zipped together

In [64]:
# Zipping all six index branches at once is expected to fail for the same
# reason. The error is caught and shown so the notebook keeps running past
# this cell.
try:
    ak.zip(
        {'r0': r0, 'r1': r1, 'r2': r2, 'r3': r3, 'r4': r4, 'r5': r5},
        with_name='test'
    )
except ValueError as err:
    print(f"{type(err).__name__}: {err}")
else:
    print("ak.zip unexpectedly succeeded for r0..r5")

ValueError: cannot broadcast nested list

## Solution: A suitable schema that doesn't zip indexed branches

#### Added a wrapper class FCC to choose variants of the fccschema

In [65]:
from coffea.nanoevents import NanoEventsFactory, FCC

In [66]:
# By default, returns the latest version of the schema, which does not zip the indexed branches.
# Works in both delayed (delayed=True) and eager (delayed=False) mode.
# NOTE(review): zip_missing=True returns a different class (FCCSchema_zip_missing),
# so the default is presumably zip_missing=False -- confirm against FCC.get_schema.
FCC.get_schema() # presumably the same as FCC.get_schema(version="latest", zip_missing=False)

coffea.nanoevents.schemas.fcc.FCCSchema

In [68]:
# Request the latest version of the schema variant that zips the indexed branches.
# Caveats: it works only in delayed mode (delayed=True), and empty branches get corrupted.
FCC.get_schema(zip_missing=True, version="latest")

coffea.nanoevents.schemas.fcc.FCCSchema_zip_missing

In [69]:
test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZH_ecm240/events_101027117.root'

# Open the "events" tree again, this time with the default FCC schema and in
# eager mode (delayed=False). This rebinds `events`.
events = NanoEventsFactory.from_root(
    test_file + ":events",
    schemaclass=FCC.get_schema(),
    entry_stop=100,
    delayed=False,
).events()

In [70]:
# Display the events array loaded with the FCC schema.
events

In [71]:
# List the fields of the events array produced by the FCC schema.
events.fields

['AllMuonidx0',
 'EFlowNeutralHadron',
 'EFlowNeutralHadronidx0',
 'EFlowNeutralHadronidx1',
 'EFlowNeutralHadronidx2',
 'EFlowPhoton',
 'EFlowPhotonidx0',
 'EFlowPhotonidx1',
 'EFlowPhotonidx2',
 'EFlowTrack',
 'EFlowTrackidx0',
 'EFlowTrackidx1',
 'Electronidx0',
 'Jet',
 'Jetidx0',
 'Jetidx1',
 'Jetidx2',
 'Jetidx3',
 'Jetidx4',
 'Jetidx5',
 'MCRecoAssociations',
 'MCRecoAssociationsidx0',
 'MCRecoAssociationsidx1',
 'MissingET',
 'MissingETidx0',
 'MissingETidx1',
 'MissingETidx2',
 'MissingETidx3',
 'MissingETidx4',
 'MissingETidx5',
 'Muonidx0',
 'Particle',
 'ParticleIDs',
 'Particleidx0',
 'Particleidx1',
 'Photonidx0',
 'ReconstructedParticles',
 'ReconstructedParticlesidx0',
 'ReconstructedParticlesidx1',
 'ReconstructedParticlesidx2',
 'ReconstructedParticlesidx3',
 'ReconstructedParticlesidx4',
 'ReconstructedParticlesidx5']

In [72]:
# List the fields of the ReconstructedParticles collection.
events.ReconstructedParticles.fields

['E',
 'charge',
 'clusters_begin',
 'clusters_end',
 'covMatrix[10]',
 'goodnessOfPID',
 'mass',
 'particleIDs_begin',
 'particleIDs_end',
 'particles_begin',
 'particles_end',
 'px',
 'py',
 'pz',
 'referencePoint.x',
 'referencePoint.y',
 'referencePoint.z',
 'tracks_begin',
 'tracks_end',
 'type']

In [73]:
# Display the ReconstructedParticlesidx0 collection.
events.ReconstructedParticlesidx0

In [74]:
# Display the ReconstructedParticlesidx2 collection.
# NOTE(review): presumably built from the ReconstructedParticles#2 branch -- confirm the mapping in FCCSchema.
events.ReconstructedParticlesidx2

In [75]:
# List the fields of the ReconstructedParticlesidx1 collection.
events.ReconstructedParticlesidx1.fields

['collectionID', 'index']

In [76]:
# Display the `index` field of the ReconstructedParticlesidx1 collection.
events.ReconstructedParticlesidx1.index

In [77]:
# Call match_collection on ReconstructedParticles with the Muonidx0 collection.
# NOTE(review): presumably this selects the reconstructed particles that the
# Muonidx0 indices point to -- confirm against the FCCSchema implementation.
Muons = events.ReconstructedParticles.match_collection(events.Muonidx0)
Muons

In [78]:
# Display the `absolute_mass` attribute of the matched collection.
# NOTE(review): presumably a derived property added by the FCC schema -- confirm its definition there.
Muons.absolute_mass

## Check the code [here](https://github.com/prayagyadav/coffea/blob/master/src/coffea/nanoevents/schemas/fcc.py)