In [1]:
import coffea
from git import Repo
import awkward as ak
import dask_awkward as dak
import vector
# Provenance of this run: who modified the local coffea checkout and which
# branch of the repository found at ./coffea is currently active.
mod = "Prayag Yadav"
local_repo = Repo(path='coffea')
local_branch = local_repo.active_branch.name

# Print a small banner so the recorded outputs can be tied to a coffea build.
banner_rule = "_______________________________________"
print(banner_rule)
print("\tCurrent Configuration")
print("---------------------------------------")
for label, value in (
    ("Coffea Version: ", coffea.__version__),
    ("Branch: \t", local_branch),
    ("Modified by: \t", mod),
):
    print(label, value)
print(banner_rule)

_______________________________________
	Current Configuration
---------------------------------------
Coffea Version:  0.1.dev3583+ge06c4b8
Branch: 	 use_scikithep_vector
Modified by: 	 Prayag Yadav
_______________________________________


## What's the problem with zip?

In [2]:
from coffea.nanoevents import NanoEventsFactory, FCCSchema

# Remote ROOT file used throughout the notebook; ":events" is appended to the
# path when it is handed to the factory below.
test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZH_ecm240/events_101027117.root'

# Build the factory in delayed mode: delayed=False doesn't work for this file,
# more info in later sections.
events_factory = NanoEventsFactory.from_root(
    test_file + ":events",
    entry_stop=100,
    schemaclass=FCCSchema,
    delayed=True,
)
events = events_factory.events()

Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import base, vector


In [3]:
# List the top-level fields (collections) available on the events array.
events.fields

['AllMuonidx',
 'EFlowPhotonidx',
 'Photonidx',
 'MCRecoAssociationsidx',
 'Muonidx',
 'Particleidx',
 'Jetidx',
 'EFlowTrackidx',
 'EFlowNeutralHadronidx',
 'ReconstructedParticlesidx',
 'MissingETidx',
 'Electronidx',
 'ReconstructedParticles',
 'MCRecoAssociations',
 'MissingET',
 'ParticleIDs',
 'EFlowTrack',
 'EFlowNeutralHadron',
 'Particle',
 'EFlowPhoton',
 'Jet',
 'EFlowTrack_1']

### The idx\<n\> leaves were zipped together to form the idx collection

In [4]:
# List the fields of the idx collection: one entry per ReconstructedParticlesidx<n> leaf.
events.ReconstructedParticlesidx.fields

['ReconstructedParticlesidx0',
 'ReconstructedParticlesidx4',
 'ReconstructedParticlesidx5',
 'ReconstructedParticlesidx1',
 'ReconstructedParticlesidx2',
 'ReconstructedParticlesidx3']

### This is delayed and therefore doesn't produce any error at this stage, but it's an error if we want to materialize or use this collection

In [5]:
# Materialize the whole zipped idx collection. In the recorded run this raised
# "TypeError: size of array (0) is less than size of form (...)".
events.ReconstructedParticlesidx.compute()

TypeError: size of array (0) is less than size of form (2739462)

### It's possible to get some of the individual leaves (only in delayed mode, of course)

In [6]:
# Materialize a single leaf of the idx collection instead of the whole zip.
events.ReconstructedParticlesidx.ReconstructedParticlesidx4.compute()

In [7]:
# Try to materialize every idx leaf on its own and report which ones fail.
# Only ordinary errors are caught: a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit, making the (remote, potentially slow) loop
# impossible to interrupt cleanly.
for idx in events.ReconstructedParticlesidx.fields:
    try:
        print(f"{idx} :", events.ReconstructedParticlesidx[idx].compute())
    except Exception:
        print(f"{idx} leaf is corrupted")

ReconstructedParticlesidx0 : [[{index: 0, collectionID: 8}, {...}, ..., {index: 22, collectionID: 8}], ...]
ReconstructedParticlesidx4 : [[{index: -2, collectionID: -2}, ..., {index: -2, collectionID: -2}], ...]
ReconstructedParticlesidx5 : [[{index: -2, collectionID: -2}, ..., {index: -2, collectionID: -2}], ...]
ReconstructedParticlesidx1 : [[{index: 0, collectionID: 13}, {...}, ..., {index: 22, collectionID: 13}], ...]
ReconstructedParticlesidx2 leaf is corrupted
ReconstructedParticlesidx3 : [[{index: 0, collectionID: 11}, {...}, ..., {index: 22, collectionID: 11}], ...]


### Take the simplest example:

#### Case A : This is possible :

In [8]:
# Case A inputs: three jagged lists with 3 rows each whose row lengths agree
# across x, y and z (3, 1 and 0 entries), i.e. they have the same dimensions.
x = [[1, 2, 3], [4], []]
y = [[5, 2, 4], [4], []]
z = [[1, 5, 5], [7], []]

In [9]:
# Zip the three equally-shaped lists into one record array named "Possible"
# and display it.
case_a_fields = {"x": x, "y": y, "z": z}
P = ak.zip(case_a_fields, with_name="Possible")
P

In [10]:
# Show each field of the zipped record array on its own line.
print(f"{P.x}\n{P.y}\n{P.z}")

[[1, 2, 3], [4], []]
[[5, 2, 4], [4], []]
[[1, 5, 5], [7], []]


#### Case B : This is NOT possible :

In [11]:
# Case B inputs: still 3 rows ("events") in each list, but the row lengths
# differ between them (x: 2, 2, 0 / y: 3, 1, 2 / z: 0, 2, 3), i.e. the arrays
# disagree along the "within events" axis.
x = [[1, 2], [4, 5], []]
y = [[5, 2, 4], [4], [6, 7]]
z = [[], [7, 6], [5, 5, 5]]

In [12]:
# Attempt the same zip with the mismatched Case B lists. In the recorded run
# this raised "ValueError: cannot broadcast nested list", which is the point
# of this cell.
case_b_fields = {"x": x, "y": y, "z": z}
I = ak.zip(case_b_fields, with_name="Impossible")
I

ValueError: cannot broadcast nested list

### Comparison with Reconstructed Particles

```
Initially I thought ReconstructedParticlesidx0, ReconstructedParticlesidx1, ReconstructedParticlesidx2, etc might have different lengths within the events
```

```
But some investigation showed that ReconstructedParticlesidx0, ReconstructedParticlesidx1, ReconstructedParticlesidx2, etc have the same number of events and same lengths within the event axis 
```

In [13]:
# Measure the idx0 leaf: axis=0 gives the number of events, axis=1 the number
# of entries inside each event.
idx0_leaf = events.ReconstructedParticlesidx.ReconstructedParticlesidx0
ev0 = dak.num(idx0_leaf, axis=0).compute()
n0 = dak.num(idx0_leaf, axis=1).compute()

print(f"No. of events for idx0 :\t {ev0}\nNum within events for idx0:\t {n0}" )

No. of events for idx0 :	 100000
Num within events for idx0:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]


In [14]:
# Measure a second leaf for comparison with idx0: axis=0 gives the number of
# events, axis=1 the number of entries inside each event.
# The leaf read here is ReconstructedParticlesidx5, so the printed label says
# idx5 (it previously claimed idx1). The names ev1/n1 are kept because a later
# cell compares n1 against n0.
ev1 = dak.num(events.ReconstructedParticlesidx.ReconstructedParticlesidx5, axis = 0).compute()
n1 = dak.num(events.ReconstructedParticlesidx.ReconstructedParticlesidx5, axis = 1).compute()

print(f"No. of events for idx5 :\t {ev1}\nNum within events for idx5:\t {n1}" )

No. of events for idx1 :	 100000
Num within events for idx1:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]


In [15]:
# Count the events whose per-event entry counts agree between the two leaves;
# a result equal to the number of events means every comparison is True.
ak.sum(n1 == n0) # All True

np.int64(100000)

In [16]:
# Repeat the size check for every leaf of the idx collection: number of events
# (axis=0) and number of entries inside each event (axis=1).
idx_collection = events.ReconstructedParticlesidx
for idx in idx_collection.fields:
    leaf = idx_collection[idx]
    ev = dak.num(leaf, axis = 0).compute()
    n = dak.num(leaf, axis = 1).compute()
    print(f"No. of events for {idx} :\t {ev}\nNum within events for {idx}:\t {n}" )

No. of events for ReconstructedParticlesidx0 :	 100000
Num within events for ReconstructedParticlesidx0:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]
No. of events for ReconstructedParticlesidx4 :	 100000
Num within events for ReconstructedParticlesidx4:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]
No. of events for ReconstructedParticlesidx5 :	 100000
Num within events for ReconstructedParticlesidx5:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]
No. of events for ReconstructedParticlesidx1 :	 100000
Num within events for ReconstructedParticlesidx1:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]
No. of events for ReconstructedParticlesidx2 :	 100000
Num within events for ReconstructedParticlesidx2:	 [23, 28, 38, 29, 22, 27, 32, 38, 17, ..., 23, 35, 23, 38, 33, 41, 17, 23, 37]
No. of events for ReconstructedParticlesidx3 :	 100000
Num within events for Rec

### They are all the same, so why can't I zip them? Probably because some of the leaves are corrupted

In [17]:
# Zip the six idx leaves by hand (keys idx0 .. idx5, in that order) into a
# record array named "idx", mirroring the zipped collection on `events`.
rp_idx = events.ReconstructedParticlesidx
idx = ak.zip(
    {
        f"idx{k}": getattr(rp_idx, f"ReconstructedParticlesidx{k}")
        for k in range(6)
    },
    with_name="idx",
)

In [18]:
# Materialize the hand-built zip. In the recorded run this fails with the same
# TypeError as computing events.ReconstructedParticlesidx directly.
idx.compute()

TypeError: size of array (0) is less than size of form (2739462)

### One peculiarity: the error string mentions '/offsets/AllMuon' towards the end. But this is the ReconstructedParticles collection, so it shouldn't use AllMuon offsets. Sometimes the error string has '/offset/Jet/' instead. Maybe random, incorrect offsets are being assigned here.

## Due to the previous issue, `delayed=False` raises an error

In [19]:
from coffea.nanoevents import NanoEventsFactory, FCCSchema

# Same remote ROOT file as in the delayed example; ":events" is appended to
# the path when it is handed to the factory below.
test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/ee/generation/DelphesEvents/spring2021/IDEA/p8_ee_ZH_ecm240/events_101027117.root'

# Same load, but eager (delayed=False). In the recorded run this raised
# "TypeError: size of array (0) is less than size of form (505)".
eager_factory = NanoEventsFactory.from_root(
    test_file + ":events",
    entry_stop=100,
    schemaclass=FCCSchema,
    delayed=False,
)
eager_events = eager_factory.events()

TypeError: size of array (0) is less than size of form (505)

## Check the code [here](https://github.com/prayagyadav/coffea/blob/use_scikithep_vector/src/coffea/nanoevents/schemas/fcc.py#L69)

In [None]:
# Scratch cell: the stray, undefined identifier that was here raised NameError
# when executed, so it has been removed.