# Newstyle EDM4HEPSchema showcase

## Load the events

In [37]:
import awkward

In [1]:
from coffea.nanoevents import NanoEventsFactory, BaseSchema, EDM4HEPSchema, FCC

# Input samples: an FCC EDM4HEP1 file and the EDM4HEP test sample.
fcc_edm4hep_test_file = "../coffea-fcc-analyses/data/edm4hep1/p8_ee_WW_ecm240_edm4hep.root"
edm4hep_test_file = "coffea/tests/samples/edm4hep.root"

# Value passed as ``entry_stop`` to every load below.
ENTRY_STOP = 100


def _load_events(root_file, schemaclass, entry_stop=ENTRY_STOP):
    """Load the ``events`` tree of ``root_file`` through ``schemaclass``.

    Parameters
    ----------
    root_file : str
        Path to the ROOT file; ``":events"`` is appended to select the tree.
    schemaclass : type
        Schema class handed to ``NanoEventsFactory.from_root``.
    entry_stop : int, optional
        Forwarded unchanged as ``entry_stop`` (defaults to ``ENTRY_STOP``).

    Returns
    -------
    The object returned by ``NanoEventsFactory.from_root(...).events()``.
    """
    return NanoEventsFactory.from_root(
        root_file + ":events",
        entry_stop=entry_stop,
        schemaclass=schemaclass,
        delayed=False,
        # Drop every name containing "PARAMETERS"
        # (presumably applied to branch names by uproot -- confirm).
        uproot_options={"filter_name": lambda name: "PARAMETERS" not in name},
    ).events()


# EDM4HEP root file with BaseSchema
edm4hep_base_events = _load_events(edm4hep_test_file, BaseSchema)

# EDM4HEP root file with EDM4HEPSchema
edm4hep_events = _load_events(edm4hep_test_file, EDM4HEPSchema)

# FCC EDM4HEP1 root file with BaseSchema
fcc_base_events = _load_events(fcc_edm4hep_test_file, BaseSchema)

# FCC EDM4HEP1 root file with newstyle edm4hep FCCSchema
fcc_events = _load_events(fcc_edm4hep_test_file, FCC.get_schema("latest"))

Issue: coffea.nanoevents.methods.vector will be removed and replaced with scikit-hep vector. Nanoevents schemas internal to coffea will be migrated. Otherwise please consider using that package!.
  from coffea.nanoevents.methods import vector
 skipping ...


In [2]:
# Field names of the EDM4HEP sample read with BaseSchema.
# (Swap in edm4hep_events, fcc_base_events or fcc_events to compare.)
edm4hep_base_events.fields

['CaloHitContributionCollection/CaloHitContributionCollection.PDG',
 'CaloHitContributionCollection/CaloHitContributionCollection.energy',
 'CaloHitContributionCollection/CaloHitContributionCollection.time',
 'CaloHitContributionCollection/CaloHitContributionCollection.stepPosition.x',
 'CaloHitContributionCollection/CaloHitContributionCollection.stepPosition.y',
 'CaloHitContributionCollection/CaloHitContributionCollection.stepPosition.z',
 '_CaloHitContributionCollection_particle/_CaloHitContributionCollection_particle.index',
 '_CaloHitContributionCollection_particle/_CaloHitContributionCollection_particle.collectionID',
 'CaloHitMCParticleLinkCollection/CaloHitMCParticleLinkCollection.weight',
 '_CaloHitMCParticleLinkCollection_from/_CaloHitMCParticleLinkCollection_from.index',
 '_CaloHitMCParticleLinkCollection_from/_CaloHitMCParticleLinkCollection_from.collectionID',
 '_CaloHitMCParticleLinkCollection_to/_CaloHitMCParticleLinkCollection_to.index',
 '_CaloHitMCParticleLinkCollecti

In [3]:
# Field names of the EDM4HEP sample read with EDM4HEPSchema.
# (Swap in edm4hep_base_events, fcc_base_events or fcc_events to compare.)
edm4hep_events.fields

['CaloHitContributionCollection',
 'CaloHitMCParticleLinkCollection',
 'CaloHitSimCaloHitLinkCollection',
 'CalorimeterHitCollection',
 'ClusterCollection',
 'ClusterMCParticleLinkCollection',
 'EventHeader',
 'GPDoubleKeys',
 'GPDoubleValues',
 'GPFloatKeys',
 'GPFloatValues',
 'GPIntKeys',
 'GPIntValues',
 'GPStringKeys',
 'GPStringValues',
 'GeneratorEventParametersCollection',
 'GeneratorPdfInfoCollection',
 'MCParticleCollection',
 'ParticleIDCollection',
 'RawCalorimeterHitCollection',
 'RawTimeSeriesCollection',
 'RecDqdxCollection',
 'RecoMCParticleLinkCollection',
 'ReconstructedParticleCollection',
 'SimCalorimeterHitCollection',
 'SimTrackerHitCollection',
 'TimeSeriesCollection',
 'TrackCollection',
 'TrackMCParticleLinkCollection',
 'TrackerHit3DCollection',
 'TrackerHitPlaneCollection',
 'TrackerHitSimTrackerHitLinkCollection',
 'VertexCollection',
 'VertexRecoParticleLinkCollection']

In [4]:
# Field names of the FCC sample read with BaseSchema.
# (Swap in edm4hep_base_events, edm4hep_events or fcc_events to compare.)
fcc_base_events.fields

['CalorimeterHits/CalorimeterHits.cellID',
 'CalorimeterHits/CalorimeterHits.energy',
 'CalorimeterHits/CalorimeterHits.energyError',
 'CalorimeterHits/CalorimeterHits.time',
 'CalorimeterHits/CalorimeterHits.position.x',
 'CalorimeterHits/CalorimeterHits.position.y',
 'CalorimeterHits/CalorimeterHits.position.z',
 'CalorimeterHits/CalorimeterHits.type',
 'EFlowNeutralHadron/EFlowNeutralHadron.type',
 'EFlowNeutralHadron/EFlowNeutralHadron.energy',
 'EFlowNeutralHadron/EFlowNeutralHadron.energyError',
 'EFlowNeutralHadron/EFlowNeutralHadron.position.x',
 'EFlowNeutralHadron/EFlowNeutralHadron.position.y',
 'EFlowNeutralHadron/EFlowNeutralHadron.position.z',
 'EFlowNeutralHadron/EFlowNeutralHadron.positionError.values[6]',
 'EFlowNeutralHadron/EFlowNeutralHadron.iTheta',
 'EFlowNeutralHadron/EFlowNeutralHadron.phi',
 'EFlowNeutralHadron/EFlowNeutralHadron.directionError.x',
 'EFlowNeutralHadron/EFlowNeutralHadron.directionError.y',
 'EFlowNeutralHadron/EFlowNeutralHadron.directionError.

In [5]:
# Field names of the FCC sample read with the FCC schema.
# (Swap in edm4hep_base_events, edm4hep_events or fcc_base_events to compare.)
fcc_events.fields

['CalorimeterHits',
 'EFlowNeutralHadron',
 'EFlowPhoton',
 'EFlowTrack',
 'EFlowTrack_L',
 'EFlowTrack_dNdx',
 'Electron_IsolationVar',
 'Electron_objIdx',
 'EventHeader',
 'GPDoubleKeys',
 'GPDoubleValues',
 'GPFloatKeys',
 'GPFloatValues',
 'GPIntKeys',
 'GPIntValues',
 'GPStringKeys',
 'GPStringValues',
 'Jet',
 'MCRecoAssociations',
 'Muon_IsolationVar',
 'Muon_objIdx',
 'Particle',
 'ParticleIDs',
 'Photon_IsolationVar',
 'Photon_objIdx',
 'ReconstructedParticles',
 'TrackerHits',
 'magFieldBz']

## EDM4HEP has four types of mappings that require attention:


- Vector Members
- OneToOneRelations
- OneToManyRelations
- Links
<img title="edm4hep" alt="edm4hep" src="https://kjvbrt.org/fcc/presentations/2024/edm4hep-in-fccana/img/edm4hep_diagram.png">

## Vector Members

- The description of a datatype in the edm4hep.yaml can have one or more vector members
- Each Vector Member is stored in the root file with three related branches:
    - A begin branch that stores the start index of the vector
    - An end branch that stores the end index of the vector
    - A branch that stores the actual elements of all the vectors
  
### For example:
The `ParticleID` datatype is defined as follows in the edm4hep.yaml:
```yaml
  edm4hep::ParticleID:
    Description:  "ParticleID"
    Author: "EDM4hep authors"
    Members:
      - int32_t   type           // userdefined type
      - int32_t   PDG            // PDG code of this id - ( 999999 ) if unknown
      - int32_t   algorithmType  // type of the algorithm/module that created this hypothesis
      - float likelihood     // likelihood of this hypothesis - in a user defined normalization
    VectorMembers:
      - float parameters     // parameters associated with this hypothesis
    OneToOneRelations:
      - edm4hep::ReconstructedParticle particle // the particle from which this PID has been computed
```
Which shows the definition of one vector member called 'parameters'

### And this is saved in the root files as three branches:

In [6]:
# "begin" branch of the `parameters` vector member, read via BaseSchema.
parameters_begin = edm4hep_base_events[
    "ParticleIDCollection/ParticleIDCollection.parameters_begin"
]
parameters_begin

In [7]:
# "end" branch of the `parameters` vector member, read via BaseSchema.
parameters_end = edm4hep_base_events[
    "ParticleIDCollection/ParticleIDCollection.parameters_end"
]
parameters_end

In [8]:
# Branch holding the actual elements of the `parameters` vector member.
parameters_elements = edm4hep_base_events[
    "_ParticleIDCollection_parameters"
]
parameters_elements

### Transforms defined for EDM4HEPSchema convert the begin and end branches to ranges of indices and map the elements branch onto those indices, effectively rebuilding the vectors by combining the three branches

In [9]:
# `parameters` as exposed by EDM4HEPSchema on the ParticleIDCollection.
parameters = edm4hep_events.ParticleIDCollection.parameters
parameters

In [10]:
# The same vector member for the FCC sample, where the collection is
# named `ParticleIDs`.
fcc_parameters = fcc_events.ParticleIDs.parameters
fcc_parameters

## OneToOneRelations

- OneToOneRelations, denoted by a single black arrow connecting two datatypes in the edm4hep graph, are direct one-to-one mappings between the two datatypes

### For example:
The `ReconstructedParticle` datatype is defined with a OneToOneRelation with the Vertex collection:
```yaml
  edm4hep::ReconstructedParticle:
    Description: "Reconstructed Particle"
    Author: "EDM4hep authors"
    Members:
      - int32_t                PDG            // PDG of the reconstructed particle.
      - float                  energy [GeV]    // energy of the reconstructed particle. Four momentum state is not kept consistent internally
      - edm4hep::Vector3f      momentum [GeV]  //  particle momentum. Four momentum state is not kept consistent internally
      - edm4hep::Vector3f      referencePoint [mm] // reference, i.e. where the particle has been measured
      - float                  charge         // charge of the reconstructed particle
      - float                  mass  [GeV]    //  mass of the reconstructed particle, set independently from four vector. Four momentum state is not kept consistent internally
      - float                  goodnessOfPID  // overall goodness of the PID on a scale of [0;1]
      - edm4hep::CovMatrix4f   covMatrix      // covariance matrix of the reconstructed particle 4vector
    OneToOneRelations:
      - edm4hep::Vertex          decayVertex    // decay vertex for the particle (if it is a composite particle)
    OneToManyRelations:
      - edm4hep::Cluster               clusters     // clusters that have been used for this particle
      - edm4hep::Track                 tracks       // tracks that have been used for this particle
      - edm4hep::ReconstructedParticle particles    // reconstructed particles that have been combined to this particle
    ExtraCode:
      includes: "#include <edm4hep/Constants.h>"
      declaration: "
      bool isCompound() const { return particles_size() > 0 ;}\n
      [[deprecated(\"use setPDG instead\")]]\n
      int32_t getType() const { return getPDG(); }\n
      /// Get the four momentum covariance matrix value for the two passed dimensions\n
      float getCovMatrix(edm4hep::FourMomCoords dimI, edm4hep::FourMomCoords dimJ) const { return getCovMatrix().getValue(dimI, dimJ); }\n
      "
    MutableExtraCode:
      includes: "#include <edm4hep/Constants.h>"
      declaration: "
      //vertex where the particle decays. This method actually returns the start vertex from the first daughter particle found.\n
      //TODO: edm4hep::Vertex  getEndVertex() { return  edm4hep::Vertex(  (getParticles(0).isAvailable() ? getParticles(0).getStartVertex() :  edm4hep::Vertex(0,0) ) ) ; }\n
      [[deprecated(\"use setPDG instead\")]]\n
      void setType(int32_t pdg) { setPDG(pdg); }\n
      /// Set the four momentum covariance matrix value for the two passed dimensions\n
      void setCovMatrix(float value, edm4hep::FourMomCoords dimI, edm4hep::FourMomCoords dimJ) { getCovMatrix().setValue(value, dimI, dimJ); }\n
```

And this is stored as `index` and `collectionID` branches in the root files.

In [11]:
# `index` branch of the decayVertex OneToOneRelation, read via BaseSchema.
vertex_index = edm4hep_base_events[
    "_ReconstructedParticleCollection_decayVertex/_ReconstructedParticleCollection_decayVertex.index"
]
vertex_index

In [12]:
# `collectionID` branch of the decayVertex OneToOneRelation, read via
# BaseSchema. Display the array assigned in this cell (the original cell
# showed `vertex_index` again by mistake).
vertex_collectionID = edm4hep_base_events[
    "_ReconstructedParticleCollection_decayVertex/_ReconstructedParticleCollection_decayVertex.collectionID"
]
vertex_collectionID

### EDM4HEPSchema saves OneToOneRelations into the parent collection, along with the global-index counterpart of each index.
### EDM4HEPSchema also provides methods:
- `List_Relations` to list all the sub-branches associated with a OneToOneRelation or a OneToManyRelation
- `Map_Relation` to actually map the relation

In [13]:
# Names of the relation sub-branches attached to ReconstructedParticleCollection.
edm4hep_events.ReconstructedParticleCollection.List_Relations

{'clusters_idx_ClusterCollection_collectionID',
 'clusters_idx_ClusterCollection_index',
 'clusters_idx_ClusterCollection_index_Global',
 'decayVertex_idx_VertexCollection_collectionID',
 'decayVertex_idx_VertexCollection_index',
 'decayVertex_idx_VertexCollection_index_Global',
 'particles_idx_ReconstructedParticleCollection_collectionID',
 'particles_idx_ReconstructedParticleCollection_index',
 'particles_idx_ReconstructedParticleCollection_index_Global',
 'tracks_idx_TrackCollection_collectionID',
 'tracks_idx_TrackCollection_index',
 'tracks_idx_TrackCollection_index_Global'}

In [14]:
# Map the `decayVertex` relation onto the VertexCollection.
edm4hep_events.ReconstructedParticleCollection.Map_Relation(
    generic_name="decayVertex",
    target_name="VertexCollection",
)

## OneToManyRelations

- OneToManyRelations, denoted by multiple black arrows connecting two datatypes in the edm4hep graph, are one-to-many mappings between the two datatypes.
- Each parent datatype may point to multiple elements in the target datatype. Consequently, OneToManyRelations can be expressed by providing vector indices to the daughter datatype.
- Each OneToManyRelation is stored in the root files with four branches:
    - begin branch (similar to VectorMembers), that stores the start index to the actual target indices
    - end branch (similar to VectorMembers), that stores the end index to the actual target indices
    - index branch that stores the actual target indices
    - collectionID branch that stores collectionID of the target datatype

### For example:
- As seen in the previous example, the `ReconstructedParticle` datatype also features OneToManyRelations to `Cluster`, `Track` and to itself
- Let's look at the track relation

The track OneToManyRelation is stored with these branches:

In [15]:
# "begin" branch of the `tracks` OneToManyRelation, read via BaseSchema.
tracks_begin = edm4hep_base_events[
    "ReconstructedParticleCollection/ReconstructedParticleCollection.tracks_begin"
]
tracks_begin

In [16]:
# "end" branch of the `tracks` OneToManyRelation, read via BaseSchema.
tracks_end = edm4hep_base_events[
    "ReconstructedParticleCollection/ReconstructedParticleCollection.tracks_end"
]
tracks_end

In [17]:
# `index` branch of the `tracks` OneToManyRelation (the target indices).
track_indices = edm4hep_base_events[
    "_ReconstructedParticleCollection_tracks/_ReconstructedParticleCollection_tracks.index"
]
track_indices

In [18]:
# `collectionID` branch of the `tracks` OneToManyRelation.
track_collectionID = edm4hep_base_events[
    "_ReconstructedParticleCollection_tracks/_ReconstructedParticleCollection_tracks.collectionID"
]
track_collectionID

#### EDM4HEPSchema has methods to create vectors of indices by combining the begin, end and indices

In [19]:
# Relation sub-branches again; the `tracks_idx_TrackCollection_*` entries
# are the ones used in the cells that follow.
edm4hep_events.ReconstructedParticleCollection.List_Relations

{'clusters_idx_ClusterCollection_collectionID',
 'clusters_idx_ClusterCollection_index',
 'clusters_idx_ClusterCollection_index_Global',
 'decayVertex_idx_VertexCollection_collectionID',
 'decayVertex_idx_VertexCollection_index',
 'decayVertex_idx_VertexCollection_index_Global',
 'particles_idx_ReconstructedParticleCollection_collectionID',
 'particles_idx_ReconstructedParticleCollection_index',
 'particles_idx_ReconstructedParticleCollection_index_Global',
 'tracks_idx_TrackCollection_collectionID',
 'tracks_idx_TrackCollection_index',
 'tracks_idx_TrackCollection_index_Global'}

In [20]:
# Track indices of the `tracks` relation as exposed by EDM4HEPSchema.
edm4hep_events.ReconstructedParticleCollection.tracks_idx_TrackCollection_index

To map the relation, one can once again use the `Map_Relation` method:

In [21]:
# Map the `tracks` relation onto the TrackCollection.
edm4hep_events.ReconstructedParticleCollection.Map_Relation(
    generic_name="tracks",
    target_name="TrackCollection",
)

## Links

- Links, represented by violet arrows in the edm4hep graph, are special OneToOneRelations that relate a subset of indices between two datatypes (unlike usual OneToOneRelations, where each element of the parent datatype has a counterpart in the target datatype)

#### For example:
The `RecoMCParticleLink` is defined in the edm4hep.yaml as:
```yaml
edm4hep::RecoMCParticleLink:
    Description: "Link between a ReconstructedParticle and the corresponding MCParticle"
    Author: "EDM4hep authors"
    Members:
      - float weight                        // weight of this link
    OneToOneRelations:
     - edm4hep::ReconstructedParticle  from  // reference to the reconstructed particle
     - edm4hep::MCParticle to              // reference to the Monte-Carlo particle
    ExtraCode:
      declaration: "
      [[deprecated(\"use getFrom instead\")]] edm4hep::ReconstructedParticle getRec() const;\n
      [[deprecated(\"use getTo instead\")]] edm4hep::MCParticle getSim() const;\n"
      implementation: "
      edm4hep::ReconstructedParticle {name}::getRec() const { return getFrom(); }\n
      edm4hep::MCParticle {name}::getSim() const { return getTo(); }\n"
    MutableExtraCode:
      declaration: "
      [[deprecated(\"use setFrom instead\")]]\n
      void setRec(const edm4hep::ReconstructedParticle& rec);\n
      [[deprecated(\"use setTo instead\")]]\n
      void setSim(const edm4hep::MCParticle& sim);\n
      "
      implementation: "
      void {name}::setSim(const edm4hep::MCParticle& sim) { setTo(sim); }\n
      void {name}::setRec(const edm4hep::ReconstructedParticle& rec) { setFrom(rec); }\n
      "
```
Note that this format has changed in [#373](https://github.com/key4hep/EDM4hep/pull/373) in EDM4HEP


A Link is stored with 5 branches, and the 5 branches for `RecoMCParticleLink` are :

In [22]:
# `weight` member of the RecoMCParticleLink, read via BaseSchema.
link_weight = edm4hep_base_events[
    "RecoMCParticleLinkCollection/RecoMCParticleLinkCollection.weight"
]
link_weight

In [23]:
# `index` branch of the link's `from` (ReconstructedParticle) side.
link_from_reco_index = edm4hep_base_events[
    "_RecoMCParticleLinkCollection_from/_RecoMCParticleLinkCollection_from.index"
]
link_from_reco_index

In [24]:
# `collectionID` branch of the link's `from` (ReconstructedParticle) side.
link_from_reco_collectionID = edm4hep_base_events[
    "_RecoMCParticleLinkCollection_from/_RecoMCParticleLinkCollection_from.collectionID"
]
link_from_reco_collectionID

In [25]:
# `index` branch of the link's `to` (MCParticle) side.
link_to_MC_index = edm4hep_base_events[
    "_RecoMCParticleLinkCollection_to/_RecoMCParticleLinkCollection_to.index"
]
link_to_MC_index

In [26]:
# `collectionID` branch of the link's `to` (MCParticle) side.
link_to_MC_collectionID = edm4hep_base_events[
    "_RecoMCParticleLinkCollection_to/_RecoMCParticleLinkCollection_to.collectionID"
]
link_to_MC_collectionID

#### All of these 5 branches (along with global indices) are zipped into a single collection in EDM4HEPSchema

In [27]:
# The link collection as exposed by EDM4HEPSchema.
edm4hep_events.RecoMCParticleLinkCollection

One can get the MC Particle corresponding to a reconstructed particle with:

#### Start with combining ReconstructedParticle collection and the Link_from_ReconstructedParticleCollection

In [39]:
# Take the reconstructed particles and attach the link's `from` side to them
# as an extra field, so later selections on R carry it along.
R = edm4hep_events.ReconstructedParticleCollection
R["Link_from_ReconstructedParticleCollection"] = (
    edm4hep_events.RecoMCParticleLinkCollection.Link_from_ReconstructedParticleCollection
)

In [40]:
# Fields of R after attaching `Link_from_ReconstructedParticleCollection`.
R.fields

['E',
 'PDG',
 'charge',
 'clusters_idx_ClusterCollection_collectionID',
 'clusters_idx_ClusterCollection_index',
 'clusters_idx_ClusterCollection_index_Global',
 'covMatrix',
 'decayVertex_idx_VertexCollection_collectionID',
 'decayVertex_idx_VertexCollection_index',
 'decayVertex_idx_VertexCollection_index_Global',
 'goodnessOfPID',
 'mass',
 'particles_idx_ReconstructedParticleCollection_collectionID',
 'particles_idx_ReconstructedParticleCollection_index',
 'particles_idx_ReconstructedParticleCollection_index_Global',
 'px',
 'py',
 'pz',
 'referencePoint',
 'tracks_idx_TrackCollection_collectionID',
 'tracks_idx_TrackCollection_index',
 'tracks_idx_TrackCollection_index_Global',
 'Link_from_ReconstructedParticleCollection']

#### Now apply the cuts and selections that one wants to apply

In [42]:
# Keep only reconstructed particles with energy > 10 GeV.
# NOTE(review): R.fields lists 'E', not 'energy'; presumably `energy` is a
# property supplied by the record behavior -- confirm.
# NOTE: R is rebound here, so re-running this cell filters the already
# filtered array.
R = R[R.energy > 10]
R

### Now get the MCParticle indices corresponding to the resulting reconstructed particles

In [47]:
# Select the link entries whose `from` index equals the `from` index carried
# by the selected reconstructed particles.
# NOTE(review): this compares the filtered R against the unfiltered link
# collection -- confirm the two arrays have compatible shapes.
mask = (
    R.Link_from_ReconstructedParticleCollection.index
    == edm4hep_events.RecoMCParticleLinkCollection.Link_from_ReconstructedParticleCollection.index
)
Link = edm4hep_events.RecoMCParticleLinkCollection[mask]

# Global indices of the `to` (MCParticle) side of the selected links.
MC_index_global = Link.Link_to_MCParticleCollection.index_Global
MC_index_global

### Finally, apply the global indices to MCParticleCollection to get the final MCParticles

In [50]:
# Look up the MC particles selected by the global indices computed above.
# NOTE(review): `_apply_global_index` is an underscore-prefixed (presumably
# private) method -- confirm it is intended for use in analysis code.
matched_mc = edm4hep_events.MCParticleCollection._apply_global_index(MC_index_global)
matched_mc

### Note: Due to various issues, such as unknown collectionIDs of collections and mismatching shapes of link branches, I do not think simplified methods like 'Map_Link' can be added to make mapping the links easier. However, this could be improved in schemas that derive from EDM4HEPSchema (for example, the newstyle FCCSchema).