# Reading the .root file and converting it to a .csv file

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from Dataset import Dataset

In [2]:
# relative path of the input .root file that this notebook converts
data_file = "./data/dataset_test.root"
# Dataset is the project-local reader imported above; all feature getters below are called on this object
dataset   = Dataset(fname=data_file)

In [105]:
# ---- jet-level quantities, one getter call per feature ----
numberFatJets = dataset.getNumberFatJets()
jetID = dataset.getJetID()
jetArea = dataset.getJetArea()
# jetVx / jetVy / jetVz are deliberately not loaded: they are all 0
jetPx = dataset.getJetPx()
jetPy = dataset.getJetPy()
jetPz = dataset.getJetPz()
jetE = dataset.getJetE()
jetPolarPx = dataset.getJetPolarPx()
jetPolarPy = dataset.getJetPolarPy()
jetPolarPz = dataset.getJetPolarPz()
jetPolarE = dataset.getJetPolarE()
jetPhi = dataset.getJetPhi()
jetTheta = dataset.getJetTheta()

# ---- particle-level quantities, one getter call per feature ----
particleType = dataset.getParticleType()
particleVx = dataset.getParticleVx()
particleVy = dataset.getParticleVy()
particleVz = dataset.getParticleVz()
particlePx = dataset.getParticlePx()
particlePy = dataset.getParticlePy()
particlePz = dataset.getParticlePz()
particleE = dataset.getParticleE()
particlePolarPx = dataset.getParticlePolarPx()
particlePolarPy = dataset.getParticlePolarPy()
particlePolarPz = dataset.getParticlePolarPz()
particlePolarE = dataset.getParticlePolarE()
particlePhi = dataset.getParticlePhi()
particleTheta = dataset.getParticleTheta()

The idea now is to create an event ID. Each entry of the lists above corresponds to one event. We then create two dataframes: the first holds the jet features and the second holds the particle features. The event ID is repeated on every jet row and every particle row belonging to that event, which gives us a mapping from each jet and each particle back to its event. We do not merge the two dataframes because they have a different underlying structure: the jet dataframe has one row per jet, while the particle dataframe has one row per particle.

In [78]:
# column names of the jet table, in the order the row values are assembled
jet_columns = [
    "evID", "jetID", "nParticles", "jetArea",
    "jetPx", "jetPy", "jetPz", "jetE",
    "jetPolarPx", "jetPolarPy", "jetPolarPz", "jetPolarE",
    "jetPhi", "jetTheta",
]

# column names of the particle table, in the order the row values are assembled
particle_columns = [
    "evID", "jetID", "particleType",
    "particleVx", "particleVy", "particleVz",
    "particlePx", "particlePy", "particlePz", "particleE",
    "particlePolarPx", "particlePolarPy", "particlePolarPz", "particlePolarE",
    "particlePhi", "particleTheta",
]

In [81]:
# Build the jet table: one row per (event, jet) pair, then write it to CSV.

# One event ID per entry of the per-event lists. The default integer dtype is used
# instead of int16 so that files with more than 32767 events do not overflow.
ev_id = np.arange(len(jetID))

# per-jet feature containers, in the same order as the matching names in jet_columns
jet_features = [
    jetArea, jetPx, jetPy, jetPz, jetE,
    jetPolarPx, jetPolarPy, jetPolarPz, jetPolarE,
    jetPhi, jetTheta,
]

# Rows are collected in a plain list and turned into a DataFrame once at the end.
# Growing a DataFrame row by row is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
jet_rows = []
for ev in ev_id:
    # jetID has one entry per particle, so its unique values are the jets of this
    # event and their counts are the number of particles clustered in each jet
    jets, jet_counts = np.unique(jetID[ev], return_counts=True)
    for i, (jet, nParticles) in enumerate(zip(jets, jet_counts)):
        # the i-th unique jet is matched with the i-th entry of every jet feature
        jet_rows.append([ev, jet, nParticles] + [feature[ev][i] for feature in jet_features])

# reshape keeps the (0, n_columns) shape when there are no jets at all
jet_data = np.array(jet_rows, dtype=np.float64).reshape(-1, len(jet_columns))
jet_df = pd.DataFrame(jet_data, columns=jet_columns)

# the ID and count columns went through a float array above; make them integers again
# (int64, because the IDs can exceed the int16 range on large files)
jet_df = jet_df.astype({"evID": np.int64, "jetID": np.int64, "nParticles": np.int64})

# save jet dataframe to a csv file
jet_df.to_csv("./data/jet_df.csv", index=False)

In [85]:
# preview the first ten rows of the jet table
jet_df.head(10)

Unnamed: 0,evID,jetID,nParticles,jetArea,jetPx,jetPy,jetPz,jetE,jetPolarPx,jetPolarPy,jetPolarPz,jetPolarE,jetPhi,jetTheta
0,0,0,23,2.004635,-241.374752,0.754266,219.719568,330.240926,241.375931,0.816488,3.138468,50.200008,3.138468,0.832331
1,0,1,41,1.974715,178.037584,-5.547558,536.195118,566.15563,178.123993,1.821678,-0.031149,36.039368,-0.031149,0.32073
2,1,2,26,2.004635,157.606353,198.51181,-237.355351,348.306921,253.46933,-0.835698,0.899762,27.082243,0.899762,2.323376
3,1,3,20,2.024582,-141.06539,-96.179119,-883.349503,900.496654,170.733322,-2.345976,-2.543183,37.921139,-2.543183,2.950667
4,2,4,62,2.044528,56.939189,187.775177,-313.881867,372.725501,196.218216,-1.248802,1.276378,43.599136,1.276378,2.582897
5,2,5,35,1.994662,-6.843687,-176.672599,395.882462,434.952749,176.805099,1.545718,-1.609514,34.6544,-1.609514,0.420031
6,3,6,9,2.044528,-253.859634,12.243771,-564.509735,623.377403,254.154724,-1.538368,3.0934,73.03096,3.0934,2.718554
7,3,7,4,2.343728,221.186913,-62.356887,-144.815456,285.624219,229.808685,-0.594511,-0.274788,88.304283,-0.274788,2.133095
8,4,8,46,2.034555,-135.936301,272.007697,-199.728113,368.516492,304.083649,-0.616933,2.034245,58.704517,2.034245,2.151951
9,4,9,48,2.094395,157.338924,-256.537265,-1003.413453,1048.851891,300.943359,-1.919153,-1.020642,51.815296,-1.020642,2.85021


In [142]:
# Build the particle table: one row per particle, then write it to CSV.

# One event ID per entry of the per-event lists. The default integer dtype is used
# instead of int16 so that files with more than 32767 events do not overflow.
ev_id = np.arange(len(jetID))

# per-particle feature containers, in the same order as the matching names in particle_columns
particle_features = [
    particleType, particleVx, particleVy, particleVz,
    particlePx, particlePy, particlePz, particleE,
    particlePolarPx, particlePolarPy, particlePolarPz, particlePolarE,
    particlePhi, particleTheta,
]

# One 2-D block per event is collected and stacked once at the end. Growing a
# DataFrame row by row is quadratic, and DataFrame.append no longer exists in
# pandas >= 2.0.
event_blocks = []
for ev in ev_id:
    # jetID has one entry per particle, so each particle is labelled with its own
    # jetID entry; this does not rely on the particles being grouped by jet
    particle_jet_ids = np.asarray(jetID[ev])
    event_column = np.full(len(particle_jet_ids), ev)
    feature_columns = [np.asarray(feature[ev]) for feature in particle_features]
    # column_stack raises if a feature does not have one value per particle
    event_blocks.append(np.column_stack([event_column, particle_jet_ids] + feature_columns))

if event_blocks:
    par_data = np.vstack(event_blocks).astype(np.float64)
else:
    # no events at all: keep an empty table with the right columns
    par_data = np.empty((0, len(particle_columns)), dtype=np.float64)

par_df = pd.DataFrame(par_data, columns=particle_columns)

# the ID and type columns went through a float array above; make them integers again
# (int64, because the values can exceed the int16 range)
par_df = par_df.astype({"evID": np.int64, "jetID": np.int64, "particleType": np.int64})

# save particle dataframe to a csv file (the particle table, not the jet table)
par_df.to_csv("./data/particle_df.csv", index=False)

In [140]:
# preview the first ten rows of the particle table
par_df.head(10)

Unnamed: 0,evID,jetID,particleType,particleVx,particleVy,particleVz,particlePx,particlePy,particlePz,particleE,particlePolarPx,particlePolarPy,particlePolarPz,particlePolarE,particlePhi,particleTheta
0,0,0,0,0.0,0.0,0.0,-115.595071,5.513218,107.093643,157.675996,115.726471,0.82763,3.093935,0.2347607,3.093935,0.824122
1,0,0,0,0.0,0.0,0.0,-83.072377,4.831796,75.798599,112.561324,83.212776,0.816948,3.083494,0.5078805,3.083494,0.831991
2,0,0,-211,-0.981025,1.422285,-33.456345,-11.168506,-8.774579,9.043395,16.838385,14.203125,0.600055,-2.475661,0.1395264,-2.475661,1.003814
3,0,0,130,0.073932,0.089866,-2.399344,-8.233158,-1.087632,6.64721,10.637351,8.304688,0.732994,-3.010249,-1.192093e-07,-3.010249,0.895801
4,0,0,-211,0.073905,0.089409,-2.399101,-8.048296,0.478376,6.0979,10.109785,8.0625,0.698202,3.082224,0.1395264,3.082224,0.923257
5,0,0,211,0.073976,0.091909,-2.399079,-4.639545,0.100107,4.272678,6.309568,4.640625,0.824183,3.120019,0.1395264,3.120019,0.826655
6,0,0,130,0.073932,0.089866,-2.399344,-1.395184,0.060261,0.712286,1.567648,1.396484,0.490188,3.098427,-0.0,3.098427,1.099136
7,0,0,130,0.073932,0.089866,-2.399344,-1.196659,0.621947,1.991598,2.40526,1.348633,1.181799,2.662281,0.0,2.662281,0.595233
8,0,0,-211,0.072526,0.100102,-2.39462,-1.259653,-0.173054,0.731444,1.473483,1.271484,0.547502,-3.005065,0.1395264,-3.005065,1.048761
9,0,0,211,0.073819,0.088003,-2.401674,-1.133641,0.06906,1.100915,1.587889,1.135742,0.859523,3.080749,0.1395264,3.080749,0.800968
