In [1]:
# # Run this once to install mplhep:
# !pip install mplhep --user 
# !pip install --user awkward1
# !pip install pyarrow --user

In [2]:
import uproot4 as uproot
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import mplhep as hep
import pandas as pd
import awkward as ak
import logging
plt.style.use(hep.style.ATLAS)  # or ATLAS/LHCb

In [3]:
class HistManager:
    def __init__(self):
        self.histograms = {}
    

    def clear(self):
        self.histograms = {}

    def add(self, df, name, x, cuts=[], nbins=100, range=[0,1], density=False):
        
        
        #branch = df[varString].ravel()
    
        mask = np.ones(len(x), dtype=bool)

        for cut in cuts:
            if len(mask) != len(cut):
                print("Error: existing bitmask and new cut bitmask have different sizes!")

            mask = np.bitwise_and(np.array(mask), np.array(cut))
        
        if name not in self.histograms:
            print(f"Creating histogram named {name} ... ")
            self.histograms[name] = np.histogram(x[mask], nbins, range, density=density)
        else:
            (hist, bins) = np.histogram(x[mask], nbins, range)
            newhist = np.sum([hist, self.histograms[name][0]], axis=0)
            self.histograms[name] = (newhist, self.histograms[name][1])
    
    def get_all(self):
        return self.histograms

    def get(self, name):
        return self.histograms[name]

    def get_hist(self, name):
        return self.histograms[name][0]
    
    def get_bins(self, name):
        return self.histograms[name][1]

def HCALEnergy(row, eta_range=[-1,1]):
    # Primary theta and phi
    primary_theta = row["PrimaryTheta"]
    primary_phi = row["PrimaryPhi"]
    
    primary_eta = -np.log(np.tan(primary_theta/2.0))
    
    if (primary_eta < eta_range[0] or primary_eta > eta_range[1]):
        return -1.0
    
    dRmax = 1.0
    TotalHCAL = 0.0

    for system in ["HcalBarrel"]: #, "HcalHadronEndcap", "HcalElectronEndcap"]:
        try:
            cl_energy = row[f"{system}Clusters.energy"]
            cl_theta  = row[f"{system}Clusters.polar.theta"]
            cl_phi    = row[f"{system}Clusters.polar.phi"]
            deltaR = np.sqrt( np.power(cl_theta - primary_theta,2) + np.power(cl_phi - primary_phi, 2))
            TotalHCAL += np.sum(cl_energy[deltaR < dRmax])
        except:
            pass

        
    return TotalHCAL

def ECALEnergy(row, eta_range=[-1,1]):
    # Primary theta and phi
    primary_theta = row["PrimaryTheta"]
    primary_phi = row["PrimaryPhi"]
    
    primary_eta = -np.log(np.tan(primary_theta/2.0))
    
    if (primary_eta < eta_range[0] or primary_eta > eta_range[1]):
        return -1.0
        
    dRmax = 1.0
    TotalECAL = 0.0
    
    for system in ["EcalBarrel"]: #, "EcalEndcap", "CrystalEcal"]:
        try:
            cl_energy = row[f"{system}Clusters.energy"]
            cl_theta  = row[f"{system}Clusters.polar.theta"]
            cl_phi    = row[f"{system}Clusters.polar.phi"]
            deltaR = np.sqrt( np.power(cl_theta - primary_theta,2) + np.power(cl_phi - primary_phi, 2))
            TotalECAL += np.sum(cl_energy[deltaR < dRmax])
        except:
            pass
        
    return TotalECAL


def PrimaryTrackTheta(row):
    mc_ID = row["mcparticles2.pdgID"]
    mc_status = row["mcparticles2.genStatus"]
    mc_vx = row["mcparticles2.psx"]
    mc_vy = row["mcparticles2.psy"]
    mc_vz = row["mcparticles2.psz"]
    
    isThrown = mc_status == 1
    thrown_ID = mc_ID[isThrown]
    theta = np.arctan2(np.sqrt(mc_vx[isThrown]**2 + mc_vy[isThrown]**2), mc_vz[isThrown])
    phi = np.arctan2(mc_vy[isThrown],mc_vx[isThrown])
    
    
    return theta[0]

def PrimaryTrackPhi(row):
    mc_ID = row["mcparticles2.pdgID"]
    mc_status = row["mcparticles2.genStatus"]
    mc_vx = row["mcparticles2.psx"]
    mc_vy = row["mcparticles2.psy"]
    mc_vz = row["mcparticles2.psz"]
    
    isThrown = mc_status == 1
    thrown_ID = mc_ID[isThrown]
    theta = np.arctan2(np.sqrt(mc_vx[isThrown]**2 + mc_vy[isThrown]**2), mc_vz[isThrown])
    phi = np.arctan2(mc_vy[isThrown],mc_vx[isThrown])
    
    return phi[0]

def PrimaryTrackID(row):
    mc_ID = row["mcparticles2.pdgID"]
    mc_status = row["mcparticles2.genStatus"]
    
    isThrown = mc_status == 1
    thrown_ID = mc_ID[isThrown]
    
    return thrown_ID[0]

def GoodClusters(energy = np.array([]), theta = np.array([]), phi = np.array([])):
    
    return (energy != 0) & (theta != 0) & (phi != 0)
    

def ElectronID(row):
    # Find the highest-energy cluster and the energy in a region around it
    
    MaxCluster_E = 0.0
    MaxCluster_Phi = -999
    MaxCluster_Theta = -999
    IsolationEnergy = -999
    
    # Barrrel energies
    for system in ["EcalBarrel"]: #, "EcalEndcap", "CrystalEcal"]:
        cl_energy = row[f"{system}Clusters.energy"]
        cl_theta  = row[f"{system}Clusters.polar.theta"]
        cl_phi    = row[f"{system}Clusters.polar.phi"]

        if len(cl_energy) == 0:
            continue

        good = GoodClusters(cl_energy, cl_theta, cl_phi)

        if len(cl_energy[good]) == 0:
            continue

        NewMaxCluster_E = np.max(cl_energy[good])
        if NewMaxCluster_E > MaxCluster_E:
            MaxCluster_E = NewMaxCluster_E
            index = np.where(cl_energy == MaxCluster_E)
            MaxCluster_Phi = cl_phi[index][0]
            MaxCluster_Theta = cl_theta[index][0]
    
    if MaxCluster_E > 0.0:

        # Compute the isolation energy
        IsolationEnergy = 0.0
        for system in ["EcalBarrel"]: #, "EcalEndcap", "CrystalEcal"]:
            if len(row[f"{system}Clusters.energy"]) == 0:
                continue

            cl_energy = row[f"{system}Clusters.energy"]
            cl_theta  = row[f"{system}Clusters.polar.theta"]
            cl_phi    = row[f"{system}Clusters.polar.phi"]

            good = GoodClusters(cl_energy, cl_theta, cl_phi)
            
            if len(cl_energy[good]) == 0:
                continue
            
            for index, cluster_energy in enumerate(cl_energy[good]):
                if cluster_energy == 0.0:
                    continue

                cl_eta = -np.log(np.tan(cl_theta[good][index]/2))
                max_cl_eta = -np.log(np.tan(MaxCluster_Theta/2))
                
                DeltaR = np.sqrt((cl_phi[good][index] - MaxCluster_Phi)**2 + (cl_eta - max_cl_eta)**2)
                if DeltaR < 1.0 and DeltaR > 0.1:
                    IsolationEnergy += cluster_energy

    return pd.Series((MaxCluster_Theta, MaxCluster_Phi, IsolationEnergy))



def ProcessFiles(files={}, branches=[], hist_manager=None, name="", eta_range=[-1,1], output_arrays = None):
    count = 0
    for batch in uproot.iterate([files], 
                            branches, 
                            step_size = '1000 MB',
                            library="np"):

        df = pd.DataFrame(batch)
        #df["PrimaryID"]     = df.apply(PrimaryTrackID, axis=1)
        #df["PrimaryTheta"]  = df.apply(PrimaryTrackTheta, axis=1)
        #df["PrimaryPhi"]    = df.apply(PrimaryTrackPhi,  axis=1)
        #df["ECALEnergyFraction"] = df.apply(ECALEnergyFraction, axis=1)
        df["ECALEnergy"] = df.apply(ECALEnergy, eta_range=eta_range, axis=1)  # apply function ECALEnergy to each row. 
        df["HCALEnergy"] = df.apply(HCALEnergy, eta_range=eta_range, axis=1)
        df["ECALEnergyFraction"] = df["ECALEnergy"]/(df["ECALEnergy"] + df["HCALEnergy"])
        
        df[["ElectronTheta", "ElectronPhi", "ElectronIsoE"]] = df.apply(ElectronID, axis=1)
    
        hist_manager.add(df, f"{name}ECALEnergyFraction", x=df["ECALEnergyFraction"], cuts=[(df["ECALEnergy"] + df["HCALEnergy"]) > 0], nbins=100, range=[0,1.0], density=True)
        hist_manager.add(df, f"{name}ECALEnergy", x=df["ECALEnergy"]/1000.0, cuts=[], nbins=100, range=[0,10.0], density=True)
        hist_manager.add(df, f"{name}HCALEnergy", x=df["HCALEnergy"]/1000.0, cuts=[], nbins=100, range=[0,10.0], density=True)
        count += 1
        
        if output_arrays is not None:
            for key in output_arrays:
                output_arrays[key] = np.concatenate([output_arrays[key], df[key]])
        


# Barrel Study

This part focuses on electrons and pions at 5 GeV/c fired into the barrel region, $\eta=[-1,1]$.

In [4]:
testfile = uproot.open("/home/rdhill13/projects/rrg-wdconinc/wdconinc/EIC/campaigns/reco/SINGLE/e-/5GeV/130to177deg/e-_5GeV_130to177deg.0020.root")
testfile.keys()

testtree = testfile['events;1']
for i in range(len(testtree)):
    print(testtree[i])


<TBranchElement 'EcalEndcapNClusters' (21 subbranches) at 0x2abd9b54b8e0>
<TBranchElement 'EcalEndcapNClusters#0' (2 subbranches) at 0x2abd9b59ab50>
<TBranchElement 'EcalEndcapNClusters#1' (2 subbranches) at 0x2abd9b5e3820>
<TBranchElement 'EcalEndcapPClusters' (21 subbranches) at 0x2abd9b5eb490>
<TBranchElement 'EcalEndcapPClusters#0' (2 subbranches) at 0x2abd9decb610>
<TBranchElement 'EcalEndcapPClusters#1' (2 subbranches) at 0x2abd9ded82b0>
<TBranchElement 'EcalBarrelClusters' (19 subbranches) at 0x2abd9dedd730>
<TBranchElement 'EcalBarrelClusters#0' (2 subbranches) at 0x2abd9df0d5b0>
<TBranchElement 'EcalBarrelClusters#1' (2 subbranches) at 0x2abd9df15250>
<TBranchElement 'EcalBarrelLayers' (15 subbranches) at 0x2abd9df1beb0>
<TBranchElement 'EcalBarrelLayers#0' (2 subbranches) at 0x2abd9df586d0>
<TBranchElement 'HcalBarrelClusters' (21 subbranches) at 0x2abd9df64370>
<TBranchElement 'HcalBarrelClusters#0' (2 subbranches) at 0x2abd9dfb14f0>
<TBranchElement 'HcalBarrelClusters#1' (2

In [5]:
hcal = testtree['HcalBarrelClusters']
ecal = testtree['EcalBarrelClusters']
hcal.keys(), ecal.keys()

(['HcalBarrelClusters.energy',
  'HcalBarrelClusters.edep',
  'HcalBarrelClusters.nhits',
  'HcalBarrelClusters.position.x',
  'HcalBarrelClusters.position.y',
  'HcalBarrelClusters.position.z',
  'HcalBarrelClusters.positionError.cov_xx',
  'HcalBarrelClusters.positionError.cov_yy',
  'HcalBarrelClusters.positionError.cov_zz',
  'HcalBarrelClusters.positionError.cov_xy',
  'HcalBarrelClusters.positionError.cov_xz',
  'HcalBarrelClusters.positionError.cov_yz',
  'HcalBarrelClusters.polar.r',
  'HcalBarrelClusters.polar.theta',
  'HcalBarrelClusters.polar.phi',
  'HcalBarrelClusters.cl_theta',
  'HcalBarrelClusters.cl_phi',
  'HcalBarrelClusters.clusters_begin',
  'HcalBarrelClusters.clusters_end',
  'HcalBarrelClusters.hits_begin',
  'HcalBarrelClusters.hits_end'],
 ['EcalBarrelClusters.nhits',
  'EcalBarrelClusters.energy',
  'EcalBarrelClusters.edep',
  'EcalBarrelClusters.radius',
  'EcalBarrelClusters.skewness',
  'EcalBarrelClusters.leakcorr',
  'EcalBarrelClusters.eta',
  'EcalBa

In [6]:
#sample_dir = "EIC/reconstruction_benchmarks/2021-06-19-HCAL-BRL-FWD"

# e_files = {f"{sample_dir}/rec_electron_5k-p1000MEV-[0-9].root": "events", 
#           f"{sample_dir}/rec_electron_5k-p1000MEV-[0-9][0-9].root": "events",
#           f"{sample_dir}/rec_electron_5k-p1000MEV-1[0-9][0-9].root": "events"
#           }
# pi_files = {f"{sample_dir}/rec_pion+_5k-p1000MEV-[0-9].root": "events",
#            f"{sample_dir}/rec_pion+_5k-p1000MEV-[0-9][0-9].root": "events",
#            f"{sample_dir}/rec_pion+_5k-p1000MEV-1[0-9][0-9].root": "events"
#            }

e_files = {f"/home/rdhill13/projects/rrg-wdconinc/wdconinc/EIC/campaigns/reco/SINGLE/e-/5GeV/130to177deg/e-_5GeV_130to177deg.0020.root": "events"} # reco -> full
pi_files = {f"/home/rdhill13/projects/rrg-wdconinc/wdconinc/EIC/campaigns/reco/SINGLE/pi+/5GeV/45to135deg/pi+_5GeV_45to135deg.0020.root": "events"}

branches = ['EcalBarrelClusters.energy', 'HcalBarrelClusters.energy']
#             'EcalEndcapClusters.energy', 'HcalHadronEndcapClusters.energy', 'HcalElectronEndcapClusters.energy', 'CrystalEcalClusters.energy',
#             'EcalBarrelClusters.polar.theta', 'HcalBarrelClusters.polar.theta',
#             'EcalEndcapClusters.polar.theta', 'HcalHadronEndcapClusters.polar.theta', 'HcalElectronEndcapClusters.polar.theta', 'CrystalEcalClusters.polar.theta',
#             'EcalBarrelClusters.polar.phi', 'HcalBarrelClusters.polar.phi',
#             'EcalEndcapClusters.polar.phi', 'HcalHadronEndcapClusters.polar.phi', 'HcalElectronEndcapClusters.polar.phi', 'CrystalEcalClusters.polar.phi',
#             'mcparticles2.pdgID', 'mcparticles2.genStatus',
#             'mcparticles2.psx', 'mcparticles2.psy', 'mcparticles2.psz']

hm = HistManager()
hm.clear()

data_arrays = {}
data_arrays["PrimaryID"] = np.array([])
data_arrays["ECALEnergyFraction"] = np.array([])
data_arrays["ECALEnergy"] = np.array([])
data_arrays["HCALEnergy"] = np.array([])
data_arrays["ElectronTheta"] = np.array([])
data_arrays["ElectronPhi"] = np.array([])
data_arrays["ElectronIsoE"] = np.array([])

bak_data_arrays = data_arrays.copy()
fwd_data_arrays = data_arrays.copy()


ProcessFiles(e_files, branches, hm, "e", eta_range=[-1,1], output_arrays=data_arrays)
ProcessFiles(pi_files, branches, hm, "pi", eta_range=[-1,1], output_arrays=data_arrays)

KeyError: 'PrimaryTheta'