# Exploring econdata 

In [1]:
import uproot
import awkward as ak
import numpy as np
import pandas as pd

In [2]:
fname = "/ecoderemdvol/EleGun/EPGun-PU200/econ_ntuples/econ_ntuple.root"
dir = 'FloatingpointAutoEncoderEMDAEMSEttbarDummyHistomaxGenmatchGenclustersntuple'
ev_dict = uproot.open(fname)[dir+'/HGCalTriggerNtuple']
#ev_dict.show()

In [3]:
arrays_toread = [
    "econ_index","econ_data",
    "econ_subdet","econ_zside","econ_layer","econ_waferu","econ_waferv","econ_wafertype",
    "tc_simenergy",
    "tc_subdet","tc_zside","tc_layer","tc_waferu","tc_waferv","tc_wafertype",
    "gen_pt","gen_energy","gen_eta","gen_phi",
    "genpart_pt","genpart_energy",
]
events = ev_dict.arrays(arrays_toread)

econ = ak.zip({
    "index": events['econ_index'],
    "data": events["econ_data"],
    "subdet": events["econ_subdet"],
    "zside": events["econ_zside"],
    "layer": events["econ_layer"],
    "waferu": events["econ_waferu"],
    "waferv": events["econ_waferv"],
})
tc = ak.zip({
    "simenergy": events["tc_simenergy"],
    "subdet": events["tc_subdet"],
    "zside": events["tc_zside"],
    "layer": events["tc_layer"],
    "waferu": events["tc_waferu"],
    "waferv": events["tc_waferv"],
})
gen = ak.zip({
    "pt": events["gen_pt"],
    "energy": events["gen_energy"],
    "eta": events["gen_eta"],
    "phi": events["gen_phi"],
})

# find wafers that we want to save
# the problem is that the number of wafers from trigger cells: trigger cells/48 
# is not the same as the number of wafers from econ data: econ_data/16
df_tc = ak.to_pandas(tc)
df_econ = ak.to_pandas(econ)
df_gen = ak.to_pandas(gen)

df_simtotal = df_tc.groupby(['entry','subdet','zside','layer','waferu','waferv'])["simenergy"].sum()
df_econ.index.names
df_econ.reset_index(inplace=True)
df_econ.set_index(['entry','subdet','zside','layer','waferu','waferv'],inplace=True)
df_econ['simenergy'] = df_simtotal
df_econ.drop(columns='subentry',inplace=True)
print(df_econ['simenergy'][df_econ['simenergy'] >0])
df_econ_wsimenergy = df_econ[df_econ.simenergy > 0]
df_econ_wsimenergy = df_econ_wsimenergy.rename(columns={"index": "econ_index", "data": "econ_data", "simenergy": "wafer_energy"})
df_econ_wsimenergy.reset_index(inplace=True)
df_econ_wsimenergy.set_index(['entry'],inplace=True)
df=df_econ_wsimenergy
df['WaferEntryIdx'] = (df.layer*10000 + df.waferu*100 + df.waferv)*df.zside
dfTrainData = df.pivot_table(index='WaferEntryIdx',columns='econ_index',values='econ_data').fillna(0).astype(int)
dfTrainData.columns = [f'CALQ_{i}' for i in range(16)]

dfTrainData[['subdet','zside','layer','waferu','waferv','wafer_energy']] = df.groupby(['WaferEntryIdx'])[['subdet','zside','layer','waferu','waferv','wafer_energy']].mean()
dfTrainData[['subdet','zside','layer','waferu','waferv']] = dfTrainData[['subdet','zside','layer','waferu','waferv']].astype(int)
dfTrainData.reset_index(inplace=True)
dfTrainData.drop(columns='WaferEntryIdx')

entry  subdet  zside  layer  waferu  waferv
0      2       1      30     -2      -3          0.258525
                                     -3          0.258525
                                     -3          0.258525
                                     -3          0.258525
                                     -3          0.258525
                                                  ...    
       1       1      11     -2      -4        109.955444
                                     -4        109.955444
                                     -4        109.955444
                                     -4        109.955444
                                     -4        109.955444
Name: simenergy, Length: 2752, dtype: float32


Unnamed: 0,CALQ_0,CALQ_1,CALQ_2,CALQ_3,CALQ_4,CALQ_5,CALQ_6,CALQ_7,CALQ_8,CALQ_9,...,CALQ_12,CALQ_13,CALQ_14,CALQ_15,subdet,zside,layer,waferu,waferv,wafer_energy
0,0,192,64,128,128,0,128,128,64,128,...,128,64,128,64,2,-1,33,1,2,0.054336
1,0,64,64,0,192,0,0,64,64,64,...,128,64,192,192,2,-1,32,3,5,0.062769
2,0,128,64,64,128,0,128,128,192,128,...,128,64,64,128,2,-1,32,3,4,0.052978
3,0,192,64,128,128,0,192,128,64,64,...,64,64,64,128,2,-1,31,2,4,0.074107
4,0,128,128,128,128,0,64,64,128,128,...,128,128,128,128,2,-1,30,3,5,0.036578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,0,192,320,64,64,0,128,64,128,64,...,192,64,64,64,2,1,29,-3,-5,0.333403
168,0,192,64,64,128,0,192,64,128,128,...,64,128,64,128,2,1,30,-2,-4,0.212168
169,0,64,128,64,128,0,128,64,128,64,...,128,64,192,128,2,1,30,-2,-3,0.258525
170,0,192,128,128,128,0,64,192,192,64,...,64,128,64,64,2,1,30,-1,-2,0.114087


In [4]:
dfTrainData

Unnamed: 0,WaferEntryIdx,CALQ_0,CALQ_1,CALQ_2,CALQ_3,CALQ_4,CALQ_5,CALQ_6,CALQ_7,CALQ_8,...,CALQ_12,CALQ_13,CALQ_14,CALQ_15,subdet,zside,layer,waferu,waferv,wafer_energy
0,-330102,0,192,64,128,128,0,128,128,64,...,128,64,128,64,2,-1,33,1,2,0.054336
1,-320305,0,64,64,0,192,0,0,64,64,...,128,64,192,192,2,-1,32,3,5,0.062769
2,-320304,0,128,64,64,128,0,128,128,192,...,128,64,64,128,2,-1,32,3,4,0.052978
3,-310204,0,192,64,128,128,0,192,128,64,...,64,64,64,128,2,-1,31,2,4,0.074107
4,-300305,0,128,128,128,128,0,64,64,128,...,128,128,128,128,2,-1,30,3,5,0.036578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,289695,0,192,320,64,64,0,128,64,128,...,192,64,64,64,2,1,29,-3,-5,0.333403
168,299796,0,192,64,64,128,0,192,64,128,...,64,128,64,128,2,1,30,-2,-4,0.212168
169,299797,0,64,128,64,128,0,128,64,128,...,128,64,192,128,2,1,30,-2,-3,0.258525
170,299898,0,192,128,128,128,0,64,192,192,...,64,128,64,64,2,1,30,-1,-2,0.114087
