In [1]:
import os
import pickle
import argparse
import uproot
import awkward as ak
import hist
import pandas as pd

In [7]:
class Args:
    input_file = '../CLUE_clusters_single.root'
    output_file = '/home/llr/cms/cuisset/hgcal/testbeam18/clue3d-dev/src/plots/cache/hists.pkl'
args = Args()

In [8]:
for array_i in uproot.iterate(args.input_file + ":clusters", step_size="100MB", library="ak"):
    array = array_i
    break

In [35]:
impact = ak.to_dataframe(array[
    ["impactX", "impactY"]],
    levelname=lambda i : {0 : "event", 1:"layer"}[i])

In [10]:
clusters_2d = ak.to_dataframe(array[
    ["beamEnergy", "NRechits", "clus2D_x", "clus2D_y", "clus2D_z", "clus2D_energy", "clus2D_layer",
        "clus2D_rho", "clus2D_delta", "clus2D_idxs", "clus2D_isSeed"]
    ], 
    levelname=lambda i : {0 : "event", 1:"clus2D_id", 2:"hit_id"}[i])

In [11]:
clusters_3d = ak.to_dataframe(array[
    ["beamEnergy", "NRechits", "clus3D_x", "clus3D_y", "clus3D_z", "clus3D_energy", "clus3D_size", "clus3D_idxs"]
    ], 
    levelname=lambda i : {0 : "event", 1:"clus3D_id", 2:"hit_id"}[i])

In [47]:
 #Merge clusters3D and clusters2D
clusters_3d_2d = pd.merge(
    #Left : clusters3d
    # Reset multiindex so its columns can be kept in joined dataframe (otherwise clus3D_id column disappears)
    clusters_3d.reset_index(level=("clus3D_id", "hit_id"), names=["event", "clus3D_id", "clus3D_hit_id"]),

    #Right : clusters_2d
    # We don't care about rechits so we slice the df by taking the row of first rechit of 2D cluster
    #                  event     cluster2d_id  index of hit inside cluster2D
    # Also reset index for same reason
    clusters_2d.loc[(slice(None), slice(None),   0                       )].reset_index(level="event"),
    how='inner',

    # Map event on both sides
    # Map clus3D_idxs to clus2D_id
    left_on=('event', 'clus3D_idxs'),
    right_on=('event', 'clus2D_id'),

    suffixes=('', '_clus2D') # This is to avoid beamEnergy column (which exists on both sides) to get renamed. We just keep the one from the left
)
clusters_3d_2d

Unnamed: 0,event,clus3D_id,clus3D_hit_id,beamEnergy,NRechits,clus3D_x,clus3D_y,clus3D_z,clus3D_energy,clus3D_size,...,NRechits_clus2D,clus2D_x,clus2D_y,clus2D_z,clus2D_energy,clus2D_layer,clus2D_rho,clus2D_delta,clus2D_idxs,clus2D_isSeed
0,0,0,0,20.0,317,-2.923339,3.561614,20.577499,0.070715,1,...,317,-2.923340,3.561614,20.577499,0.070715,6,0.070715,3.402823e+38,58,1
1,0,1,0,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,...,317,-1.867480,0.558911,13.877500,0.448077,1,1.254791,3.874668e-01,0,0
2,0,1,1,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,...,317,-1.652880,-0.064618,14.767500,0.684047,2,1.978301,1.593665e-01,5,0
3,0,1,2,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,...,317,-1.869435,0.171449,16.782499,0.929381,3,2.949999,1.610287e-01,13,0
4,0,1,3,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,...,317,-1.758617,0.054618,17.672501,1.211050,4,3.599792,2.032689e-01,23,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364344,11603,13,0,20.0,378,-0.398342,2.480419,46.412498,0.079123,1,...,378,-0.398342,2.480419,46.412498,0.079123,23,0.079123,1.586731e+00,275,1
364345,11603,14,0,20.0,378,-2.018789,0.147382,46.412498,0.096182,1,...,378,-2.018789,0.147382,46.412498,0.096182,23,0.096182,3.402823e+38,277,1
364346,11603,15,0,20.0,378,-2.923340,3.937988,46.412498,0.062449,1,...,378,-2.923340,3.937988,46.412498,0.062449,23,0.062449,1.927641e+00,279,1
364347,11603,16,0,20.0,378,-0.000009,1.125244,52.881496,0.111305,1,...,378,-0.000009,1.125244,52.881500,0.111305,27,0.111305,3.402823e+38,285,1


In [48]:
#Now merge impact to get impact position of beam on each layer for each event
clusters_3d_2d_layer = pd.merge(
    # Left : previously merged dataframe
    clusters_3d_2d,

    #Right : impact df (indexed by event and layer)
    impact, 

    # Map event on both sides
    # Map layer of 2D cluster with layer of impact computation
    left_on=("event", "clus2D_layer"),
    right_on=("event", "layer")
)

In [32]:
#### Make an index that gets us the highest energy 3D cluster per event
# Slice clusters3d to remove 2D cluster rows (just take the first row)
clusters3d_slice = clusters_3d.loc[(slice(None), slice(None), 0)]
# Build an index that selects for each event the 3D cluster with highest energy
index_largest_3D_cluster = clusters3d_slice.groupby(["event"])['clus3D_energy'].transform(max) == clusters3d_slice["clus3D_energy"]

In [55]:
########## 2D cluster positions wrt incident particle
#Now we can compute the difference between 2D cluster position and impact of trajectory per layer :
clusters_3d_2d_layer["clus2D_diff_impact_x"] = clusters_3d_2d_layer["clus2D_x"] - clusters_3d_2d_layer["impactX"]
clusters_3d_2d_layer["clus2D_diff_impact_y"] = clusters_3d_2d_layer["clus2D_y"] - clusters_3d_2d_layer["impactY"]

clusters_3d_2d_layer.set_index(["event", "clus3D_id"])

Unnamed: 0_level_0,Unnamed: 1_level_0,clus3D_hit_id,beamEnergy,NRechits,clus3D_x,clus3D_y,clus3D_z,clus3D_energy,clus3D_size,clus3D_idxs,beamEnergy_clus2D,...,clus2D_energy,clus2D_layer,clus2D_rho,clus2D_delta,clus2D_idxs,clus2D_isSeed,impactX,impactY,clus2D_diff_impact_x,clus2D_diff_impact_y
event,clus3D_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,0,0,20.0,317,-2.923339,3.561614,20.577499,0.070715,1,6,20.0,...,0.070715,6,0.070715,3.402823e+38,58,1,-1.551337,0.074679,-1.372002,3.486936
0,1,5,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,5,20.0,...,1.465993,6,4.788234,3.785385e-01,53,0,-1.551337,0.074679,-0.014105,0.043198
0,1,0,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,0,20.0,...,0.448077,1,1.254791,3.874668e-01,0,0,-1.405556,0.089466,-0.461924,0.469445
0,1,1,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,1,20.0,...,0.684047,2,1.978301,1.593665e-01,5,0,-1.480678,0.110803,-0.172202,-0.175421
0,1,2,20.0,317,-1.650613,0.120992,22.685703,15.710234,16,2,20.0,...,0.929381,3,2.949999,1.610287e-01,13,0,-1.459117,0.065231,-0.410319,0.106218
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11603,13,0,20.0,378,-0.398342,2.480419,46.412498,0.079123,1,33,20.0,...,0.079123,23,0.079123,1.586731e+00,275,1,-1.195111,-0.589565,0.796769,3.069984
11603,14,0,20.0,378,-2.018789,0.147382,46.412498,0.096182,1,34,20.0,...,0.096182,23,0.096182,3.402823e+38,277,1,-1.195111,-0.589565,-0.823677,0.736947
11603,15,0,20.0,378,-2.923340,3.937988,46.412498,0.062449,1,35,20.0,...,0.062449,23,0.062449,1.927641e+00,279,1,-1.195111,-0.589565,-1.728229,4.527554
11603,16,0,20.0,378,-0.000009,1.125244,52.881496,0.111305,1,36,20.0,...,0.111305,27,0.111305,3.402823e+38,285,1,-1.168610,-0.696318,1.168601,1.821562


In [43]:
index_largest_3D_cluster

event  clus3D_id
0      0            False
       1             True
       2            False
       3            False
       4            False
                    ...  
11603  13           False
       14           False
       15           False
       16           False
       17           False
Name: clus3D_energy, Length: 151215, dtype: bool

In [56]:
clusters_3d_2d_largest_3d_cluster = clusters_3d_2d_layer[index_largest_3D_cluster]

  clusters_3d_2d_largest_3d_cluster = clusters_3d_2d_layer[index_largest_3D_cluster]


IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).