In [None]:
%matplotlib inline

In [None]:
import uproot
import numpy as np
import awkward

import plotly.express as px
import pandas

import matplotlib.pyplot as plt

In [None]:
def hits_to_features(hit_data, iev, coll, feats):
    """Collect the per-hit features of one collection for a single event.

    Args:
        hit_data: Object indexed by branch names of the form
            ``"<coll>.<feature>"``; each entry is then indexed by event.
        iev: Index of the event to extract.
        coll: Collection name, used as the branch-name prefix.
        feats: Names of the features to read. Must include ``"type"``,
            whose length is used as the number of hits.

    Returns:
        An ``awkward.Record`` with one field per requested feature plus a
        ``"subdetector"`` field holding an int32 code per hit: 0 when the
        collection name starts with ``"ECAL"``, 1 when it starts with
        ``"HCAL"``, and 2 otherwise.
    """
    columns = {}
    for name in feats:
        columns[name] = hit_data[coll + "." + name][iev]

    # One "type" entry exists per hit, so its length is the hit count.
    n_hits = len(columns["type"])

    if coll.startswith("ECAL"):
        subdetector_code = 0
    elif coll.startswith("HCAL"):
        subdetector_code = 1
    else:
        subdetector_code = 2

    columns["subdetector"] = np.full(n_hits, subdetector_code, dtype=np.int32)
    return awkward.Record(columns)

In [None]:
# Path of the ROOT file inspected in this notebook.
input_path = "/media/joosep/data/clic_edm4hep_2023_02_27/p8_ee_tt_ecm380/reco_p8_ee_tt_ecm380_1.root"
fi = uproot.open(input_path)

# The "events" entry is indexed by collection name in the cells below.
ev = fi["events"]

# Index of the single event that the rest of the notebook looks at.
iev = 0

In [None]:
# Exploratory look: read the arrays stored under "SiTracks_1" so that the
# notebook shows them as this cell's output.
ev["SiTracks_1"].arrays()

In [None]:
# Exploratory look: the "ECALBarrel.cellID" values of the entry at index 0
# (hard-coded here rather than taken from `iev`).
ev["ECALBarrel"]["ECALBarrel.cellID"].array()[0]

In [None]:
# Read the "CollectionIDs" metadata once and reuse it for both columns, so the
# file is not queried twice for the same branch.
collection_id_table = fi.get("metadata").arrays("CollectionIDs")["CollectionIDs"]

# Collection name -> collection ID, taken from the first metadata entry.
collectionIDs = dict(
    zip(collection_id_table["m_names"][0], collection_id_table["m_collectionIDs"][0])
)
# Inverse lookup: collection ID -> collection name.
collectionIDs_reverse = {coll_id: name for name, coll_id in collectionIDs.items()}

# Read every hit collection used below, keyed by collection name.
hit_data = {
    name: ev[name].array()
    for name in (
        "ECALBarrel",
        "ECALEndcap",
        "ECALOther",
        "HCALBarrel",
        "HCALEndcap",
        "HCALOther",
        "MUON",
    )
}

# Per-hit quantities extracted from each collection.
feats = [
    "type",
    "cellID",
    "energy",
    "energyError",
    "time",
    "position.x",
    "position.y",
    "position.z",
]

# Give every hit of event `iev` a running "global" index across all
# collections (visited in sorted name order) and remember which
# (collection ID, index within collection) pair it corresponds to.
hit_idx_global_to_local = {}
per_collection_features = []
for col in sorted(hit_data):
    icol = collectionIDs[col]
    per_collection_features.append(hits_to_features(hit_data[col], iev, col, feats))

    n_hits_in_col = len(hit_data[col][col + ".energy"][iev])
    # Global indices continue where the previous collection stopped.
    first_global = len(hit_idx_global_to_local)
    hit_idx_global_to_local.update(
        {first_global + ihit: (icol, ihit) for ihit in range(n_hits_in_col)}
    )

# Total number of hits indexed so far.
hit_idx_global = len(hit_idx_global_to_local)

# Inverse lookup: (collection ID, local index) -> global index.
hit_idx_local_to_global = {local: glob for glob, local in hit_idx_global_to_local.items()}

# Concatenate the per-collection feature columns, in the same order as the
# global hit index, into a single record with one array per feature.
hit_feature_matrix = awkward.Record({
    field: awkward.concatenate([features[field] for features in per_collection_features])
    for field in per_collection_features[0].fields
})

# Branches read for the clusters: energy, position, the begin/end bounds of
# each cluster's hit slice, and the (collectionID, index) hit references.
cluster_branches = [
    "PandoraClusters/PandoraClusters.energy",
    "PandoraClusters/PandoraClusters.position.x",
    "PandoraClusters/PandoraClusters.position.y",
    "PandoraClusters/PandoraClusters.position.z",
    "PandoraClusters/PandoraClusters.hits_begin",
    "PandoraClusters/PandoraClusters.hits_end",
    "PandoraClusters#1/PandoraClusters#1.collectionID",
    "PandoraClusters#1/PandoraClusters#1.index",
]
dd = ev.arrays(cluster_branches)

# Pick out event `iev`: the hit references and the per-cluster slice bounds.
coll_arr, idx_arr, hits_begin, hits_end = (
    dd[branch][iev]
    for branch in (
        "PandoraClusters#1/PandoraClusters#1.collectionID",
        "PandoraClusters#1/PandoraClusters#1.index",
        "PandoraClusters/PandoraClusters.hits_begin",
        "PandoraClusters/PandoraClusters.hits_end",
    )
)

# Hit -> cluster association stored as parallel coordinate lists: entry k
# links global hit index hit_to_cluster_matrix_coo0[k] to cluster index
# hit_to_cluster_matrix_coo1[k] with weight hit_to_cluster_matrix_w[k].

# index in the array of all hits
hit_to_cluster_matrix_coo0 = []
# index in the cluster array
hit_to_cluster_matrix_coo1 = []

# weight (always 1.0 here; left as a plain Python list)
hit_to_cluster_matrix_w = []

# loop over all clusters
for icluster, (hbeg, hend) in enumerate(zip(hits_begin, hits_end)):

    # get the slice in the hit-reference arrays corresponding to this cluster
    idx_range = idx_arr[hbeg:hend]
    coll_range = coll_arr[hbeg:hend]

    # add edges from hit to cluster
    for icol, idx in zip(coll_range, idx_range):
        hit_to_cluster_matrix_coo0.append(hit_idx_local_to_global[(icol, idx)])
        hit_to_cluster_matrix_coo1.append(icluster)
        hit_to_cluster_matrix_w.append(1.0)

# Force an integer dtype: np.array([]) would otherwise default to float64 when
# the event has no cluster hits, and float arrays cannot be used as indices.
hit_to_cluster_matrix_coo0 = np.array(hit_to_cluster_matrix_coo0, dtype=int)
hit_to_cluster_matrix_coo1 = np.array(hit_to_cluster_matrix_coo1, dtype=int)

In [None]:
# One entry per hit, initialised to -1; entries that are never overwritten
# therefore mark hits without a cluster.
hit_cluster_idx = np.full(len(hit_feature_matrix["position.x"]), -1.0)

In [None]:
# Write each cluster's index onto the hits it references. np.unique returns
# the cluster indices in ascending order, so a hit referenced by several
# clusters ends up with the largest of them.
for cl in np.unique(hit_to_cluster_matrix_coo1):
    in_cluster = hit_to_cluster_matrix_coo1 == cl
    member_hits = hit_to_cluster_matrix_coo0[in_cluster]
    hit_cluster_idx[member_hits] = cl

In [None]:
# Flat per-hit table used by the plots below. "energy" is multiplied by 10
# because the plotting cells use that column as the marker size.
df = pandas.DataFrame(
    {
        "px": hit_feature_matrix["position.x"].to_numpy(),
        "py": hit_feature_matrix["position.y"].to_numpy(),
        "pz": hit_feature_matrix["position.z"].to_numpy(),
        "energy": 10 * hit_feature_matrix["energy"].to_numpy(),
        "cluster": hit_cluster_idx,
    }
)

In [None]:
# x-y projection of all hits: colour encodes the cluster index (-1 for hits
# without a cluster) and marker area encodes the scaled hit energy.
fig_xy, ax_xy = plt.subplots(figsize=(5, 5))
ax_xy.scatter(df["px"], df["py"], c=df["cluster"], s=df["energy"])
ax_xy.set_xlim(-3000, 3000)
ax_xy.set_ylim(-3000, 3000)
# Label the figure so it can be understood on its own.
ax_xy.set_xlabel("position.x")
ax_xy.set_ylabel("position.y")
ax_xy.set_title("Hits coloured by cluster index")
# Render explicitly so the cell does not echo the axis-limit tuple as output.
plt.show()

In [None]:
# Interactive 3D view of the same hits. Note the axis assignment: the plot's
# y axis shows "pz" and its z axis shows "py".
fig = px.scatter_3d(
    df,
    x="px",
    y="pz",
    z="py",
    color="cluster",
    size="energy",
    color_continuous_scale=px.colors.diverging.Spectral,
)
fig.show()