In [None]:
import bz2
import json
import pandas
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [None]:
# Location of the bz2-compressed JSON input; kept in one place so it is easy to repoint.
INPUT_PATH = "/home/joosep/Downloads/pythia6_ttbar_0001_pandora.json.bz2"

# Read through a context manager so the compressed file handle is closed as
# soon as parsing finishes (the bare BZ2File(...) call left it open).
# `data` is indexed by event number further down (data[iev][...]).
with bz2.BZ2File(INPUT_PATH, "r") as input_file:
    data = json.load(input_file)

In [None]:
# Constants for the track transverse-momentum conversion pt = a * |b / omega|
# implemented in track_pt.
# Reference (LC-DET-2006-004):
# http://flc.desy.de/lcnotes/notes/localfsExplorer_read?currentPath=/afs/desy.de/group/flc/lcnotes/LC-DET-2006-004.pdf
a = 3*10**-4  # conversion factor; presumably 0.3 GeV/(T*m) rescaled for omega in 1/mm -- TODO confirm units
b = 5 #B-field in tesla

In [None]:
def track_pt(omega):
    """Convert a track parameter ``omega`` into a transverse momentum.

    Evaluates ``a * |b / omega|`` using the module-level constants ``a`` and
    ``b`` (``b`` is the B-field in tesla). Works element-wise on scalars,
    numpy arrays and pandas Series.

    NOTE(review): omega is presumably the signed track curvature; the units of
    the result depend on the units of ``a`` and ``omega`` -- confirm.
    """
    field_over_omega = b / omega
    # The absolute value drops the sign of omega.
    return a * np.abs(field_over_omega)

In [None]:
# Index of the event to inspect.
iev = 28

# One DataFrame per collection stored for the chosen event.
df_gen, df_hit, df_cl, df_tr, df_ecal, df_hcal, df_pfs = (
    pandas.DataFrame(data[iev][collection])
    for collection in (
        "genparticles",
        "track_hits",
        "clusters",
        "tracks",
        "ecal_hits",
        "hcal_hits",
        "pfs",
    )
)

# Derive the track momentum components from omega, phi and tan_lambda:
# pt from track_pt, then px/py from the phi angle and pz from tan_lambda.
df_tr["pt"] = track_pt(df_tr["omega"])
df_tr["px"] = df_tr["pt"]*np.cos(df_tr["phi"])
df_tr["py"] = df_tr["pt"]*np.sin(df_tr["phi"])
df_tr["pz"] = df_tr["pt"]*df_tr["tan_lambda"]

In [None]:
import networkx as nx

In [None]:
def filter_gp(gp):
    """Decide whether genparticle ``gp`` should take part in the matching.

    Looks up row ``gp`` of the module-level ``df_gen`` and returns True only
    when its ``status`` equals 1 and its ``energy`` is above 0.2
    (energy units are not visible here -- presumably GeV, TODO confirm).
    """
    particle = df_gen.loc[gp]
    status_ok = particle["status"] == 1
    # `and` short-circuits, so the energy is only inspected for status-1 particles.
    return bool(status_ok and particle["energy"] > 0.2)

In [None]:
# Map each reco element key, ("tr", track index) or ("cl", cluster index),
# to the index of the PF candidate that references it.
reco_to_pf = {}
for ipf in range(len(df_pfs)):
    pf_row = df_pfs.loc[ipf]

    # A track reference takes precedence; the cluster is used only when the
    # candidate has no track (index -1 means "none").
    if pf_row["track_idx"] != -1:
        key = ("tr", int(pf_row["track_idx"]))
    elif pf_row["cluster_idx"] != -1:
        key = ("cl", int(pf_row["cluster_idx"]))
    else:
        # A PF candidate should always have a track or a cluster associated.
        assert False

    # Each reco element may be claimed by at most one PF candidate.
    assert key not in reco_to_pf
    reco_to_pf[key] = ipf

In [None]:
# Undirected graph linking genparticle nodes ("gp", i) to the reco elements
# they contribute to: cluster nodes ("cl", i) and track nodes ("tr", i).
dg = nx.Graph()

# Indices of all genparticles that pass filter_gp and touch a reco element.
gps = set()

# Cluster pass: every accepted contribution becomes an edge whose weight is
# the value stored in the cluster's gp_contributions mapping.
for icl in range(len(df_cl)):
    cl_node = ("cl", icl)
    dg.add_node(cl_node)
    for gp_key, contribution in df_cl.loc[icl, "gp_contributions"].items():
        gp = int(gp_key)
        if not filter_gp(gp):
            continue
        gp_node = ("gp", gp)
        dg.add_node(gp_node)
        gps.add(gp)
        dg.add_edge(gp_node, cl_node, weight=contribution)

# Track pass: same linking, but with a fixed large weight (presumably chosen
# to exceed any cluster weight so that a track ranks first when neighbors
# are sorted by weight -- TODO confirm).
for itr in range(len(df_tr)):
    tr_node = ("tr", itr)
    dg.add_node(tr_node)
    for gp_key in df_tr.loc[itr, "gp_contributions"]:
        gp = int(gp_key)
        if not filter_gp(gp):
            continue
        gp_node = ("gp", gp)
        dg.add_node(gp_node)
        gps.add(gp)
        dg.add_edge(gp_node, tr_node, weight=9999.0)

# Unique genparticles (gps is already a set at this point; the re-wrap is kept as-is).
gps = set(gps)

# Greedy genparticle -> reco element assignment.
# For every genparticle:
#   * collect the reco elements (clusters and tracks) it is still connected to,
#   * rank them by edge weight, largest first,
#   * take the top one as the "key" reco element of that genparticle,
#   * remove that reco element from the graph so no later genparticle can claim it.
# pairs maps the reco element key to (genparticle index, PF candidate index or None).
pairs = {}
for gp in gps:
    gp_node = ("gp", gp)
    ranked = sorted(
        ((nbr, dg.edges[gp_node, nbr]["weight"]) for nbr in dg.neighbors(gp_node)),
        key=lambda candidate: candidate[1],
        reverse=True,
    )

    if not ranked:
        # Every reco element of this genparticle was already taken.
        print("genparticle {} is merged and cannot be reconstructed".format(gp))
        continue

    reco_obj = ranked[0][0]
    dg.remove_node(reco_obj)

    assert reco_obj not in pairs
    # reco_to_pf has no entry for reco elements without a PF candidate -> None.
    pairs[reco_obj] = (gp, reco_to_pf.get(reco_obj))

In [None]:
# Number of tracks, number of clusters, and number of reco elements that were
# assigned a genparticle in this event.
len(df_tr), len(df_cl), len(pairs)

In [None]:
def track_as_array(itr):
    """Build the input feature list for track ``itr`` of ``df_tr``.

    Returns ``[0, px, py, pz, nhits, d0, z0]``; the leading 0 is the element
    type tag used for tracks (clusters use 1).
    """
    track = df_tr.loc[itr]
    features = [0]
    for column in ("px", "py", "pz", "nhits", "d0", "z0"):
        features.append(track[column])
    return features

def cluster_as_array(icl):
    """Build the input feature list for cluster ``icl`` of ``df_cl``.

    Returns ``[1, x, y, z, nhits_ecal, nhits_hcal, 0.0]``; the leading 1 is the
    element type tag used for clusters and the trailing 0.0 pads the list to
    the same length as a track feature list.
    """
    cluster = df_cl.loc[icl]
    features = [1]
    for column in ("x", "y", "z", "nhits_ecal", "nhits_hcal"):
        features.append(cluster[column])
    features.append(0.0)
    return features

def gen_as_array(igen):
    """Return the target vector for genparticle ``igen``.

    Parameters
    ----------
    igen : int or None
        Row label in the module-level ``df_gen``, or None when the reco
        element has no matched genparticle.

    Returns
    -------
    numpy.ndarray
        ``[pdgid, px, py, pz, energy]`` for a matched genparticle, or five
        zeros when ``igen`` is None.
    """
    # Compare against None explicitly: index 0 is a valid genparticle, and a
    # plain truthiness test would silently turn it into the all-zero target.
    if igen is None:
        return np.zeros(5)
    row = df_gen.loc[igen]
    return np.array([row["pdgid"], row["px"], row["py"], row["pz"], row["energy"]])
    
def pf_as_array(igen):
    """Return the target vector for a PF candidate.

    Parameters
    ----------
    igen : int or None
        Row label in the module-level ``df_pfs`` (the parameter keeps its
        historical name), or None when the reco element has no PF candidate.

    Returns
    -------
    numpy.ndarray
        ``[type, px, py, pz, energy]`` for an existing candidate, or five
        zeros when ``igen`` is None.
    """
    # Compare against None explicitly: PF candidate 0 is a valid row, and a
    # plain truthiness test would silently replace it with zeros.
    if igen is None:
        return np.zeros(5)
    row = df_pfs.loc[igen]
    return np.array([row["type"], row["px"], row["py"], row["pz"], row["energy"]])

In [None]:
# Assemble the per-element inputs (Xs) and the two target sets: the matched
# genparticle (ys_gen) and the matched PF candidate (ys_cand).
# Tracks are appended first, then clusters; unmatched elements get zero targets.
Xs, ys_gen, ys_cand = [], [], []

for kind, n_elements, as_features in (
    ("tr", len(df_tr), track_as_array),
    ("cl", len(df_cl), cluster_as_array),
):
    for idx in range(n_elements):
        Xs.append(as_features(idx))

        # pairs holds (genparticle index, PF index) for matched reco elements.
        gp, rp = pairs.get((kind, idx), (None, None))
        ys_gen.append(gen_as_array(gp))
        ys_cand.append(pf_as_array(rp))

# One row per reco element after the transpose.
Xs = np.stack(Xs, axis=-1).T
ys_gen = np.stack(ys_gen, axis=-1).T
ys_cand = np.stack(ys_cand, axis=-1).T

In [None]:
# Row of Xs / ys_gen / ys_cand inspected by the following cells.
i = 106

# Total number of reco elements (tracks + clusters). Kept as the last
# expression so the notebook actually displays it.
len(Xs)

In [None]:
# Input feature row of element i (tracks come first in Xs, then clusters).
Xs[i]

In [None]:
# Genparticle target of element i: [pdgid, px, py, pz, energy], zeros if unmatched.
ys_gen[i]

In [None]:
# PF candidate target of element i: [type, px, py, pz, energy], zeros if there is none.
ys_cand[i]

In [None]:
# First target column for all elements: the genparticle pdgid (0 where unmatched).
ys_gen[:, 0]

In [None]:
# First target column for all elements: the PF candidate type (0 where there is none).
ys_cand[:, 0]

In [None]:
# Momentum vectors of the matched genparticles, drawn as line segments that
# start at the origin and are scaled by `mult` for visibility.
mult = 10

# Select exactly the genparticles in `gps`. The previous loop only used the
# *count* of selected particles and then read the first rows of df_gen, so it
# drew the wrong particles. sorted() turns the set into a list, which .loc
# requires (set indexers are rejected by recent pandas versions).
gen_selected = df_gen.loc[sorted(gps)]

gen_x = []
gen_y = []
gen_z = []
# Dedicated loop variable names: the notebook-level index `i` used by the
# inspection cells is left untouched.
for gen_px, gen_py, gen_pz in zip(
    gen_selected["px"].values,
    gen_selected["py"].values,
    gen_selected["pz"].values,
):
    gen_x += [0.0, mult*gen_px]
    gen_y += [0.0, mult*gen_py]
    gen_z += [0.0, mult*gen_pz]

# y and z are swapped on purpose, matching the hit and cluster traces
# (detector z is shown on the plot's y axis).
points_gen = go.Scatter3d(
    x=gen_x,
    y=gen_z,
    z=gen_y,
    mode="lines",
    line=dict(color='rgba(0, 0, 0, 1.0)'),
    name="gen"
)

# Momentum vectors of the reconstructed tracks, drawn as line segments that
# start at the origin and are scaled by `mult` for visibility.
mult = 40

trk_x = []
trk_y = []
trk_z = []
# Dedicated loop variable names: the notebook-level index `i` used by the
# inspection cells is left untouched.
for trk_px, trk_py, trk_pz in zip(
    df_tr["px"].values,
    df_tr["py"].values,
    df_tr["pz"].values,
):
    trk_x += [0.0, mult*trk_px]
    trk_y += [0.0, mult*trk_py]
    trk_z += [0.0, mult*trk_pz]

# Every other trace in this figure (gen, ECAL, HCAL, clusters, hits) puts the
# detector z coordinate on the plot's y axis. Tracks were the only trace
# plotted unswapped, which misoriented them relative to everything else.
points_trk = go.Scatter3d(
    x=trk_x,
    y=trk_z,
    z=trk_y,
    mode="lines",
    line=dict(color='rgba(0, 255, 0, 1.0)'),
    name="tracks"
)
    
def _hit_scatter(df, color, name, opacity=0.5, size=1.0):
    """Square-marker 3D scatter of the x/y/z columns of ``df``.

    The y and z columns are swapped on the plot axes (detector z is shown on
    the plot's y axis).
    """
    return go.Scatter3d(
        x=df["x"].values,
        y=df["z"].values,
        z=df["y"].values,
        mode="markers",
        marker=dict(symbol="square", opacity=opacity, color=color, size=size),
        name=name,
    )


# Calorimeter hits, clusters and track hits as marker traces.
points_ecal = _hit_scatter(df_ecal, "blue", "ECAL")
points_hcal = _hit_scatter(df_hcal, "red", "HCAL")
points_clusters = _hit_scatter(df_cl, "gray", "clusters", opacity=0.8, size=2.0)
points_hit = _hit_scatter(df_hit, "green", "hits", opacity=0.8)

# Combine all traces into one 3D event display.
scene_traces = [
    points_gen,
    points_trk,
    points_ecal,
    points_hcal,
    points_clusters,
    points_hit,
]
fig = go.Figure(data=scene_traces)

# Fixed-size canvas with a small left/top margin and a preset camera position.
camera_eye = dict(x=0.8, y=0.8, z=0.8)
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    margin=go.layout.Margin(l=50, r=0, b=0, t=50),
    scene_camera={"eye": camera_eye},
)

fig.show()