In [None]:
%matplotlib inline
import bz2
import json
import pandas
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import networkx as nx
from networkx.drawing.nx_pydot import graphviz_layout


In [None]:
# Alternative input sample (kept for reference):
#data = json.load(bz2.BZ2File("/home/joosep/Downloads/pythia6_ttbar_0001_pandora.json.bz2", "r"))

# NOTE(review): absolute, user-specific path -- adjust to the local location of the sample.
# The context manager closes the compressed file once the JSON has been parsed;
# json.load accepts the binary stream directly.
with bz2.BZ2File("/home/joosep/Downloads/pythia6_ttbar_0001_pandora_0.json.bz2", "r") as fi:
    data = json.load(fi)

In [None]:
# Reference: LC-DET-2006-004
# http://flc.desy.de/lcnotes/notes/localfsExplorer_read?currentPath=/afs/desy.de/group/flc/lcnotes/LC-DET-2006-004.pdf
# Constants used by track_pt(): pt = a * |b / omega|.
# NOTE(review): a is presumably the curvature-to-momentum conversion factor matching the
# units of the track omega -- confirm against the reference above.
a = 3*10**-4
b = 5 #B-field in tesla 

In [None]:
def track_pt(omega):
    """Return a * |b / omega| for a track curvature value or array.

    `a` and `b` are the notebook-level constants (b is the B-field in tesla).
    Works element-wise on numpy arrays and pandas Series.
    """
    field_over_curvature = b / omega
    return a * np.abs(field_over_curvature)

In [None]:
# Index of the event to inspect; every df_* below refers to this single event.
iev = 3
event = data[iev]

# One DataFrame per collection stored for the event.
df_gen = pandas.DataFrame(event["genparticles"])

df_hit = pandas.DataFrame(event["track_hits"])
df_cl = pandas.DataFrame(event["clusters"])
df_tr = pandas.DataFrame(event["tracks"])
df_ecal = pandas.DataFrame(event["ecal_hits"])
df_hcal = pandas.DataFrame(event["hcal_hits"])
df_pfs = pandas.DataFrame(event["pfs"])

# Derived track kinematics: pt from the curvature, then the cartesian components
# from phi and tan(lambda).
df_tr["pt"] = track_pt(df_tr["omega"])
for component, direction in (
    ("px", np.cos(df_tr["phi"])),
    ("py", np.sin(df_tr["phi"])),
    ("pz", df_tr["tan_lambda"]),
):
    df_tr[component] = direction * df_tr["pt"]

In [None]:
# Show the genparticles whose |pdgid| is 15 (the ones node_color() draws in purple).
df_gen[df_gen["pdgid"].abs()==15]

In [None]:
# Inspect the "track_hits" collection of the selected event.
df_hit

In [None]:
# Inspect the "ecal_hits" collection of the selected event.
df_ecal

In [None]:
# Inspect the "hcal_hits" collection of the selected event.
df_hcal

In [None]:
# Inspect the tracks, including the derived pt/px/py/pz columns.
df_tr

In [None]:
# Inspect the "clusters" collection of the selected event.
df_cl

In [None]:
# Inspect the full "genparticles" collection of the selected event.
df_gen

In [None]:
# Inspect the "pfs" collection (reconstructed particle-flow objects) of the selected event.
df_pfs

In [None]:
# Directed graph of the event: genparticle -> genparticle (parentage),
# genparticle -> cluster/track (contributions), cluster/track -> PF object.
# Every node carries `typ` (particle/cluster type code) and `e` (energy, or pt for tracks).
g = nx.DiGraph()

# Genparticle nodes are all added before any edge so that node order follows df_gen.
for igen in range(len(df_gen)):
    gen_row = df_gen.iloc[igen]
    g.add_node(f"gen{igen}", typ=int(gen_row["pdgid"]), e=gen_row["energy"])

# Parent -> child edges; idx_parent0 == -1 marks a genparticle without a recorded parent.
for igen in range(len(df_gen)):
    idx_parent0 = int(df_gen.iloc[igen]["idx_parent0"])
    if idx_parent0 == -1:
        continue
    g.add_edge(f"gen{idx_parent0}", f"gen{igen}")

# Clusters: link a genparticle when its contribution exceeds 20% of the cluster energy.
for icl in range(len(df_cl)):
    cl_row = df_cl.iloc[icl]
    cl_name = f"clu{icl}"
    g.add_node(cl_name, typ=cl_row["type"], e=cl_row["energy"])
    for gp, gp_w in cl_row["gp_contributions"].items():
        if gp_w / cl_row["energy"] > 0.2:
            g.add_edge(f"gen{gp}", cl_name)

# Tracks: link a genparticle when its contribution exceeds 20% of the track's nhits.
for itr in range(len(df_tr)):
    tr_row = df_tr.iloc[itr]
    tr_name = f"tra{itr}"
    g.add_node(tr_name, typ=0, e=tr_row["pt"])
    for gp, gp_w in tr_row["gp_contributions"].items():
        if gp_w / tr_row["nhits"] > 0.2:
            g.add_edge(f"gen{gp}", tr_name)

# PF objects: connect each one to its cluster and/or track (-1 means "none").
for ipf in range(len(df_pfs)):
    pf_row = df_pfs.iloc[ipf]
    pf_name = f"pfo{ipf}"
    g.add_node(pf_name, typ=int(pf_row["type"]), e=pf_row["energy"])

    cl_idx = int(pf_row["cluster_idx"])
    if cl_idx != -1:
        g.add_edge(f"clu{cl_idx}", pf_name)

    tr_idx = int(pf_row["track_idx"])
    if tr_idx != -1:
        g.add_edge(f"tra{tr_idx}", pf_name)

In [None]:
def node_color(node):
    """Return the drawing colour for a node name of graph `g`.

    Genparticles ("gen...") are red, or purple when |typ| == 15; clusters
    ("clu...") are blue; tracks ("tra...") are green; anything else is gray.
    """
    if node.startswith("gen"):
        is_pdg15 = abs(g.nodes[node]["typ"]) == 15
        return "purple" if is_pdg15 else "red"

    for prefix, color in (("clu", "blue"), ("tra", "green")):
        if node.startswith(prefix):
            return color
    return "gray"
    
def node_label(node):
    """Return the text label of a node of graph `g`: its `typ` attribute formatted as a string."""
    return f"{g.nodes[node]['typ']}"

In [None]:
# Draw the full event graph with a hierarchical "dot" layout and save it as SVG.
plt.figure(figsize=(50, 30))
pos = graphviz_layout(g, prog="dot")

# Node area scales with the stored energy (pt for tracks); colour encodes the node kind.
sizes = [5 * g.nodes[n]["e"] for n in g.nodes]
colors = [node_color(n) for n in g.nodes]
text = {n: node_label(n) for n in g.nodes}

nx.draw_networkx_nodes(g, pos, node_size=sizes, node_color=colors)
nx.draw_networkx_labels(g, pos, labels=text, font_size=5)
nx.draw_networkx_edges(g, pos, node_size=100.0);
plt.savefig("plot.svg")

In [None]:
# Dense (reco element x genparticle) matrices of the per-genparticle contributions
# stored in each track's / cluster's "gp_contributions" mapping.
matrix_tr_to_gp = np.zeros((len(df_tr), len(df_gen)))
matrix_cl_to_gp = np.zeros((len(df_cl), len(df_gen)))

for matrix, df_reco in ((matrix_tr_to_gp, df_tr), (matrix_cl_to_gp, df_cl)):
    for ireco in range(len(df_reco)):
        contributions = df_reco.loc[ireco]["gp_contributions"]
        # the mapping keys are converted with int() to get the genparticle column
        for gp, val in contributions.items():
            matrix[ireco, int(gp)] += val

In [None]:
import networkx as nx

In [None]:
def filter_gp(gp):
    """Return True if genparticle `gp` (index label in `df_gen`) has status 1 and energy above 0.2."""
    gen_row = df_gen.loc[gp]
    has_status_one = gen_row["status"] == 1
    return bool(has_status_one and gen_row["energy"] > 0.2)

In [None]:
# Map each reco element key, ("tr", index) or ("cl", index), to the PF object built from it.
# A PF object with a track is keyed by the track; its cluster is only used when it has no track.
reco_to_pf = {}
for ipf in range(len(df_pfs)):
    pf_row = df_pfs.loc[ipf]
    if pf_row["track_idx"] != -1:
        key = ("tr", int(pf_row["track_idx"]))
    elif pf_row["cluster_idx"] != -1:
        key = ("cl", int(pf_row["cluster_idx"]))
    else:
        #PF should always have a track or a cluster associated
        assert False
        continue

    # each reco element may seed at most one PF object
    assert key not in reco_to_pf
    reco_to_pf[key] = ipf

In [None]:
# Undirected association graph between accepted genparticles ("gp", i)
# and reco elements ("cl", i) / ("tr", i).
dg = nx.Graph()

# indices of all genparticles that passed filter_gp and touch at least one reco element
gps = set()

# clusters: edge weight is the genparticle's contribution stored for the cluster
for icl in range(len(df_cl)):
    cl_node = ("cl", icl)
    dg.add_node(cl_node)
    for gp_key, weight in df_cl.loc[icl]["gp_contributions"].items():
        gp = int(gp_key)
        if not filter_gp(gp):
            continue
        gp_node = ("gp", gp)
        dg.add_node(gp_node)
        gps.add(gp)
        dg.add_edge(gp_node, cl_node, weight=weight)

# tracks: the stored contribution is ignored and a very high fixed weight is used,
# because a genparticle should always be associated to a track when one is available
for itr in range(len(df_tr)):
    tr_node = ("tr", itr)
    dg.add_node(tr_node)
    for gp_key in df_tr.loc[itr]["gp_contributions"].keys():
        gp = int(gp_key)
        if not filter_gp(gp):
            continue
        gp_node = ("gp", gp)
        dg.add_node(gp_node)
        gps.add(gp)
        dg.add_edge(gp_node, tr_node, weight=9999.0)

In [None]:
# "kind=index" text label for every node of the association graph, e.g. "tr=3".
node_labels = {(kind, idx): f"{kind}={idx}" for kind, idx in dg.nodes}

In [None]:
# Draw the genparticle / cluster / track association graph using a pydot-computed
# layout, labelling every node with its "kind=index" string from node_labels.
plt.figure(figsize=(20,20))
pos = nx.nx_pydot.pydot_layout(dg)
nx.draw_networkx(dg, pos=pos, labels=node_labels)

In [None]:
# Greedy one-to-one assignment of genparticles to reco elements (clusters and tracks).
#
# `gps` is already the set of unique genparticle indices collected while building `dg`.
# Reco elements are removed from the graph as soon as they are assigned, so the
# outcome depends on the order in which the genparticles are visited.
#
# The pruning is done on a copy: `dg` itself stays intact, which keeps this cell
# re-runnable (previously a second run saw an already pruned graph and produced
# different pairs).
dg_pairing = dg.copy()

# reco element key -> (genparticle index, PF object index or None)
pairs = {}
for gp in gps:
    gp_node = ("gp", gp)

    #find the neighboring reco elements (clusters and tracks) that are still unassigned
    neighbors = list(dg_pairing.neighbors(gp_node))

    if neighbors:
        #choose the neighbor with the largest edge weight as the "key" reco element;
        #on ties the first neighbor wins, as with a stable descending sort
        reco_obj = max(neighbors, key=lambda n: dg_pairing.edges[gp_node, n]["weight"])

        #remove the reco element from the graph, so it can't be associated to anything else
        dg_pairing.remove_node(reco_obj)

        #a reco element may not have produced a PF object, in which case this is None
        pf_obj = reco_to_pf.get(reco_obj)

        assert reco_obj not in pairs
        pairs[reco_obj] = (gp, pf_obj)

    #this genparticle has no unassigned reco element left: its deposits are shared with others
    else:
        print("genparticle {} is merged and cannot be reconstructed".format(gp))
        print(df_gen.loc[gp])

In [None]:
# Number of tracks, number of clusters, and number of reco elements matched to a genparticle.
len(df_tr), len(df_cl), len(pairs)

In [None]:
def track_as_array(df_tr, itr):
    """Return the feature list of track `itr` (index label in `df_tr`).

    Layout: [0, px, py, pz, nhits, d0, z0]; the leading 0 marks the element as a track.
    """
    trk = df_tr.loc[itr]
    features = [trk[col] for col in ("px", "py", "pz", "nhits", "d0", "z0")]
    return [0] + features

def cluster_as_array(df_cl, icl):
    """Return the feature list of cluster `icl` (index label in `df_cl`).

    Layout: [1, x, y, z, nhits_ecal, nhits_hcal, 0.0]; the leading 1 marks the element
    as a cluster and the trailing 0.0 pads the list to the length of a track's list.
    """
    clu = df_cl.loc[icl]
    features = [clu[col] for col in ("x", "y", "z", "nhits_ecal", "nhits_hcal")]
    return [1] + features + [0.0]

def gen_as_array(df_gen, igen):
    """Return the target vector of genparticle `igen`, or zeros when there is no match.

    Parameters:
        df_gen: DataFrame with columns pdgid, charge, px, py, pz, energy.
        igen: index label of the genparticle in `df_gen`, or None for "no genparticle".

    Returns:
        numpy array [abs(pdgid), charge, px, py, pz, energy], or np.zeros(6) for None.
    """
    # Compare against None explicitly: index 0 is a valid genparticle and must not
    # be mistaken for "no match" (a plain truthiness test did exactly that).
    if igen is None:
        return np.zeros(6)

    row = df_gen.loc[igen]
    return np.array([abs(row["pdgid"]), row["charge"], row["px"], row["py"], row["pz"], row["energy"]])
    
def pf_as_array(df_pfs, igen):
    """Return the target vector of PF object `igen`, or zeros when there is no match.

    Parameters:
        df_pfs: DataFrame with columns type, charge, px, py, pz, energy.
        igen: index label of the PF object in `df_pfs`, or None for "no PF object".

    Returns:
        numpy array [abs(type), charge, px, py, pz, energy], or np.zeros(6) for None.
    """
    # Compare against None explicitly: PF object 0 is a regular candidate and must
    # not be mistaken for "no match" (a plain truthiness test did exactly that).
    if igen is None:
        return np.zeros(6)

    row = df_pfs.loc[igen]
    return np.array([abs(row["type"]), row["charge"], row["px"], row["py"], row["pz"], row["energy"]])

In [None]:
def flatten_event(df_tr, df_cl, df_gen, df_pfs, pairs):
    """Flatten one event into aligned input and target arrays.

    One row is produced per reco element: all tracks first, then all clusters.

    Parameters:
        df_tr, df_cl: track and cluster DataFrames (inputs).
        df_gen, df_pfs: genparticle and PF-object DataFrames (targets).
        pairs: mapping ("tr"|"cl", index) -> (genparticle index, PF index or None).

    Returns:
        (Xs, ys_gen, ys_cand): the stacked per-element outputs of
        track_as_array/cluster_as_array, gen_as_array and pf_as_array. Elements
        missing from `pairs` get whatever those helpers return for None.
    """
    Xs, ys_gen, ys_cand = [], [], []

    # (key prefix, source frame, feature extractor), in output row order
    reco_sources = (
        ("tr", df_tr, track_as_array),
        ("cl", df_cl, cluster_as_array),
    )
    for kind, df_reco, as_array in reco_sources:
        for idx in range(len(df_reco)):
            Xs.append(as_array(df_reco, idx))

            key = (kind, idx)
            gp, rp = (pairs[key][0], pairs[key][1]) if key in pairs else (None, None)
            ys_gen.append(gen_as_array(df_gen, gp))
            ys_cand.append(pf_as_array(df_pfs, rp))

    # stack along the last axis and transpose so that rows are reco elements
    return tuple(np.stack(rows, axis=-1).T for rows in (Xs, ys_gen, ys_cand))

In [None]:
# Flatten the selected event; the three arrays are built row-by-row in lockstep,
# so the lengths shown below are expected to be equal.
Xs, ys_gen, ys_cand = flatten_event(df_tr, df_cl, df_gen, df_pfs, pairs)
len(Xs), len(ys_gen), len(ys_cand)

In [None]:
import sklearn
import sklearn.metrics

In [None]:
# Distinct values of the first target column of ys_gen (abs(pdgid) as filled by gen_as_array).
np.unique(ys_gen[:, 0])

In [None]:
# Distinct values of the first target column of ys_cand (abs(type) as filled by pf_as_array).
np.unique(ys_cand[:, 0])

In [None]:
# Class ids used for the confusion matrix, in display order; 0 is the "none" class.
labels = [0, 13, 11, 22, 130, 211, 321, 2112, 2212]

# Axis text for each class id. Raw strings keep the LaTeX backslashes literal:
# "\g" and "\p" are invalid escape sequences in ordinary string literals and
# trigger a warning on recent Python versions. The resulting text is unchanged.
labels_text = {
    0: "none",
    13: "mu",
    11: "el",
    22: r"$\gamma$",
    130: r"$K^0_L$",
    211: r"$\pi^\pm$",
    321: r"$K^+$",
    2112: "n",
    2212: "p"
}

# Rows are the gen-level ids, columns the PF candidate ids; normalize="true"
# normalizes the matrix over the true (row) labels.
cm = sklearn.metrics.confusion_matrix(
    ys_gen[:, 0],
    ys_cand[:, 0],
    labels=labels,
    normalize="true"
)

In [None]:
# Show the confusion matrix with the class names on both axes.
tick_positions = range(len(labels))
tick_names = [labels_text[l] for l in labels]

plt.imshow(cm, cmap="Blues")
plt.xticks(tick_positions, tick_names, rotation=90);
plt.yticks(tick_positions, tick_names);
plt.xlabel("reco")
plt.ylabel("gen")

In [None]:
# Interactive 3D event display: genparticle and track momenta as lines from the
# origin, calorimeter hits, clusters and tracker hits as markers.
# All traces are drawn with the y and z coordinates swapped on the plot axes.


def _origin_segments(values, scale):
    """Return [0.0, scale*v0, 0.0, scale*v1, ...] so that each value is drawn as a line from the origin."""
    coords = []
    for value in values:
        coords += [0.0, scale * value]
    return coords


def _marker_trace(df, color, size, opacity, name):
    """Build a square-marker Scatter3d trace from the x/y/z columns of `df` (y and z swapped)."""
    return go.Scatter3d(
        x=df["x"].values,
        y=df["z"].values,
        z=df["y"].values,
        mode="markers",
        marker={
            "symbol": "square",
            "opacity": opacity,
            "color": color,
            "size": size
        },
        name=name
    )


# Draw only the genparticles selected for the association graph (`gps`).
# The momenta are read from the selected rows; previously only the *number* of
# selected particles was used and the first rows of the unfiltered df_gen were drawn.
# sorted() turns the set into a list, which .loc accepts as an indexer.
df_gen_sel = df_gen.loc[sorted(gps)]

mult = 10
gen_x = _origin_segments(df_gen_sel["px"].values, mult)
gen_y = _origin_segments(df_gen_sel["py"].values, mult)
gen_z = _origin_segments(df_gen_sel["pz"].values, mult)

points_gen = go.Scatter3d(
    x=gen_x,
    y=gen_z,
    z=gen_y,
    mode="lines",
    line=dict(color='rgba(0, 0, 0, 1.0)'),
    name="gen"
)

mult = 40
trk_x = _origin_segments(df_tr["px"].values, mult)
trk_y = _origin_segments(df_tr["py"].values, mult)
trk_z = _origin_segments(df_tr["pz"].values, mult)

# Same y<->z swap as every other trace, so tracks share the frame of the hits
# and clusters (the track trace used to be the only one without the swap).
points_trk = go.Scatter3d(
    x=trk_x,
    y=trk_z,
    z=trk_y,
    mode="lines",
    line=dict(color='rgba(0, 255, 0, 1.0)'),
    name="tracks"
)

points_ecal = _marker_trace(df_ecal, color="blue", size=1.0, opacity=0.5, name="ECAL")
points_hcal = _marker_trace(df_hcal, color="red", size=1.0, opacity=0.5, name="HCAL")
points_clusters = _marker_trace(df_cl, color="gray", size=5.0, opacity=0.8, name="clusters")
points_hit = _marker_trace(df_hit, color="green", size=1.0, opacity=0.8, name="hits")

fig = go.Figure(data=[
    points_gen, points_trk,
    points_ecal, points_hcal,
    points_clusters, points_hit
])

fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    margin=go.layout.Margin(
        l=50,
        r=0,
        b=0,
        t=50,
    ),
    scene_camera={
        "eye": dict(x=0.8, y=0.8, z=0.8)
    }
)

fig.show()