In [None]:
import matplotlib.pyplot as plt
import numpy as np
import numba
import hepaccelerate.backend_cpu as ha
import uproot
import sklearn.metrics
from collections import Counter

In [None]:
def freqtable(arr, labels):
    """Count how often each label occurs in ``arr``.

    Arguments
    ---------
    arr:    array supporting element-wise ``==`` (e.g. a NumPy array)
    labels: iterable of hashable values to look for

    Returns
    -------
    dict mapping every entry of ``labels`` to the number of elements of
    ``arr`` that compare equal to it (0 when the label never occurs).
    """
    return {label: np.sum(arr == label) for label in labels}

In [None]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    A new 8x6 figure is created and left as the current pyplot figure, so
    the caller can adjust title/labels or save it afterwards. The input
    matrix is never modified.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low'].
                  May be None, in which case no tick labels are set and the
                  axis limits are taken from the shape of cm.

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum;
                  rows that sum to zero are shown as 0)

    Returns
    -------
    None

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    # Local imports keep the helper copy-pasteable on its own.
    import itertools

    import matplotlib.pyplot as plt
    import numpy as np

    # Work on a copy: the NaN clean-up below assigns in place and must not
    # write into the caller's matrix when normalize is False.
    cm = np.array(cm)

    # Accuracy is computed from the raw counts, before any normalisation.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # A row without entries yields 0/0 -> NaN; silence the warning here,
        # the NaNs are replaced by 0 right below.
        with np.errstate(divide='ignore', invalid='ignore'):
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    cm[np.isnan(cm)] = 0.0

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text so the number stays readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.2f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black",
            fontsize=8)

    # Fall back to the matrix shape when no class names were given; the
    # original code called len(None) here and raised TypeError.
    if target_names is not None:
        n_cols = n_rows = len(target_names)
    else:
        n_rows, n_cols = cm.shape[0], cm.shape[1]

    plt.ylabel('True label')
    plt.xlim(-1, n_cols)
    plt.ylim(-1, n_rows)
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.tight_layout()

In [None]:
@numba.njit
def associate_deltar(etaphi1, etaphi2, dr_max=0.001):
    """Greedily pair rows of two (eta, phi) arrays that are closer than ``dr_max``.

    Arguments
    ---------
    etaphi1, etaphi2: 2D arrays; column 0 is used as eta and column 1 as phi
    dr_max:           upper bound (exclusive) on sqrt(deta**2 + dphi**2) for
                      two rows to be paired; defaults to the previously
                      hard-coded 0.001

    Returns
    -------
    (associations1, associations2): int32 arrays with one entry per row of
    etaphi1 / etaphi2 holding the index of the partner row in the other
    array, or -1 when the row stayed unpaired. Every row is used at most
    once; rows of etaphi1 are processed in order and take the first free row
    of etaphi2 that passes the cut.
    """
    associations1 = np.zeros(len(etaphi1), dtype=np.int32)
    associations2 = np.zeros(len(etaphi2), dtype=np.int32)
    associations1[:] = -1
    associations2[:] = -1

    for i in range(len(etaphi1)):
        for j in range(len(etaphi2)):
            # A row of etaphi2 that already has a partner cannot be reused.
            if associations2[j] != -1:
                continue
            # NOTE(review): ha.deltaphi is presumably the wrapped phi difference
            # from hepaccelerate and callable from jitted code - confirm.
            dphi = ha.deltaphi(etaphi1[i, 1], etaphi2[j, 1])
            deta = etaphi1[i, 0] - etaphi2[j, 0]
            dr = np.sqrt(dphi**2 + deta**2)
            if dr < dr_max:
                associations1[i] = j
                associations2[j] = i
                # Row i is paired now; scanning the remaining j could not
                # change anything (the original kept looping with a no-op
                # `continue`), so stop early.
                break
    return associations1, associations2

In [None]:
# Open the ntuple file (path relative to the notebook) and fetch the
# "ana/pftree" object; the cells below read its branches via t.array(...).
t = uproot.open("../pfntuple.root").get("ana/pftree")

In [None]:
# Load the PFCandidate, TrackingParticle and CaloParticle branches for a
# single entry (iev) of the tree.
iev = 0
cand_pt = t.array('pfcandidate_pt')[iev]
cand_eta = t.array('pfcandidate_eta')[iev]
cand_phi = t.array('pfcandidate_phi')[iev]
cand_pid = t.array('pfcandidate_pdgid')[iev]
# All-True mask over the candidates. Use the builtin `bool`: the `np.bool`
# alias was deprecated in NumPy 1.20 and removed in 1.24.
m = np.ones_like(cand_pt, dtype=bool)

tp_pt = t.array('trackingparticle_pt')[iev]
tp_eta = t.array('trackingparticle_eta')[iev]
tp_phi = t.array('trackingparticle_phi')[iev]
tp_pid = t.array('trackingparticle_pid')[iev]
tp_bx = t.array('trackingparticle_bx')[iev]
tp_idx_track = t.array('trackingparticle_idx_track')[iev]
# TrackingParticles that have a track index and bx == 0.
mtp = ((tp_idx_track!=-1) & (tp_bx == 0))
#m2 = (gp_bx == 0)

cp_pt = t.array('caloparticle_pt')[iev]
cp_eta = t.array('caloparticle_eta')[iev]
cp_phi = t.array('caloparticle_phi')[iev]
cp_pid = t.array('caloparticle_pid')[iev]
cp_bx = t.array('caloparticle_bx')[iev]
cp_idx_cluster = t.array('caloparticle_idx_cluster')[iev]
#m3 = (gp2_bx == 0)
# CaloParticles that have a cluster index and bx == 0.
mcp = ((cp_idx_cluster!=-1) & (cp_bx == 0))

In [None]:
# Simtrack x/y/z branches for entry `iev`.
# NOTE(review): the following cell reads the same three branches again, so
# this cell looks redundant.
st_x = t.array("simtrack_x")[iev]
st_y = t.array("simtrack_y")[iev]
st_z = t.array("simtrack_z")[iev]

In [None]:
# Rechit branches for entry `iev`: position, eta/phi, energy and the index
# of the cluster each rechit is attached to.
rh_x = t.array("rechit_x")[iev]
rh_y = t.array("rechit_y")[iev]
rh_z = t.array("rechit_z")[iev]
rh_eta = t.array("rechit_eta")[iev]
rh_phi = t.array("rechit_phi")[iev]
rh_e = t.array("rechit_e")[iev]
rh_idx_cluster = t.array("rechit_idx_cluster")[iev]

# Simhit branches for entry `iev`, including the det/subdet codes and the
# indices linking each simhit to a cluster and to a CaloParticle.
sh_x = t.array("simhit_x")[iev]
sh_y = t.array("simhit_y")[iev]
sh_z = t.array("simhit_z")[iev]
sh_e = t.array("simhit_e")[iev]
sh_eta = t.array("simhit_eta")[iev]
sh_phi = t.array("simhit_phi")[iev]
sh_det = t.array("simhit_det")[iev]
sh_subdet = t.array("simhit_subdet")[iev]
sh_idx_cluster = t.array("simhit_idx_cluster")[iev]
sh_idx_caloparticle = t.array("simhit_idx_caloparticle")[iev]

# Simtrack branches for entry `iev`: position, cluster / CaloParticle
# indices and pid.
st_x = t.array("simtrack_x")[iev]
st_y = t.array("simtrack_y")[iev]
st_z = t.array("simtrack_z")[iev]
st_idx_cluster = t.array("simtrack_idx_cluster")[iev]
st_idx_caloparticle = t.array("simtrack_idx_caloparticle")[iev]
st_pid = t.array("simtrack_pid")[iev]

In [None]:
# Tally the simhit_subdet values of the simhits whose simhit_det equals 4.
Counter(sh_subdet[sh_det==4])

In [None]:
# Subdet code versus eta for the simhits with simhit_det == 4.
plt.scatter(sh_eta[sh_det==4], sh_subdet[sh_det==4])

In [None]:
# Tally of the simhit_det values over all simhits of the event.
Counter(sh_det)

In [None]:
# Normalised energy spectra of simhits and rechits in [0, 2], with exactly-zero
# energies dropped; both drawn as step histograms on a log y-axis.
plt.hist(sh_e[sh_e!=0], bins=np.linspace(0,2,100), density=1.0, histtype="step", lw=2);
plt.hist(rh_e[rh_e!=0], bins=np.linspace(0,2,100), density=1.0, histtype="step", lw=2);
plt.yscale("log")

In [None]:
# import plotly.graph_objects as go

# fig = go.Figure()

# m1 = (np.abs(sh_eta) < 1.0) & (np.abs(sh_phi) < 1.0) & (sh_e>0.2)
# m2 = (np.abs(rh_eta) < 1.0) & (np.abs(rh_phi) < 1.0) & (rh_e>0.2)

# # Add traces
# fig.add_trace(go.Scatter(
#     x=sh_eta[m1], y=sh_phi[m1],
#     mode='markers',
#     name='SimHit',
#     hovertemplate='<b>%{text}</b>',
#     marker={"size": 0.1*sh_e[m1], "symbol": "circle"},
#     text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],
#     ))

# fig.add_trace(go.Scatter(
#     x=rh_eta[m2], y=rh_phi[m2],
#     mode='markers',
#     name='RecHit',
#     hovertemplate='<b>%{text}</b>',
#     text = ['PFcluster {}'.format(rh_idx_cluster[m2][i]) for i in range(len(rh_eta[m2]))],

# ))

# fig.show()

In [None]:
# Energy versus eta, simhits first and rechits overlaid on the same axes.
plt.scatter(sh_eta, sh_e, marker=".", alpha=0.5)
plt.scatter(rh_eta, rh_e, marker=".", alpha=0.5)

In [None]:
# Normalised x-coordinate distributions of simhits and rechits.
bins = np.linspace(-500, 500, 100)
plt.hist(sh_x, bins=bins, histtype="step", lw=2, density=1.0);
plt.hist(rh_x, bins=bins, histtype="step", lw=2, density=1.0);


In [None]:
# Normalised y-coordinate distributions of simhits and rechits.
bins = np.linspace(-500, 500, 100)
plt.hist(sh_y, bins=bins, histtype="step", lw=2, density=1.0);
plt.hist(rh_y, bins=bins, histtype="step", lw=2, density=1.0);

In [None]:
# Normalised z-coordinate distributions of simhits and rechits.
bins = np.linspace(-1000, 1000, 100)
plt.hist(sh_z, bins=bins, histtype="step", lw=2, density=1.0);
plt.hist(rh_z, bins=bins, histtype="step", lw=2, density=1.0);

In [None]:
# Normalised energy spectra of simhits and rechits over the wide range
# [0, 500], log y-axis (zero energies are included here).
bins = np.linspace(0,500,100)
plt.hist(sh_e, bins=bins, histtype="step", lw=2, density=1.0);
plt.hist(rh_e, bins=bins, histtype="step", lw=2, density=1.0);
plt.yscale("log")

In [None]:
# eta-phi map of the event: simhits as blue circles, rechits as red crosses.
# Marker area follows the hit energy (0.1*sh_e for simhits, rh_e for rechits).
plt.figure(figsize=(20, 10))
plt.scatter(sh_eta, sh_phi, marker="o", alpha=0.5, s=0.1*sh_e, color="blue")
plt.scatter(rh_eta, rh_phi, marker="x", alpha=0.5, s=rh_e, color="red")

In [None]:
# Pick one CaloParticle by index and compare, in the x-y plane, the simhits
# that point to it with the rechits that carry the same cluster index as the
# CaloParticle. The masks defined here are reused by the 3D plotly cell.
# NOTE(review): the index 28 is hard-coded - confirm it is valid for the
# chosen event.
icaloparticle = 28
mask_simhit = sh_idx_caloparticle==icaloparticle
# NOTE(review): if cp_idx_cluster[icaloparticle] is -1, this selects every
# rechit whose cluster index is -1 rather than a real cluster - confirm.
mask_rechit = rh_idx_cluster==cp_idx_cluster[icaloparticle]
print(cp_idx_cluster[icaloparticle], cp_pid[icaloparticle])
print(st_pid[st_idx_caloparticle==icaloparticle])

plt.figure(figsize=(10,10))
# Simhit marker area scales with the simhit energy.
plt.scatter(sh_x[mask_simhit], sh_y[mask_simhit], s=50*sh_e[mask_simhit], marker=".")
plt.scatter(rh_x[mask_rechit], rh_y[mask_rechit], marker="x");

In [None]:
# Normalised z distributions: all rechits versus simhits with sh_x != 0.
bins=np.linspace(-1500, 1500, 100)
plt.hist(rh_z, bins=bins, density=1.0, lw=2, histtype="step");
plt.hist(sh_z[sh_x!=0], bins=bins, density=1.0, lw=2, histtype="step");

In [None]:
# log(E + 1) for the first 100 rechit energies.
np.log(rh_e[:100]+1)

In [None]:
# Interactive 3D display of the event built from plotly Scatter3d traces.
# Depends on icaloparticle, mask_rechit and mask_simhit from the
# single-CaloParticle cell above.
# Axis mapping used for every trace: the detector y coordinate is passed as
# plotly's z and the detector z coordinate as plotly's y.
# NOTE(review): `px` is imported but not used in this cell.
import plotly.express as px
import plotly.graph_objects as go

# Every rechit of the event, small red squares.
points_rechit_all = go.Scatter3d(
    x=rh_x,
    z=rh_y,
    y=rh_z,
    mode="markers",
    marker={
        "symbol": "square",
        "opacity": 0.5,
        "size": 2,
        "color": "red"

    },
    name="all rechits"
)

# Simtracks that point to the selected CaloParticle.
points_simtracks = go.Scatter3d(
    x=st_x[st_idx_caloparticle==icaloparticle],
    z=st_y[st_idx_caloparticle==icaloparticle],
    y=st_z[st_idx_caloparticle==icaloparticle],
    mode="markers",
    marker={
        "symbol": "x",
        "opacity": 1.0,
        "size": 5,
        "color": "green"

    },
    name="simtracks"
)

# Rechits sharing the cluster index of the selected CaloParticle.
points_rechit = go.Scatter3d(
    x=rh_x[mask_rechit],
    z=rh_y[mask_rechit],
    y=rh_z[mask_rechit],
    mode="markers",
    marker={
        "symbol": "square",
        "opacity": 0.5,
        "size": 10.0,
        "color": "red"
    },
    name="matched rechits"
)

# Simhit selection: energy above 0.2 and a non-zero x coordinate.
m1 = (sh_e > 0.2) & (sh_x != 0)
# All selected simhits; the marker size is an array scaled with the energy.
points_simhit_all = go.Scatter3d(
    x=sh_x[m1],
    z=sh_y[m1],
    y=sh_z[m1],
    mode="markers",
    marker={
        "symbol": "circle",
        "opacity": 0.2,
        "size": 0.1*sh_e[m1],
        #"size": 1.0,
        "color": "blue"
    },
    name="all simhits",
    #hovertemplate='<b>%{text}</b>',
    #text = ['CaloParticle {}, cluster {}'.format(sh_idx_caloparticle[m1][i], sh_idx_cluster[m1][i]) for i in range(len(sh_eta[m1]))],

)

# Selected simhits that point to the chosen CaloParticle.
points_simhit = go.Scatter3d(
    x=sh_x[mask_simhit & m1],
    z=sh_y[mask_simhit & m1],
    y=sh_z[mask_simhit & m1],
    mode="markers",
    marker={
        "symbol": "circle",
        "opacity": 0.5,
        "size": 10,
        "color": "blue"
    },
    name="matched simhits",
)

# Only the two "all" traces are drawn; un-comment entries to add the
# per-CaloParticle traces.
data=[
    points_rechit_all,
    points_simhit_all,
    #points_simtracks,
    #points_simhit,
    #points_rechit,
]

fig = go.Figure(data=data)

# Fixed 1000x1000 canvas without margins and an explicit camera position.
fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
    margin=go.layout.Margin(
        l=0,
        r=0,
        b=0,
        t=0,
    ),
    scene_camera={
        "eye": dict(x=0.8, y=0.8, z=0.8)
    }
)

fig.show()

In [None]:
import networkx as nx

In [None]:
# Build a directed graph of one event (iev). Nodes are ("kind", index) tuples
# for caloparticles, clusters, tracking particles, tracks and candidates;
# edges follow the idx_* branches:
#   caloparticle -> cluster -> candidate
#   trackingparticle -> track -> candidate
# An index of -1 means "no link" and produces no edge.
# NOTE: this cell re-reads several branches into names (cp_pt, tp_pt, ...)
# that earlier cells also assign.
iev = 0
g = nx.DiGraph()

cp_cl = t.array("caloparticle_idx_cluster")[iev]
cp_pt = t.array("caloparticle_pt")[iev]
cp_e = t.array("caloparticle_energy")[iev]
cp_eta = t.array("caloparticle_eta")[iev]
cp_phi = t.array("caloparticle_phi")[iev]
cp_pid = t.array("caloparticle_pid")[iev]

cl_cnd = t.array("cluster_idx_candidate")[iev]
cl_cp = t.array("cluster_idx_caloparticle")[iev]
cl_e = t.array("cluster_energy")[iev]
cl_eta = t.array("cluster_eta")[iev]
cl_phi = t.array("cluster_phi")[iev]

tp_t = t.array("trackingparticle_idx_track")[iev]
tp_pt = t.array("trackingparticle_pt")[iev]
tp_eta = t.array("trackingparticle_eta")[iev]
tp_phi = t.array("trackingparticle_phi")[iev]
tp_pid = t.array("trackingparticle_pid")[iev]

t_cnd = t.array("track_idx_candidate")[iev]
t_pt = t.array("track_pt")[iev]
t_eta = t.array("track_eta")[iev]
t_phi = t.array("track_phi")[iev]

c_pt = t.array("pfcandidate_pt")[iev]
c_eta = t.array("pfcandidate_eta")[iev]
c_phi = t.array("pfcandidate_phi")[iev]
c_pid = t.array("pfcandidate_pdgid")[iev]

# Object multiplicities, taken from the length of one branch per collection.
ncaloparticle = len(cp_cl)
ncluster = len(cl_cnd)
ncandidate = len(c_pt)
ntrackingparticle = len(tp_t)
ntrack = len(t_cnd)

# Add every object as a node first, so unlinked objects are in the graph too.
for i in range(ncaloparticle):
    g.add_node(("caloparticle", i))
    
for i in range(ncluster):
    g.add_node(("cluster", i))
    
for i in range(ncandidate):
    g.add_node(("candidate", i))

for i in range(ntrackingparticle):
    g.add_node(("trackingparticle", i))
    
for i in range(ntrack):
    g.add_node(("track", i))
    
# caloparticle -> cluster
for i in range(ncaloparticle):
    ind_cluster = cp_cl[i]
    if ind_cluster != -1:
        g.add_edge(("caloparticle", i), ("cluster", ind_cluster))
        
# cluster -> candidate; additionally tag the caloparticle a cluster points
# back to with state="matched" (shown in the node label when drawing).
for i in range(ncluster):
    ind_cand = cl_cnd[i]
    if ind_cand != -1:
        g.add_edge(("cluster", i), ("candidate", ind_cand))
    
    ind_cp = cl_cp[i]
    if ind_cp != -1:
        g.nodes[("caloparticle", ind_cp)]["state"] = "matched"
        
# trackingparticle -> track
for i in range(ntrackingparticle):
    ind_track = tp_t[i]
    if ind_track != -1:
        g.add_edge(("trackingparticle", i), ("track", ind_track))

# track -> candidate
for i in range(ntrack):
    ind_cand = t_cnd[i]
    if ind_cand != -1:
        g.add_edge(("track", i), ("candidate", ind_cand))

In [None]:
# Total number of nodes in the event graph.
len(g.nodes)

In [None]:
# One subgraph per weakly connected component of the event graph.
sgs = [nx.subgraph(g, c) for c in nx.weakly_connected_components(g)]

In [None]:
# Group the connected components by the PDG id of their candidate. Only
# components containing exactly one candidate node are kept.
sgs_d = {}
for sg in sgs:
    cands = [n for n in sg.nodes if n[0] == "candidate"]
    if len(cands) != 1:
        continue
    # Node keys are ("candidate", index); look the PDG id up by that index.
    pdgid = c_pid[cands[0][1]]
    sgs_d.setdefault(pdgid, []).append(sg)

In [None]:
from networkx.drawing.nx_agraph import write_dot, graphviz_layout

In [None]:
# Draw up to five example components per candidate PDG id and save each one
# as graph_ev{iev}_pid{pid}_i{ip}.pdf.
# Depends on sgs_d and on the per-event arrays (cp_*, cl_*, tp_*, t_*, c_*)
# from the graph-building cell.

# Node colour by object kind.
colors = {
    "caloparticle": "red",
    "cluster": "lightblue",
    "candidate": "green",
    "trackingparticle": "pink",
    "track": "orange"
}

for pid in sgs_d.keys():
    # ip numbers the figures saved for this pid.
    ip = 0
    for sg in sgs_d[pid][:5]:
        labels = {}
        # Figure width grows with the number of nodes (minimum 3).
        plt.figure(figsize=(max(3, len(sg.nodes)+1), 10))
        # Build a multi-line text label per node from the kinematics of the
        # object it refers to; n is a (kind, index) tuple.
        for n in sg.nodes:
            if n[0] == "caloparticle":
                d = sg.nodes[n]
                # The last label line is the "state" attribute, if one was set.
                labels[n] = "{}\npt: {:.2f}\ne: {:.2f}\neta: {:.2f}\nphi: {:.2f}\npid: {}".format(
                    "CaloParticle", cp_pt[n[1]], cp_e[n[1]], cp_eta[n[1]], cp_phi[n[1]], cp_pid[n[1]]
                ) + "\n" + d.get("state", "")
            elif n[0] == "cluster":
                labels[n] = "{}\ne: {:.2f}\neta: {:.2f}\nphi: {:.2f}".format(
                    "PFCluster", cl_e[n[1]], cl_eta[n[1]], cl_phi[n[1]]
                )
            elif n[0] == "trackingparticle":
                labels[n] = "{}\npt: {:.2f}\neta: {:.2f}\nphi: {:.2f}\npid: {}".format(
                    "TrackingParticle", tp_pt[n[1]], tp_eta[n[1]], tp_phi[n[1]], tp_pid[n[1]]
                )
            elif n[0] == "track":
                labels[n] = "{}\npt: {:.2f}\neta: {:.2f}\nphi: {:.2f}".format(
                    "Track", t_pt[n[1]], t_eta[n[1]], t_phi[n[1]]
                )
            elif n[0] == "candidate":
                labels[n] = "{}\npt: {:.2f}\neta: {:.2f}\nphi: {:.2f}\npid: {}".format(
                    "PFCandidate", c_pt[n[1]], c_eta[n[1]], c_phi[n[1]], c_pid[n[1]]
                )
            else:
                # Unknown node kind: fall back to the raw node key.
                labels[n] = n

        # Node positions from graphviz's "dot" program.
        pos = graphviz_layout(sg, prog='dot')
        nx.draw(sg,
            pos=pos,
            with_labels=True,
            node_color=[colors[n[0]] for n in sg.nodes],
            labels=labels,
            edge_color="gray"
        )
        plt.tight_layout()
        plt.savefig("graph_ev{}_pid{}_i{}.pdf".format(iev, pid, ip))
        ip += 1


In [None]:
# Read the branches without the [iev] index, i.e. for all entries of the tree.
# NOTE: this overwrites names (tp_pt, cp_pt, t_pt, cl_e, c_pt, c_pid, ...) that
# the per-event cells above assigned for a single entry, so those cells must
# be run before this one.
tp_idx_track = t.array("trackingparticle_idx_track")
tp_pt = t.array("trackingparticle_pt")
tp_eta = t.array("trackingparticle_eta")
tp_phi = t.array("trackingparticle_phi")
tp_pid = t.array("trackingparticle_pid")

cp_idx_cluster = t.array("caloparticle_idx_cluster")
cp_pt = t.array("caloparticle_pt")
cp_e = t.array("caloparticle_energy")
cp_eta = t.array("caloparticle_eta")
cp_phi = t.array("caloparticle_phi")
cp_pid = t.array("caloparticle_pid")

t_pt = t.array('track_pt')
t_eta = t.array('track_eta')
t_phi = t.array('track_phi')
t_idx_c = t.array('track_idx_candidate')

cl_e = t.array('cluster_energy')
cl_eta = t.array('cluster_eta')
cl_phi = t.array('cluster_phi')
cl_idx_cp = t.array('cluster_idx_caloparticle')
cl_idx_c = t.array('cluster_idx_candidate')

c_pt = t.array('pfcandidate_pt')
c_eta = t.array('pfcandidate_eta')
c_phi = t.array('pfcandidate_phi')
c_pid = t.array('pfcandidate_pdgid')
c_idx_tp = t.array('pfcandidate_idx_trackingparticle')
c_idx_cp = t.array('pfcandidate_idx_caloparticle')

# Candidate masks:
#   mc  - candidate has a TrackingParticle index
#   mc2 - no TrackingParticle index, but a CaloParticle index
#   mc3 - neither index (unmatched)
mc = c_idx_tp != -1
mc2 = (c_idx_tp == -1) & (c_idx_cp != -1)
mc3 = (c_idx_tp == -1) & (c_idx_cp == -1)
# TrackingParticles with a track index. Unlike the per-event `mtp` defined
# earlier, this one has no bx == 0 requirement.
mtp = tp_idx_track != -1

In [None]:
# Fraction of TrackingParticles that have a track index, in bins of
# TrackingParticle pT, saved as trackingparticle_matching_eff.pdf.
bins = np.array([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.5,2.0,3.0,4.0,5.0,6.0,8.0,10,15,20,50])
c1, _ = np.histogram(tp_pt.flatten(), bins=bins)
c2, _ = np.histogram(tp_pt[tp_idx_track!=-1].flatten(), bins=bins)
# Bins with an empty denominator give 0/0. Compute without the RuntimeWarning
# and report them as 0 +- 0; previously the error stayed NaN for such bins
# while the ratio was reset to 0.
with np.errstate(divide="ignore", invalid="ignore"):
    ratio = c2/c1
    # Simple sqrt(N_pass)/N_all uncertainty, as before.
    errs = np.sqrt(c2)/c1
ratio[np.isnan(ratio)] = 0
errs[np.isnan(errs)] = 0
# Points are drawn at the bin centres.
plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=".", lw=1, elinewidth=1)
plt.ylabel("matching efficiency\nTrackingParticle to Track")
plt.xlabel("TrackingParticle $p_T$ [GeV]")
plt.savefig("trackingparticle_matching_eff.pdf", bbox_inches="tight")

In [None]:
# TrackingParticle pT spectrum over all events, 0-5 GeV, log y-axis.
plt.hist(tp_pt.flatten(), bins=np.linspace(0,5,101));
plt.yscale("log")
plt.xlabel("TrackingParticle $p_T$ [GeV]")

In [None]:
# Fraction of CaloParticles that have a cluster index, in bins of
# CaloParticle pT, saved as caloparticle_matching_eff.pdf.
bins = np.array([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,1.5,2.0,3.0,4.0,5.0,6.0,8.0,10,15,20,50])
c1, _ = np.histogram(cp_pt.flatten(), bins=bins)
c2, _ = np.histogram(cp_pt[cp_idx_cluster!=-1].flatten(), bins=bins)
# Bins with an empty denominator give 0/0. Compute without the RuntimeWarning
# and report them as 0 +- 0; previously the error stayed NaN for such bins
# while the ratio was reset to 0.
with np.errstate(divide="ignore", invalid="ignore"):
    ratio = c2/c1
    # Simple sqrt(N_pass)/N_all uncertainty, as before.
    errs = np.sqrt(c2)/c1
ratio[np.isnan(ratio)] = 0
errs[np.isnan(errs)] = 0
# Points are drawn at the bin centres.
plt.errorbar(bins[:-1] + np.diff(bins)/2, ratio, yerr=errs, marker=".", lw=1, elinewidth=1)
plt.ylabel("matching efficiency\nCaloParticle to Cluster ")
plt.xlabel("CaloParticle $p_T$ [GeV]")
plt.savefig("caloparticle_matching_eff.pdf", bbox_inches="tight")

In [None]:
# CaloParticle pT spectrum over all events, 0-5 GeV, log y-axis.
plt.hist(cp_pt.flatten(), bins=np.linspace(0,5,101));
plt.yscale("log")
plt.xlabel("CaloParticle $p_T$ [GeV]")

In [None]:
# How many CaloParticles point to the same cluster, collected over all events.
a = cp_idx_cluster[cp_idx_cluster != -1]
# For each event, np.unique(..., return_counts=True) gives the multiplicity of
# every distinct cluster index in one pass. The comprehension flattens the
# per-event counts into one list, avoiding the quadratic list copying of
# sum(list_of_lists, []) and the per-label rescans of the array.
vs = [count for _a in a for count in np.unique(_a, return_counts=True)[1]]

plt.hist(vs, bins=np.linspace(0, 20, 21))
plt.xlabel("CaloParticles per Cluster")
plt.ylabel("Number of Clusters")
plt.yscale("log")
# The title reports the mean multiplicity.
plt.title("CaloParticles per Cluster: {:.2f}".format(np.mean(vs)))

In [None]:
# How many clusters point to the same candidate, collected over all events.
a = cl_idx_c[cl_idx_c != -1]
# For each event, np.unique(..., return_counts=True) gives the multiplicity of
# every distinct candidate index in one pass. The comprehension flattens the
# per-event counts into one list, avoiding the quadratic list copying of
# sum(list_of_lists, []) and the per-label rescans of the array.
vs1 = [count for _a in a for count in np.unique(_a, return_counts=True)[1]]

plt.hist(vs1, bins=np.linspace(0, 10, 11))
plt.xlabel("Cluster per Candidate")
plt.ylabel("Number of Candidates")
plt.yscale("log")
# The title reports the mean multiplicity.
plt.title("Cluster per Candidate: {:.2f}".format(np.mean(vs1)))

In [None]:
# How many tracks point to the same candidate, collected over all events.
a = t_idx_c[t_idx_c != -1]
# For each event, np.unique(..., return_counts=True) gives the multiplicity of
# every distinct candidate index in one pass. The comprehension flattens the
# per-event counts into one list, avoiding the quadratic list copying of
# sum(list_of_lists, []) and the per-label rescans of the array.
vs2 = [count for _a in a for count in np.unique(_a, return_counts=True)[1]]
# NOTE(review): 11 edges over [0, 20] give bins of width 2, whereas the sibling
# multiplicity plots use unit-width bins - confirm this is intended.
plt.hist(vs2, bins=np.linspace(0, 20, 11))
plt.xlabel("Tracks per Candidate")
plt.ylabel("Number of Candidates")
plt.yscale("log")
# The title reports the mean multiplicity.
plt.title("Tracks per Candidate: {:.2f}".format(np.mean(vs2)))

In [None]:
# Mean clusters-per-candidate and tracks-per-candidate multiplicities.
np.mean(vs1), np.mean(vs2)

In [None]:
# Tally of the clusters-per-candidate multiplicities.
Counter(vs1)

In [None]:
# Tally of the tracks-per-candidate multiplicities.
Counter(vs2)

In [None]:
# 2x3 grid of normalised pT / eta / phi spectra, saved as genparticle_to_reco.pdf.
# Top row: all TrackingParticles versus those with a track index.
# Bottom row: all CaloParticles versus those with a cluster index.
# Axis labels containing LaTeX use raw strings: "\e" and "\p" are invalid
# escape sequences in ordinary string literals and trigger a warning.
plt.figure(figsize=(3*3, 2*3))

plt.suptitle("GenParticle kinematics", y=1.01)
plt.subplot(2, 3, 1)
plt.hist(tp_pt.flatten(), bins=np.linspace(0,10,100), histtype="step", lw=2, label="all TrackingParticles", density=1);
plt.hist(tp_pt[tp_idx_track!=-1].flatten(), bins=np.linspace(0,10,100), histtype="step", lw=2, label="matched to reco track", density=1);
plt.ylim(1e-3, 1000)
plt.yscale("log")
plt.legend(frameon=False)
plt.xlabel("$p_T$ [GeV]")

plt.subplot(2, 3, 2)
plt.hist(tp_eta.flatten(), bins=np.linspace(-6, 6, 100), histtype="step", lw=2, density=1);
plt.hist(tp_eta[tp_idx_track!=-1].flatten(), bins=np.linspace(-6, 6, 100), histtype="step", lw=2, density=1);
plt.yscale("log")
plt.xlabel(r"$\eta$")

plt.subplot(2, 3, 3)
plt.hist(tp_phi.flatten(), bins=np.linspace(-4, 4, 100), histtype="step", lw=2, density=1);
plt.hist(tp_phi[tp_idx_track!=-1].flatten(), bins=np.linspace(-4, 4, 100), histtype="step", lw=2, density=1);
plt.yscale("log")
plt.xlabel(r"$\phi$")

plt.subplot(2, 3, 4)
plt.hist(cp_pt.flatten(), bins=np.linspace(0,10,100), histtype="step", lw=2, label="all CaloParticles", density=1);
plt.hist(cp_pt[cp_idx_cluster!=-1].flatten(), bins=np.linspace(0,10,100), histtype="step", lw=2, label="matched to reco cluster", density=1);
plt.yscale("log")
plt.ylim(1e-3, 100)
plt.legend(frameon=False)
plt.xlabel("$p_T$ [GeV]")

plt.subplot(2, 3, 5)
plt.hist(cp_eta.flatten(), bins=np.linspace(-6, 6, 100), histtype="step", lw=2, density=1);
plt.hist(cp_eta[cp_idx_cluster!=-1].flatten(), bins=np.linspace(-6, 6, 100), histtype="step", lw=2, density=1);
plt.yscale("log")
plt.xlabel(r"$\eta$")

plt.subplot(2, 3, 6)
plt.hist(cp_phi.flatten(), bins=np.linspace(-4, 4, 100), histtype="step", lw=2, density=1);
plt.hist(cp_phi[cp_idx_cluster!=-1].flatten(), bins=np.linspace(-4, 4, 100), histtype="step", lw=2, density=1);
plt.yscale("log")
plt.xlabel(r"$\phi$")

plt.tight_layout()
plt.savefig("genparticle_to_reco.pdf", bbox_inches="tight")

In [None]:
# 2x3 grid of gen-versus-reco scatter plots, saved as
# genparticle_to_reco_scatter.pdf.
# Top row: TrackingParticle versus the track its index points to.
# Bottom row: CaloParticle versus the cluster that points back to it.
# Axis labels containing LaTeX use raw strings: "\e" and "\p" are invalid
# escape sequences in ordinary string literals and trigger a warning.

# Random subsample of at most 1000 matched TrackingParticles. No seed is set,
# so the selection differs between runs.
inds = np.random.permutation(range(len(tp_pt[tp_idx_track!=-1].flatten())))[:1000]

plt.figure(figsize=(3*3, 2*3))
plt.suptitle("GenParticle to Track or Cluster", y=1.01)
plt.subplot(2,3,1)
plt.scatter(
    tp_pt[tp_idx_track!=-1].flatten()[inds],
    t_pt[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],
    alpha=0.2, marker=".")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("TrackingParticle pT [GeV]")
plt.ylabel("recoTrack pT [GeV]")
plt.xlim(0.1, 100)
plt.ylim(0.1, 100)

plt.subplot(2,3,2)
plt.scatter(
    tp_eta[tp_idx_track!=-1].flatten()[inds],
    t_eta[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],
    alpha=0.2, marker=".")
plt.xlim(-4, 4)
plt.ylim(-4, 4)
plt.xlabel(r"TrackingParticle $\eta$")
plt.ylabel(r"recoTrack $\eta$")

plt.subplot(2,3,3)
plt.scatter(
    tp_phi[tp_idx_track!=-1].flatten()[inds],
    t_phi[tp_idx_track[tp_idx_track!=-1]].flatten()[inds],
    alpha=0.2, marker=".")
plt.xlim(-4, 4)
plt.ylim(-4, 4)
plt.xlabel(r"TrackingParticle $\phi$")
plt.ylabel(r"recoTrack $\phi$")

# New random subsample (at most 1000) for the clusters with a CaloParticle index.
inds = np.random.permutation(range(len(cp_e[cl_idx_cp[cl_idx_cp!=-1]].flatten())))[:1000]

plt.subplot(2,3,4)
plt.scatter(
    cp_e[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],
    cl_e[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=".")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("CaloParticle energy")
plt.ylabel("PFCluster energy")
plt.xlim(0.1, 1000)
plt.ylim(0.1, 1000)

plt.subplot(2,3,5)
plt.scatter(
    cp_eta[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],
    cl_eta[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=".")
plt.xlim(-6, 6)
plt.ylim(-6, 6)
plt.xlabel(r"CaloParticle $\eta$")
plt.ylabel(r"PFCluster $\eta$")

plt.subplot(2,3,6)
plt.scatter(
    cp_phi[cl_idx_cp[cl_idx_cp!=-1]].flatten()[inds],
    cl_phi[cl_idx_cp!=-1].flatten()[inds], alpha=0.2, marker=".")
plt.xlim(-4, 4)
plt.ylim(-4, 4)
plt.xlabel(r"CaloParticle $\phi$")
plt.ylabel(r"PFCluster $\phi$")

plt.tight_layout()
plt.savefig("genparticle_to_reco_scatter.pdf", bbox_inches="tight")

In [None]:
# PDG ids of candidates without any gen-level index (mc3) and of all others.
unmatched_pids = c_pid[mc3].flatten()
matched_pids = c_pid[~mc3].flatten()

In [None]:
# Count matched and unmatched candidates per PDG id; `labels` holds every
# distinct candidate PDG id in sorted order.
labels = np.unique(c_pid.flatten())
f1 = freqtable(matched_pids, labels)
f2 = freqtable(unmatched_pids, labels)

In [None]:
# Convert the counts to fractions of the total number of candidates, so
# b1 + b2 summed over all labels is 1.
numtot = len(c_pid.flatten())
b1 = np.array([f1[l]/numtot for l in labels])
b2 = np.array([f2[l]/numtot for l in labels])

In [None]:
# Stacked bar chart per candidate PDG id: matched fraction at the bottom,
# unmatched fraction on top. Saved as candidate_matching_efficiency.pdf.
xs = np.arange(len(b1))
plt.bar(xs, b1, label="matched PFCandidates")
plt.bar(xs, b2, bottom=b1, label="unmatched PFCandidates")
plt.xticks(xs, labels);
plt.legend(frameon=False)
plt.ylabel("fraction of PFCandidates")
plt.xlabel("PFCandidate PDGID")
plt.title("PFCandidate GenParticle matching efficiency")
plt.savefig("candidate_matching_efficiency.pdf", bbox_inches="tight")

In [None]:
# Confusion matrix of CaloParticle pid (rows, "true") versus PFCandidate
# pdgid (columns, "predicted") for candidates matched to a CaloParticle only
# (mc2). Raw counts are shown (normalize=False).
# NOTE(review): `labels` is built from the candidate PDG ids only; CaloParticle
# pids outside that set presumably do not appear in the matrix - confirm.
labels=np.unique(c_pid.flatten())
cm = sklearn.metrics.confusion_matrix(cp_pid[c_idx_cp[mc2]].flatten(), c_pid[mc2].flatten(), labels=labels)
plot_confusion_matrix(cm, labels, normalize=False)
# Override the generic title and axis labels set by plot_confusion_matrix.
plt.title("CaloParticle to PFCandidate")
plt.ylabel("CaloParticle PDGID")
plt.xlabel("PFCandidate PDGID")
plt.savefig("caloparticle_to_candidate_confusion.pdf", bbox_inches="tight")

In [None]:
# Confusion matrix of TrackingParticle pid (rows) versus PFCandidate pdgid
# (columns) for candidates with a TrackingParticle index (mc), raw counts.
# Reuses `labels` from the CaloParticle confusion-matrix cell.
cm = sklearn.metrics.confusion_matrix(tp_pid[c_idx_tp[mc]].flatten(), c_pid[mc].flatten(), labels=labels)
plot_confusion_matrix(cm, labels, normalize=False)
# Override the generic title and axis labels set by plot_confusion_matrix.
plt.title("TrackingParticle to PFCandidate")
plt.ylabel("TrackingParticle PDGID")
plt.xlabel("PFCandidate PDGID")
plt.savefig("trackingparticle_to_pfcandidate_confusion.pdf", bbox_inches="tight")

In [None]:
# 2x3 grid of gen-versus-PFCandidate scatter plots, saved as
# genparticle_to_candidate_scatter.pdf.
# Top row: candidates with a TrackingParticle index (mc).
# Bottom row: candidates matched to a CaloParticle only (mc2).
# Axis labels containing LaTeX use raw strings: "\e" and "\p" are invalid
# escape sequences in ordinary string literals and trigger a warning.

# Random subsample of at most 1000 entries. No seed is set, so the selection
# differs between runs.
inds = np.random.permutation(range(len(tp_pt[c_idx_tp[mc]].flatten())))[:1000]

plt.figure(figsize=(3*3, 2*3))
plt.suptitle("GenParticle to PFCandidate", y=1.01)
plt.subplot(2,3,1)
plt.scatter(tp_pt[c_idx_tp[mc]].flatten()[inds], c_pt[mc].flatten()[inds], marker=".", alpha=0.2)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("TrackingParticle $p_T$ [GeV]")
plt.ylabel("PFCand $p_T$ [GeV]")
plt.xlim(0.1, 100)
plt.ylim(0.1, 100)

plt.subplot(2,3,2)
plt.scatter(tp_eta[c_idx_tp[mc]].flatten()[inds], c_eta[mc].flatten()[inds], marker=".", alpha=0.2)
plt.xlabel(r"TrackingParticle $\eta$")
plt.ylabel(r"PFCand $\eta$")
plt.xlim(-5, 5)
plt.ylim(-5, 5)

plt.subplot(2,3,3)
plt.scatter(tp_phi[c_idx_tp[mc]].flatten()[inds], c_phi[mc].flatten()[inds], marker=".", alpha=0.2)
plt.xlabel(r"TrackingParticle $\phi$")
plt.ylabel(r"PFCand $\phi$")
plt.xlim(-4, 4)
plt.ylim(-4, 4)

# New random subsample (at most 1000) for the CaloParticle-matched candidates.
inds = np.random.permutation(range(len(cp_pt[c_idx_cp[mc2]].flatten())))[:1000]

plt.subplot(2,3,4)
plt.scatter(cp_pt[c_idx_cp[mc2]].flatten()[inds], c_pt[mc2].flatten()[inds], marker=".", alpha=0.2)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("CaloParticle $p_T$ [GeV]")
plt.ylabel("PFCandidate $p_T$ [GeV]")
plt.xlim(0.1, 100)
plt.ylim(0.1, 100)

plt.subplot(2,3,5)
plt.scatter(cp_eta[c_idx_cp[mc2]].flatten()[inds], c_eta[mc2].flatten()[inds], marker=".", alpha=0.2)
plt.xlabel(r"CaloParticle $\eta$")
plt.ylabel(r"PFCandidate $\eta$")
plt.xlim(-5, 5)
plt.ylim(-5, 5)

plt.subplot(2,3,6)
plt.scatter(cp_phi[c_idx_cp[mc2]].flatten()[inds], c_phi[mc2].flatten()[inds], marker=".", alpha=0.2)
plt.xlabel(r"CaloParticle $\phi$")
plt.ylabel(r"PFCandidate $\phi$")
plt.xlim(-4, 4)
plt.ylim(-4, 4)

plt.tight_layout()
plt.savefig("genparticle_to_candidate_scatter.pdf", bbox_inches="tight")

In [None]:
# Relative residuals (gen - reco) / gen for pT, eta and phi, saved as
# reco_resolutions.pdf.
# Top panel: TrackingParticle versus the track its index points to.
# Bottom panel: TrackingParticle versus the PFCandidate that points to it.
# Legend labels containing LaTeX use raw strings: "\e" and "\p" are invalid
# escape sequences in ordinary string literals and trigger a warning.
plt.figure(figsize=(5,5))
bins = np.linspace(-0.5, 0.5, 101)
plt.subplot(2,1,1)

plt.title("TrackingParticle to Track")
plt.hist(((tp_pt[mtp] - t_pt[tp_idx_track[mtp]])/tp_pt[mtp]).flatten(), bins=bins, histtype="step", lw=2, label="$p_T$");
plt.hist(((tp_eta[mtp] - t_eta[tp_idx_track[mtp]])/tp_eta[mtp]).flatten(), bins=bins, histtype="step", lw=2, label=r"$\eta$");
plt.hist(((tp_phi[mtp] - t_phi[tp_idx_track[mtp]])/tp_phi[mtp]).flatten(), bins=bins, histtype="step", lw=2, label=r"$\phi$");
plt.yscale("log")
plt.legend(frameon=False)
plt.xlabel("(gen - reco) / gen")


plt.subplot(2,1,2)
plt.title("TrackingParticle to PFCandidate")
# Each residual is divided by the gen-level value of the SAME quantity, as in
# the top panel. The pT and phi residuals used to be divided by tp_eta, a
# copy-paste slip that contradicted the "(gen - reco) / gen" axis label.
plt.hist(((tp_pt[c_idx_tp[mc]] - c_pt[mc])/tp_pt[c_idx_tp[mc]]).flatten(), bins=bins, histtype="step", lw=2, label="$p_T$");
plt.hist(((tp_eta[c_idx_tp[mc]] - c_eta[mc])/tp_eta[c_idx_tp[mc]]).flatten(), bins=bins, histtype="step", lw=2, label=r"$\eta$");
plt.hist(((tp_phi[c_idx_tp[mc]] - c_phi[mc])/tp_phi[c_idx_tp[mc]]).flatten(), bins=bins, histtype="step", lw=2, label=r"$\phi$");
plt.yscale("log")
plt.legend(frameon=False)
plt.xlabel("(gen - reco) / gen")

plt.tight_layout()
plt.savefig("reco_resolutions.pdf", bbox_inches="tight")