In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mplhep
plt.style.use(mplhep.style.CMS)

In [None]:
import scipy
import scipy.sparse

In [None]:
# Load a SciPy sparse matrix from disk; the next cell visualises which entries are positive.
# NOTE(review): presumably a distance matrix for event 0, judging by the filename — confirm.
dm = scipy.sparse.load_npz("../dist_0.npz")

In [None]:
# Show the sparsity pattern of `dm`: dark wherever an entry is strictly positive.
# toarray() yields a plain ndarray; todense() would return the legacy np.matrix
# type, which NumPy/SciPy discourage for new code.
plt.imshow(dm.toarray() > 0.0, cmap="Greys")

In [None]:
# Per-particle tables for event 0; the first CSV column is used as the index.
gen = pd.read_csv("../gen_0.csv", index_col=0)
reco = pd.read_csv("../reco_0.csv", index_col=0)

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open.
# The context manager closes it as soon as the two arrays have been read into
# memory; the arrays themselves stay valid afterwards.
with np.load("../ev_0.npz") as fi0:
    rg = fi0["reco_gen"]   # reco (rows) x gen (columns) association weights
    rc = fi0["reco_cand"]  # reco (rows) x PFCandidate (columns) association weights

In [None]:
# Count the positive-weight associations of every particle. Summing the boolean
# mask down the rows (axis=0) gives one count per gen column; summing across
# the columns (axis=1) gives one count per reco row.
rg_linked = rg > 0.0
gen["num_matched"] = rg_linked.sum(axis=0)
reco["num_matched"] = rg_linked.sum(axis=1)

In [None]:
# Histogram of the gen "pt" column: 99 equal-width bins spanning [0, 10],
# with a logarithmic count axis.
plt.figure(figsize=(5, 5))
pt_bins = np.linspace(0, 10, 100)
gen["pt"].hist(bins=pt_bins)
plt.yscale("log")

In [None]:
# Read the input features (X) and the gen / candidate targets (ygen, ycand)
# of the first ten event files into parallel lists.
Xs = []
ys = []
ycs = []
for i in range(10):
    # NpzFile keeps the archive open until closed; the context manager releases
    # the handle once the three arrays have been materialised in memory.
    with np.load("../ev_{}.npz".format(i)) as fi:
        X = fi["X"]
        y = fi["ygen"]
        yc = fi["ycand"]

    Xs.append(X)
    ys.append(y)
    ycs.append(yc)

# After the loop X, y and yc still refer to the arrays of the last file read
# (i == 9); later cells use them directly.

In [None]:
# plt.hist(rg[rg>0], bins=np.linspace(0,200,100));
# plt.yscale("log")

In [None]:
# For the first (up to) 20 gen columns of `rg`, print the indices of the three
# reco rows with the largest association weight, followed by those weights.
# The range is capped at the number of columns so that events with fewer than
# 20 gen particles do not raise IndexError.
for igen in range(min(20, rg.shape[1])):
    # argsort is ascending: reverse it and keep the first three entries.
    inds_max = np.argsort(rg[:, igen])[::-1][:3]
    top_weights = rg[inds_max, igen]
    print(igen, inds_max, top_weights)

In [None]:
# Binary picture of the reco-gen association matrix: a cell is dark when the
# corresponding weight is strictly positive.
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(rg > 0.0, cmap="Greys", interpolation='None')
ax.set_xlabel("genparticle index")
ax.set_ylabel("recoparticle index")

In [None]:
# Normalised distribution of the number of positive reco-gen links per particle.
fig, ax = plt.subplots(figsize=(5, 5))

rgs = rg > 0.0
assoc_bins = np.linspace(0, 20, 21)  # unit-width bins from 0 to 20
# axis=0 counts links per gen column, axis=1 counts links per reco row.
for axis, label in ((0, "gen"), (1, "reco")):
    ax.hist(rgs.sum(axis=axis), bins=assoc_bins, density=1.0, histtype="step", lw=2, label=label)
ax.legend()
ax.set_xlabel("number of associations")
ax.set_ylabel("fraction of total")

In [None]:
# Binary picture of the reco-PFCandidate association matrix: a cell is dark
# when the corresponding weight is strictly positive.
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(rc > 0.0, cmap="Greys", interpolation='None')
ax.set_xlabel("PFCandidate index")
ax.set_ylabel("recoparticle index")

In [None]:
# Normalised distribution of the number of positive reco-PFCandidate links.
fig, ax = plt.subplots(figsize=(5, 5))
rcs = rc > 0.0
assoc_bins = np.linspace(0, 10, 11)  # unit-width bins from 0 to 10
# axis=0 counts links per candidate column, axis=1 counts links per reco row.
for axis, label in ((0, "candidate"), (1, "reco")):
    ax.hist(rcs.sum(axis=axis), bins=assoc_bins, density=1.0, histtype="step", lw=2, label=label)
ax.legend()
ax.set_xlabel("number of associations")
ax.set_ylabel("fraction of total")

In [None]:
from collections import Counter

In [None]:
# Compare, element by element, column 0 of yc (labelled PFCandidate PDGID below)
# with column 0 of X (labelled reco object type below).
# NOTE(review): `plot_id_pairs` is neither defined nor imported anywhere in the
# visible notebook, so this cell raises NameError on a fresh kernel. Restore its
# definition in an earlier cell; what it draws cannot be verified from here.
plot_id_pairs(yc[:, 0], X[:, 0])
plt.title("reco to PFCandidate", y=1.0)
plt.ylabel("PFCandidate PDGID")
plt.xlabel("Reco object type")

In [None]:
# Compare column 0 of yc with column 0 of y (labelled PFCandidate / GenParticle
# PDGID below) for the elements selected by the mask `m2`.
# NOTE(review): neither `plot_id_pairs` nor `m2` is defined in the visible
# notebook, so this cell fails on a fresh kernel. `m2` presumably selects
# elements that have a PFCandidate (cf. `has_cand`, defined in a later cell) —
# confirm and define it before this cell.
plot_id_pairs(yc[m2, 0], y[m2, 0])
plt.title("gen to PFCandidate", y=1.0)
plt.xlabel("GenParticle PDGID")
plt.ylabel("PFCandidate PDGID")

In [None]:
# Per-element boolean flags: a non-zero id in column 0 of the gen / candidate
# target array marks an element with a gen particle / PFCandidate attached.
has_gen = y[:, 0] != 0
has_cand = yc[:, 0] != 0

# Elements whose type code (column 0 of X) equals 1 are selected as tracks.
is_track = (X[:, 0] == 1)

In [None]:
# Shape of the feature array X (the one left over from the last file read in the loading loop).
X.shape

In [None]:
# Occurrences of each value in column 0 of X among elements that have a gen match.
Counter(X[has_gen, 0])

In [None]:
# Occurrences of each value in column 0 of X among elements without a gen match.
Counter(X[~has_gen, 0])

In [None]:
# Shape of the track-only subset; the first entry is the number of track elements.
X[is_track].shape

In [None]:
# Tracks that have both a PFCandidate and a gen match.
X[is_track & has_cand & has_gen].shape

In [None]:
# Tracks that have a PFCandidate but no gen match.
X[is_track & has_cand & ~has_gen].shape

In [None]:
# Tracks that have no PFCandidate.
X[is_track & ~has_cand].shape

In [None]:
# Number of elements that have both a PFCandidate and a gen match.
yc[has_cand & has_gen, 0].shape

In [None]:
# Number of elements that have a PFCandidate but no gen match.
yc[has_cand & ~has_gen, 0].shape

In [None]:
# Column 0 of yc holds the candidate id used for the per-id bookkeeping below.
cand_pid = yc[:, 0]

# Sorted distinct ids among elements that have a candidate.
pids = np.unique(cand_pid[has_cand])

# Occurrences of each id, split by whether the element also has a gen match.
c1 = Counter(cand_pid[has_cand & has_gen])
c2 = Counter(cand_pid[has_cand & ~has_gen])

In [None]:
# Turn the per-id counts into fractions of all elements that have a candidate.
n_cand = np.sum(has_cand)
yvals = [c1[pid] / n_cand for pid in pids]    # matched to gen
yvals2 = [c2[pid] / n_cand for pid in pids]   # not matched to gen

In [None]:
# Stacked bar chart: for each candidate id, the matched fraction with the
# unmatched fraction stacked on top of it.
fig, ax = plt.subplots(figsize=(9, 5))
xs = np.arange(len(pids))
ax.bar(xs, yvals, label="matched")
ax.bar(xs, yvals2, bottom=yvals, label="not matched")
# One tick per id, labelled with the id as an integer.
ax.set_xticks(xs)
ax.set_xticklabels([int(pid) for pid in pids])
ax.set_ylabel("fraction of total candidates")
ax.set_xlabel("PFCandidate PDGID")
ax.set_title("PFCandidate to Gen match")
ax.legend()
ax.set_ylim(0, 0.4)

In [None]:
# Place the columns of X, y and yc side by side so that each row describes one
# element; the resulting columns are numbered 0..N-1 in that order.
df2 = pd.DataFrame(np.concatenate([X, y, yc], axis=1))

In [None]:
# Rows whose column 0 (the first column of X) equals 4.
df2[df2[0]==4]

In [None]:
# Draw up to 100 distinct random row positions for a spot check. A fixed-seed
# generator makes the sample identical on every Restart & Run All; the global
# unseeded RNG used previously gave a different sample on each run.
randinds = np.random.default_rng(0).permutation(len(df2))[:100]

# Keep four columns, selected by position in the stacked X | y | yc layout.
# NOTE(review): which quantities these positions hold depends on the widths of
# X and y, which are not visible here — confirm.
df3 = df2[[0, 4, 6, 12]]

In [None]:
# Display the randomly selected rows (df2/df3 use the default integer index, so labels equal positions).
df3.loc[randinds]

In [None]:
# Zoomed scatter of reco elements, gen particles and PFCandidates, using
# columns 2 and 3 of each array as the (eta, phi) coordinates, with grey lines
# joining each reco element to its gen / candidate counterpart.
plt.figure(figsize=(5,5))
ax = plt.axes()

# Keep only elements whose reco coordinates fall inside the +-0.2 window.
bigmask = (np.abs(X[:, 2]) < 0.2) & (np.abs(X[:, 3]) < 0.2)
plt.scatter(X[bigmask, 2], X[bigmask, 3], marker=".", label="reco")
# NOTE(review): this cell originally used masks `m` and `m2`, which are not
# defined anywhere in the notebook (NameError on a fresh kernel). They are
# replaced with `has_gen` / `has_cand` from the earlier mask cell, on the
# assumption that they meant "has a gen particle" / "has a PFCandidate" — confirm.
plt.scatter(y[bigmask & has_gen, 2], y[bigmask & has_gen, 3], marker="x", label="gen")
plt.scatter(yc[bigmask & has_cand, 2], yc[bigmask & has_cand, 3], marker=".", label="PF")

for idx in np.nonzero(bigmask)[0]:
    _x1, _y1 = X[idx, 2], X[idx, 3]
    _x2, _y2 = y[idx, 2], y[idx, 3]
    _x3, _y3 = yc[idx, 2], yc[idx, 3]
    # A zero first coordinate is treated as "no counterpart"; a line is drawn
    # only when the counterpart itself also lies inside the window.
    if _x2 != 0 and abs(_x2) < 0.2 and abs(_y2) < 0.2:
        plt.plot([_x1, _x2], [_y1, _y2], color="gray")
    if _x3 != 0 and abs(_x3) < 0.2 and abs(_y3) < 0.2:
        plt.plot([_x1, _x3], [_y1, _y3], color="gray")

plt.xlim(-0.2, 0.2)
plt.ylim(-0.2, 0.2)
plt.xlabel("eta")
plt.ylabel("phi")
# Place the legend just outside the right edge of the axes.
plt.legend(loc=(1.01,0.1))

In [None]:
# IPython-only help lookup (`name?` displays the docstring); not valid plain Python.
# NOTE(review): leftover from interactive exploration — consider deleting this cell.
plt.legend?