In [None]:
import sklearn
import sklearn.metrics
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas
import mplhep
import pickle
import awkward
import glob
import bz2
import os
import tqdm
import fastjet
import vector
import uproot

# Apply the mplhep "CMS" matplotlib style to every figure created below.
mplhep.style.use("CMS")

# Extend the module search path with the repository's mlpf directory (relative
# to this notebook) so that the project-local jet_utils module can be imported.
import sys
sys.path += ["../../mlpf/"]

import jet_utils
# Same for the plotting helpers, which live in the mlpf/plotting directory.
sys.path += ["../../mlpf/plotting/"]

# Project-local plotting helpers and label tables.
from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS
from plot_utils import cms_label, sample_label
from plot_utils import pid_to_text

In [None]:
# Particle-level pT spectra of the four particle collections, overlaid on
# common log-spaced bins.
# NOTE(review): particles_pythia, particles_cp, arrs_awk, the masks and
# sample_name are not defined in the visible cells; they must come from an
# earlier cell.
b = np.logspace(-4,4,100)
fig = plt.figure()
ax = plt.axes()

plt.hist(awkward.flatten(particles_pythia[mask_pythia_nonu]["gen_pt"]), bins=b, label="Pythia", histtype="step")
plt.hist(awkward.flatten(particles_cp[mask_cp]["caloparticle_pt"]), bins=b, label="CaloParticle", histtype="step")
plt.hist(awkward.flatten(arrs_awk["ytarget"]["pt"]), bins=b, label="MLPF target", histtype="step")
plt.hist(awkward.flatten(arrs_awk["ytarget"]["pt"][pu_mask]), bins=b, label="MLPF target, no PU", histtype="step")

plt.xscale("log")
plt.yscale("log")
plt.xlabel("Particle $p_T$ [GeV]")
plt.legend(loc=1, fontsize=12)
plt.ylim(1, 1e7)

cms_label(ax)
sample_label(ax, sample_name)

# The file name is built from the sample_name string. sample_label is the
# plotting helper function imported from plot_utils; formatting it into the
# path would embed the function's repr in the file name.
plt.savefig("{}_particle_pt.pdf".format(sample_name))

In [None]:
# Per-species particle pT spectra: one panel per unique |pid| found in the
# selected Pythia particles, each overlaying the four particle collections.
b = np.logspace(-4,4,100)

# Absolute particle-ID values, so that both signs of an ID share a panel.
pid1 = np.abs(particles_pythia["gen_pdgid"])
pid2 = np.abs(particles_cp["caloparticle_pid"])
pid3 = np.abs(arrs_awk["ytarget"]["pid"])

uniq_pid = np.unique(awkward.flatten(pid1[mask_pythia_nonu]))
# NOTE(review): the grid has 16 panels; more than 16 unique IDs would raise an
# IndexError on axs[iax] below. Confirm the expected number of species.
fig, axs = plt.subplots(4,4, figsize=(16,16))

iax = 0
axs = axs.flatten()

for pid in uniq_pid:
    if (np.sum(pid1==pid)>0):
        # Draw into the next free panel.
        plt.sca(axs[iax])
        plt.hist(awkward.flatten(particles_pythia[mask_pythia_nonu & (pid1==pid)]["gen_pt"]), bins=b, label="Pythia", histtype="step")
        plt.hist(awkward.flatten(particles_cp[mask_cp & (pid2==pid)]["caloparticle_pt"]), bins=b, label="CaloParticle", histtype="step")
        plt.hist(awkward.flatten(arrs_awk["ytarget"]["pt"][pid3==pid]), bins=b, label="MLPF target", histtype="step")
        plt.hist(awkward.flatten(arrs_awk["ytarget"]["pt"][(pid3==pid) & pu_mask]), bins=b, label="MLPF target, no PU", histtype="step")

        plt.xscale("log")
        plt.yscale("log")
        plt.xlabel("Particle $p_T$ [GeV]")
        plt.legend(loc=1, fontsize=8)
        plt.title(pid, fontsize=12)
        iax += 1
        plt.ylim(1, 1e6)
plt.tight_layout()

# The file name is built from the sample_name string. sample_label is the
# plotting helper function imported from plot_utils, not a string, so it must
# not be formatted into the path.
plt.savefig("{}_particle_pt_separate.pdf".format(sample_name))

In [None]:
# Cluster every particle collection into jets with the same jet definition
# (anti-kt, parameter 0.4) and collect the results in jets_coll, keyed by
# collection name.
jets_coll = {}
jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)


def _cluster_jets(pt, eta, phi, energy, jet_definition, min_pt=3):
    """Cluster one particle collection and return its inclusive jets.

    Args:
        pt, eta, phi, energy: per-event arrays of the particle kinematics.
        jet_definition: the fastjet.JetDefinition to cluster with.
        min_pt: threshold passed to inclusive_jets (default 3, the value used
            for every collection in this notebook).

    Returns:
        The result of ClusterSequence.inclusive_jets(min_pt=min_pt).
    """
    vec = vector.awk(awkward.zip({"pt": pt, "eta": eta, "phi": phi, "energy": energy}))
    # fastjet is fed Cartesian (x, y, z, t) four-vectors.
    cluster = fastjet.ClusterSequence(vec.to_xyzt(), jet_definition)
    return cluster.inclusive_jets(min_pt=min_pt)


# Apply each particle-level mask once instead of once per kinematic field.
pythia_selected = particles_pythia[mask_pythia_nonu]
jets_coll["pythia_nonu"] = _cluster_jets(
    pythia_selected["gen_pt"],
    pythia_selected["gen_eta"],
    pythia_selected["gen_phi"],
    pythia_selected["gen_energy"],
    jetdef,
)

cp_selected = particles_cp[mask_cp]
jets_coll["cp"] = _cluster_jets(
    cp_selected["caloparticle_pt"],
    cp_selected["caloparticle_eta"],
    cp_selected["caloparticle_phi"],
    cp_selected["caloparticle_energy"],
    jetdef,
)

# The two collections stored in arrs_awk are clustered unmasked.
for coll in ["ytarget", "ycand"]:
    jets_coll[coll] = _cluster_jets(
        arrs_awk[coll]["pt"],
        arrs_awk[coll]["eta"],
        arrs_awk[coll]["phi"],
        arrs_awk[coll]["energy"],
        jetdef,
    )

# Target particles restricted by pu_mask. The mask is applied to each field
# array (field first, then mask), exactly as in the unmasked case above.
jets_coll["ytarget_nopu"] = _cluster_jets(
    arrs_awk["ytarget"]["pt"][pu_mask],
    arrs_awk["ytarget"]["eta"][pu_mask],
    arrs_awk["ytarget"]["phi"][pu_mask],
    arrs_awk["ytarget"]["energy"][pu_mask],
    jetdef,
)

# Jets taken as-is from genjet_cmssw (defined in an earlier cell), not reclustered here.
jets_coll["genjet"] = genjet_cmssw

In [None]:
# Jet pT spectra of the clustered collections on shared log-spaced bins.
bins = np.logspace(0, 4, 100)
fig = plt.figure()
ax = plt.axes()

# (key in jets_coll, legend entry), drawn in this order.
for coll_name, legend_text in [
    ("pythia_nonu", "Pythia"),
    ("cp", "CaloParticle"),
    ("ytarget", "MLPF target"),
    ("ytarget_nopu", "MLPF target, no PU"),
]:
    plt.hist(
        awkward.flatten(jets_coll[coll_name].pt),
        histtype="step",
        bins=bins,
        label=legend_text,
    )

plt.xscale("log")
plt.yscale("log")
plt.legend()
sample_label(ax, sample_name)
cms_label(ax)
plt.xlabel("jet $p_T$ [GeV]")
plt.ylim(1, 1e6)

In [None]:
# Jet eta distributions of the clustered collections on shared linear bins.
bins = np.linspace(-5, 5, 100)

fig = plt.figure()
ax = plt.axes()

plt.hist(awkward.flatten(jets_coll["pythia_nonu"].eta), histtype="step", bins=bins, label="Pythia")
plt.hist(awkward.flatten(jets_coll["cp"].eta), histtype="step", bins=bins, label="CaloParticle")
plt.hist(awkward.flatten(jets_coll["ytarget"].eta), histtype="step", bins=bins, label="MLPF target");
plt.hist(awkward.flatten(jets_coll["ytarget_nopu"].eta), histtype="step", bins=bins, label="MLPF target, no PU");

sample_label(ax, sample_name)
cms_label(ax)
# Raw string: "\e" is not a valid escape sequence in a normal string literal
# and triggers a DeprecationWarning/SyntaxWarning. The rendered label is unchanged.
plt.xlabel(r"jet $\eta$");

In [None]:
# Match the Pythia jets against each of the other clustered collections with
# jet_utils.match_two_jet_collections; the last argument (0.1) is the same for
# every pair. The results are unpacked in the order of the tuple below.
pythia_to_cp, pythia_to_ytarget, pythia_to_ytarget_nopu, pythia_to_ycand = (
    jet_utils.match_two_jet_collections(jets_coll, "pythia_nonu", other_coll, 0.1)
    for other_coll in ("cp", "ytarget", "ytarget_nopu", "ycand")
)

In [None]:
# For each matching result: the total number of "pythia_nonu" entries in the
# match, divided by the total number of Pythia jets over all events.
n_pythia_jets = np.sum(awkward.num(jets_coll["pythia_nonu"], axis=1))

fm_cp = np.sum(awkward.num(pythia_to_cp["pythia_nonu"])) / n_pythia_jets
fm_tg = np.sum(awkward.num(pythia_to_ytarget["pythia_nonu"])) / n_pythia_jets
fm_tg_nopu = np.sum(awkward.num(pythia_to_ytarget_nopu["pythia_nonu"])) / n_pythia_jets
fm_pf = np.sum(awkward.num(pythia_to_ycand["pythia_nonu"])) / n_pythia_jets

In [None]:
# Jet response (matched jet pT divided by the matched Pythia jet pT) for each
# collection, on a linear y axis. Each legend entry reports the median (M) and
# IQR returned by med_iqr and the matched fraction f computed earlier.
# NOTE(review): med_iqr is not defined or imported in the visible cells; it
# must come from an earlier cell.
plt.figure()
ax = plt.axes()
b = np.linspace(0.5,1.5,101)

# (key in jets_coll, matching result, histtype, legend name, matched fraction)
response_specs = [
    ("cp", pythia_to_cp, "bar", "CaloParticle", fm_cp),
    ("ytarget", pythia_to_ytarget, "step", "MLPF target", fm_tg),
    # The no-PU curve reports its own matched fraction, fm_tg_nopu, rather
    # than the fraction of the unmasked target collection.
    ("ytarget_nopu", pythia_to_ytarget_nopu, "step", "MLPF target, no PU", fm_tg_nopu),
    ("ycand", pythia_to_ycand, "step", "PF", fm_pf),
]

for coll_name, match, hist_type, legend_name, matched_frac in response_specs:
    ratio = awkward.flatten(
        jets_coll[coll_name][match[coll_name]].pt / jets_coll["pythia_nonu"][match["pythia_nonu"]].pt
    )
    med, iqr = med_iqr(ratio)
    plt.hist(
        ratio,
        bins=b,
        histtype=hist_type,
        lw=1,
        label="{} (M={:.2f}, IQR={:.2f}, f={:.2f})".format(legend_name, med, iqr, matched_frac),
    )

plt.xlabel("jet $p_T$ / gen-jet $p_T$")
plt.legend(loc=1, fontsize=10)
cms_label(ax)
sample_label(ax, sample_name)

# The file name is built from the sample_name string. sample_label is the
# plotting helper function imported from plot_utils, not a string.
plt.savefig("{}_truth_target_jets.pdf".format(sample_name))

In [None]:
# Jet response (matched jet pT divided by the matched Pythia jet pT) for each
# collection over a wide ratio range [0, 5], on a logarithmic y axis. Each
# legend entry reports the median (M) and IQR returned by med_iqr and the
# matched fraction f computed earlier.
# NOTE(review): med_iqr is not defined or imported in the visible cells; it
# must come from an earlier cell.
plt.figure()
ax = plt.axes()
b = np.linspace(0,5,101)

# (key in jets_coll, matching result, histtype, legend name, matched fraction)
response_specs = [
    ("cp", pythia_to_cp, "bar", "CaloParticle", fm_cp),
    ("ytarget", pythia_to_ytarget, "step", "MLPF target", fm_tg),
    # The no-PU curve reports its own matched fraction, fm_tg_nopu, rather
    # than the fraction of the unmasked target collection.
    ("ytarget_nopu", pythia_to_ytarget_nopu, "step", "MLPF target, no PU", fm_tg_nopu),
    ("ycand", pythia_to_ycand, "step", "PF", fm_pf),
]

for coll_name, match, hist_type, legend_name, matched_frac in response_specs:
    ratio = awkward.flatten(
        jets_coll[coll_name][match[coll_name]].pt / jets_coll["pythia_nonu"][match["pythia_nonu"]].pt
    )
    med, iqr = med_iqr(ratio)
    plt.hist(
        ratio,
        bins=b,
        histtype=hist_type,
        lw=1,
        label="{} (M={:.2f}, IQR={:.2f}, f={:.2f})".format(legend_name, med, iqr, matched_frac),
    )

plt.yscale("log")
plt.xlabel("jet $p_T$ / gen-jet $p_T$")
plt.legend(loc=(0.55, 0.8), fontsize=10)
cms_label(ax)
sample_label(ax, sample_name)

# The file name is built from the sample_name string (sample_label is the
# plotting helper function, not a string). The "_log" suffix keeps this figure
# from overwriting the "<sample>_truth_target_jets.pdf" file written by the
# linear-scale version of this plot.
plt.savefig("{}_truth_target_jets_log.pdf".format(sample_name))

In [None]:
import pickle
from functools import reduce
import mplhep
import boost_histogram as bh

In [None]:
def add_results(d0, d1):
    """Merge two result dictionaries into a new one.

    Keys present in both inputs map to ``d0[key] + d1[key]``; keys present in
    only one input keep that input's value unchanged. Neither input is
    modified.
    """
    merged = {}
    for key in set(d0.keys()).union(d1.keys()):
        if key in d0 and key in d1:
            merged[key] = d0[key] + d1[key]
        elif key in d0:
            merged[key] = d0[key]
        else:
            merged[key] = d1[key]
    return merged

In [None]:
# Load every pickled result dictionary matching ../../out*.pkl and merge them
# key by key with add_results.
# SECURITY NOTE: pickle.load can execute arbitrary code; only load result files
# that were produced by a trusted job.
files = []
# sorted() makes the merge order reproducible; glob returns paths in arbitrary order.
for fn in sorted(glob.glob("../../out*.pkl")):
    # Context manager so each file handle is closed right after loading.
    with open(fn, "rb") as fi:
        files.append(pickle.load(fi))
ret = reduce(add_results, files, {})

# Result keys are "/"-separated paths. Per-sample prefixes are the first two
# path components of every key that does not start with "combined".
sample_keys = sorted({"/".join(k.split("/")[0:2]) for k in ret.keys() if not k.startswith("combined")})

# Keys starting with "combined" are grouped by their first three components.
sample_keys_combined = sorted({"/".join(k.split("/")[0:3]) for k in ret.keys() if k.startswith("combined")})

In [None]:
# Display the sorted per-sample key prefixes found in the merged results.
sample_keys

In [None]:
# Display the sorted key prefixes of the "combined" entries in the merged results.
sample_keys_combined

In [None]:
# One figure per "combined" prefix, overlaying the four particle-pT histograms
# stored under that prefix in the merged results.
particle_pt_hists = (
    ("particles_pt_pythia", "Pythia"),
    ("particles_pt_caloparticle", "CaloParticle"),
    ("particles_pt_target", "MLPF target"),
    ("particles_pt_target_pumask", "MLPF target, PU mask"),
)
for sample in sample_keys_combined:
    plt.figure()
    for hist_name, legend_text in particle_pt_hists:
        mplhep.histplot(ret[f"{sample}/{hist_name}"], label=legend_text)
    plt.xscale("log")
    plt.yscale("log")
    plt.legend(loc=1)
    plt.ylim(1, 1e7)
    plt.title(sample, fontsize=12)

In [None]:
# One figure per sample prefix, overlaying the four jet-pT histograms stored
# under that prefix in the merged results.
jet_pt_hists = (
    ("jets_pt_genjet", "genJet"),
    ("jets_pt_cand", "PF"),
    ("jets_pt_target", "MLPF target"),
    ("jets_pt_target_pumask", "MLPF target, PU mask"),
)
for sample in sample_keys:
    plt.figure()
    for hist_name, legend_text in jet_pt_hists:
        mplhep.histplot(ret[f"{sample}/{hist_name}"], label=legend_text)
    plt.xscale("log")
    plt.legend()
    plt.title(sample, fontsize=12)

In [None]:
# One figure per sample prefix with the jet-pT ratio histograms, each rebinned
# by merging `rebin` adjacent bins.
rebin = 5
# The "jets_pt_ratio_caloparticle" (CaloParticle) histogram is not drawn.
jet_ratio_hists = (
    ("jets_pt_ratio_cand", "PF"),
    ("jets_pt_ratio_target", "MLPF target"),
    ("jets_pt_ratio_target_pumask", "MLPF target, PU mask"),
)
for sample in sample_keys:
    plt.figure()
    # An all-zero, unlabeled copy of the PF histogram is drawn first.
    # NOTE(review): presumably this only advances the color cycle so the curves
    # keep the colors used in the other plots -- confirm.
    mplhep.histplot(0.0*ret[f"{sample}/jets_pt_ratio_cand"][bh.rebin(rebin)], yerr=False)
    for hist_name, legend_text in jet_ratio_hists:
        mplhep.histplot(ret[f"{sample}/{hist_name}"][bh.rebin(rebin)], yerr=False, label=legend_text)
    plt.legend()
    plt.yscale("log")
    plt.title(sample, fontsize=12)

In [None]:
# One figure per sample prefix, overlaying the four "met_*" histograms stored
# under that prefix; rebin = 1 leaves the stored binning unchanged.
rebin = 1
met_hists = (
    ("met_pythia", "Pythia"),
    ("met_cand", "PF"),
    ("met_target", "MLPF target"),
    ("met_target_pumask", "MLPF target, PU masked"),
)
for sample in sample_keys:
    plt.figure()
    for hist_name, legend_text in met_hists:
        mplhep.histplot(ret[f"{sample}/{hist_name}"][bh.rebin(rebin)], yerr=False, label=legend_text)
    plt.legend(loc=1)
    plt.yscale("log")
    plt.xscale("log")
    plt.ylim(1, 1e6)
    plt.title(sample, fontsize=12)