In [None]:
import glob
import pickle

import awkward
import boost_histogram as bh
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import mplhep
import numpy as np
import uproot


In [None]:
# Human-readable names of the PF candidate classes.
# A missing comma after "none" previously merged the first two entries into
# "nonecharged hadron" through implicit string concatenation.
CMS_PF_CLASS_NAMES = ["none", "charged hadron", "neutral hadron", "hfem", "hfhad", "photon", "electron", "muon"]

# Integer codes of the PF element types and their names, index-aligned.
ELEM_LABELS_CMS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
ELEM_NAMES_CMS = ["NONE", "TRACK", "PS1", "PS2", "ECAL", "HCAL", "GSF", "BREM", "HFEM", "HFHAD", "SC", "HO"]

# Candidate class codes (compared against abs(pdgId) in the plotting helpers)
# and their short plot labels, index-aligned.
# Raw strings keep the LaTeX backslashes without relying on invalid escapes.
CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]
CLASS_NAMES_CMS = ["none", "ch.had", "n.had", "HFEM", "HFHAD", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$"]

# Lookup from class code to plot label.
class_names = dict(zip(CLASS_LABELS_CMS, CLASS_NAMES_CMS))

In [None]:
physics_process = "ttbar"  # supported values: "ttbar", "qcd"

# Directory with one sub-directory per reconstruction workflow; each of them
# holds the pickled event content (out.pkl) and the DQM ROOT file opened below.
RECO_DIR = "/home/joosep/reco/mlpf/CMSSW_12_1_0_pre3"
DQM_FILENAME = "DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root"

# Workflow number per physics process. The ".0" variant is loaded as the
# baseline and the ".13" variant as MLPF.
WORKFLOW_BY_PROCESS = {"qcd": "11843", "ttbar": "11834"}


def _load_pickled_events(path):
    """Read one pickle file and wrap its content in an ``awkward.Array``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read.

    NOTE: unpickling can execute arbitrary code; only load files you trust.
    """
    with open(path, "rb") as fobj:
        return awkward.Array(pickle.load(fobj))


if physics_process not in WORKFLOW_BY_PROCESS:
    # Fail here instead of with a NameError in a later plotting cell.
    raise ValueError(
        "Unknown physics_process {!r}, expected one of {}".format(physics_process, sorted(WORKFLOW_BY_PROCESS))
    )

_workflow = WORKFLOW_BY_PROCESS[physics_process]
_baseline_dir = "{}/{}.0".format(RECO_DIR, _workflow)
_mlpf_dir = "{}/{}.13".format(RECO_DIR, _workflow)

data_baseline = _load_pickled_events(_baseline_dir + "/out.pkl")
data_mlpf = _load_pickled_events(_mlpf_dir + "/out.pkl")

# DQM files of the baseline (fi1) and MLPF (fi2) workflows.
fi1 = uproot.open(_baseline_dir + "/" + DQM_FILENAME)
fi2 = uproot.open(_mlpf_dir + "/" + DQM_FILENAME)

In [None]:
def cms_label(ax, x0=0.01, x1=0.1, x2=0.98, y=0.97):
    """Write the three-part CMS header line, positioned in the coordinates of ``ax``.

    Args:
        ax: axes whose ``transAxes`` transform is used to place the text.
        x0: x position of the bold "CMS" text (left-aligned).
        x1: x position of the italic "Simulation Preliminary" text (left-aligned).
        x2: x position of the "Run 3 (14 TeV)" text (right-aligned).
        y: common y position of all three texts.

    The texts are added with ``plt.figtext``, i.e. to the current figure.
    """
    header_parts = (
        (x0, 'CMS', dict(fontweight='bold', wrap=True, horizontalalignment='left', fontsize=12)),
        (x1, 'Simulation Preliminary', dict(style='italic', wrap=True, horizontalalignment='left', fontsize=10)),
        (x2, 'Run 3 (14 TeV)', dict(wrap=False, horizontalalignment='right', fontsize=10)),
    )
    for xpos, caption, text_style in header_parts:
        plt.figtext(xpos, y, caption, **text_style, transform=ax.transAxes)
    
def sample_label(ax, physics_process=physics_process, x=0.01, y=0.93):
    """Write the sample description (e.g. "QCD events") onto ``ax``.

    Args:
        ax: axes that receives the text; positions are in its axes coordinates.
        physics_process: key into ``physics_process_str``. The default is the
            value of the notebook-level ``physics_process`` at the time this
            cell is executed; re-run the cell after changing that variable.
        x: x position of the left-aligned text.
        y: y position of the text.

    Raises:
        KeyError: if ``physics_process`` is not a key of ``physics_process_str``.
    """
    # Draw on the axes that was passed in rather than on whatever axes is
    # current, so the text and its transform always refer to the same axes.
    ax.text(x, y, physics_process_str[physics_process], ha="left", size=10, transform=ax.transAxes)
    
# Plot caption for each physics process key, looked up by sample_label().
# Raw strings: the LaTeX backslashes (\mathrm, \overline, \pi, \pm) are not valid
# Python escapes and trigger a warning in regular string literals.
physics_process_str = {
    "ttbar": r"$\mathrm{t}\overline{\mathrm{t}}$ events",
    "singlepi": r"single $\pi^{\pm}$ events",
    "qcd": "QCD events",
}

In [None]:
def plot_candidates_pf_vs_mlpf(variable, varname, bins):
    """Overlay PF and MLPF candidate distributions, one panel per particle type.

    Reads the notebook-level ``data_baseline`` and ``data_mlpf`` arrays, selects
    the ``particleFlow`` candidates whose ``abs(pdgId)`` equals each class code,
    and histograms ``variable`` for both samples in a 3x3 grid of panels.

    Args:
        variable: field of the ``particleFlow`` record to histogram (e.g. "pt").
        varname: x-axis label used on every panel.
        bins: bin edges passed to ``bh.axis.Variable``.

    Returns:
        Tuple ``(hists_baseline, hists_mlpf)``: two lists of ``bh.Histogram``,
        ordered like the class codes ``[13, 11, 22, 1, 2, 130, 211]``.
    """
    # Only the figure is created here. An extra full-figure plt.axes() would be
    # kept behind the subplots on matplotlib versions that no longer delete
    # overlapping axes.
    plt.figure(figsize=(12, 12))

    hists_baseline = []
    hists_mlpf = []
    for iplot, pid in enumerate([13, 11, 22, 1, 2, 130, 211], start=1):
        msk1 = np.abs(data_baseline["particleFlow"]["pdgId"]) == pid
        msk2 = np.abs(data_mlpf["particleFlow"]["pdgId"]) == pid

        # Flatten the per-event lists into one flat array of candidate values.
        d1 = awkward.flatten(data_baseline["particleFlow"][variable][msk1])
        d2 = awkward.flatten(data_mlpf["particleFlow"][variable][msk2])

        h1 = bh.Histogram(bh.axis.Variable(bins))
        h1.fill(d1)
        h2 = bh.Histogram(bh.axis.Variable(bins))
        h2.fill(d2)

        # plt.subplot makes the new panel the current axes for the calls below.
        ax = plt.subplot(3, 3, iplot)

        mplhep.histplot(h1, histtype="step", lw=2, label="PF")
        mplhep.histplot(h2, histtype="step", lw=2, label="MLPF")

        # Every variable except eta is drawn with a logarithmic y axis.
        if variable != "eta":
            plt.yscale("log")

        plt.legend(loc="best", frameon=False, title=class_names[pid])
        plt.xlabel(varname)
        plt.ylabel("Number of particles / bin")
        sample_label(ax, x=0.08)

        hists_baseline.append(h1)
        hists_mlpf.append(h2)

    plt.tight_layout()
    return hists_baseline, hists_mlpf

def plot_candidates_pf_vs_mlpf_single(hists):
    """Draw all particle types in one stacked figure: PF as histograms, MLPF as error bars.

    Args:
        hists: pair ``(hists_baseline, hists_mlpf)`` as returned by
            ``plot_candidates_pf_vs_mlpf``; every histogram is rebinned by a
            factor of 2 before plotting.
    """
    def _rebinned(histograms):
        # Merge pairs of neighbouring bins in every histogram.
        return [h[bh.rebin(2)] for h in histograms]

    plt.figure(figsize=(7, 7))
    ax = plt.axes()

    pid_labels = [class_names[k] for k in [13,11,22,1,2,130,211]]
    v1 = mplhep.histplot(_rebinned(hists[0]), stack=True, label=pid_labels, lw=1)

    # Take the edge colours of the PF stack, in reversed order, for the MLPF markers.
    mlpf_colors = [artist.stairs.get_edgecolor() for artist in v1]
    mlpf_colors.reverse()
    v2 = mplhep.histplot(_rebinned(hists[1]), stack=True, color=mlpf_colors, lw=2, histtype="errorbar")

    # Both legends show the labels attached to the PF artists.
    entry_labels = [artist.legend_artist.get_label() for artist in v1]
    legend1 = plt.legend(v1, entry_labels, loc=(0.60, 0.6), title="PF")
    legend2 = plt.legend(v2, entry_labels, loc=(0.8, 0.6), title="MLPF")
    # The second plt.legend call replaces the first one, so add it back explicitly.
    plt.gca().add_artist(legend1)

    plt.ylabel("Total number of particles / bin")
    cms_label(ax)
    sample_label(ax)

In [None]:
# Per-particle-type pT panels; `hists` is reused by the stacked plot in the next cell.
hists = plot_candidates_pf_vs_mlpf(
    variable="pt",
    varname="PFCandidate $p_T$ [GeV]",
    bins=np.linspace(0,200,101),
)
# plt.savefig("candidates_pt_{}.pdf".format(physics_process), bbox_inches="tight")
# plt.savefig("candidates_pt_{}.png".format(physics_process), dpi=400, bbox_inches="tight")

In [None]:
# Stacked pT distribution of all candidate types, saved as PDF and PNG.
plot_candidates_pf_vs_mlpf_single(hists)
plt.yscale("log")
plt.ylim(top=1e7)
plt.xlabel("PFCandidate $p_T$ [GeV]")

pdf_name = "candidates_pt_single_{}.pdf".format(physics_process)
png_name = "candidates_pt_single_{}.png".format(physics_process)
plt.savefig(pdf_name, bbox_inches="tight")
plt.savefig(png_name, dpi=400, bbox_inches="tight")

In [None]:
# Per-particle-type eta panels; `hists` is reused by the stacked plot in the next cell.
# The label is a raw string because "\e" is not a valid Python escape sequence.
hists = plot_candidates_pf_vs_mlpf("eta", r"PFCandidate $\eta$", np.linspace(-6, 6, 101))
plt.savefig("candidates_eta_{}.pdf".format(physics_process), bbox_inches="tight")
plt.savefig("candidates_eta_{}.png".format(physics_process), dpi=400, bbox_inches="tight")

In [None]:
# Stacked eta distribution of all candidate types, saved as PDF and PNG.
plot_candidates_pf_vs_mlpf_single(hists)
plt.xlabel(r"PFCandidate $\eta$")  # raw string: "\e" is not a valid Python escape
plt.yscale("log")
plt.ylim(top=1e8)
plt.savefig("candidates_eta_single_{}.pdf".format(physics_process), bbox_inches="tight")
# The PNG used to be written as "candidates_ete_single_..." (typo); it now matches the PDF name.
plt.savefig("candidates_eta_single_{}.png".format(physics_process), dpi=400, bbox_inches="tight")

In [None]:
def plot_pf_vs_mlpf_jet(jetcoll, variable, bins):
    """Overlay the PF and MLPF distributions of one variable of a jet collection.

    Reads the notebook-level ``data_baseline`` and ``data_mlpf`` arrays and
    draws both histograms on a new 7x7 figure, which stays current so the
    caller can set the x label and axis scale and then save it.

    Args:
        jetcoll: name of the jet collection record (e.g. "ak4PFJetsCHS").
        variable: field of that record to histogram (e.g. "pt").
        bins: bin edges passed to ``bh.axis.Variable``.

    The function does not save the figure. An earlier version always wrote
    "ak4jet_puppi_energy_<process>.pdf" here, whatever was being plotted;
    every calling cell saves under its own file name instead.
    """
    plt.figure(figsize=(7, 7))
    ax = plt.axes()

    # Flatten the per-event jet lists before filling.
    h1 = bh.Histogram(bh.axis.Variable(bins))
    h1.fill(awkward.flatten(data_baseline[jetcoll][variable]))

    h2 = bh.Histogram(bh.axis.Variable(bins))
    h2.fill(awkward.flatten(data_mlpf[jetcoll][variable]))

    mplhep.histplot(h1, histtype="step", lw=2, label="PF")
    mplhep.histplot(h2, histtype="step", lw=2, label="MLPF")
    cms_label(ax)
    sample_label(ax, x=0.02)

    plt.ylabel("Number of jets")
    plt.legend(loc=(0.8, 0.85), frameon=False)

In [None]:
# CHS jet pT, PF vs MLPF, on a logarithmic y axis.
plot_pf_vs_mlpf_jet(
    jetcoll="ak4PFJetsCHS",
    variable="pt",
    bins=np.linspace(0,500,61),
)
plt.xlabel("ak4PFJetsCHS $p_T$ [GeV]")
plt.yscale("log")
plt.ylim(top=1e5)
plt.savefig("ak4jet_chs_pt_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# PUPPI jet pT, PF vs MLPF, on a logarithmic y axis.
plot_pf_vs_mlpf_jet(
    jetcoll="ak4PFJetsPuppi",
    variable="pt",
    bins=np.linspace(0,500,61),
)
plt.xlabel("ak4PFJetsPuppi $p_T$ [GeV]")
plt.yscale("log")
plt.ylim(top=1e5)
plt.savefig("ak4jet_puppi_pt_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# CHS jet eta, PF vs MLPF, on a linear y axis with a fixed range.
plot_pf_vs_mlpf_jet("ak4PFJetsCHS", "eta", np.linspace(-6, 6, 61))
plt.ylim(0, 10000)
plt.xlabel(r"ak4PFJetsCHS $\eta$")  # raw string: "\e" is not a valid Python escape
plt.savefig("ak4jet_chs_eta_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# PUPPI jet eta, PF vs MLPF, on a linear y axis with a fixed range.
plot_pf_vs_mlpf_jet("ak4PFJetsPuppi", "eta", np.linspace(-6, 6, 61))
plt.ylim(0, 2000)
plt.xlabel(r"ak4PFJetsPuppi $\eta$")  # raw string: "\e" is not a valid Python escape
plt.savefig("ak4jet_puppi_eta_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# CHS jet energy, PF vs MLPF, on a logarithmic y axis.
plot_pf_vs_mlpf_jet(
    jetcoll="ak4PFJetsCHS",
    variable="energy",
    bins=np.linspace(0,2500,61),
)
plt.xlabel("ak4PFJetsCHS $E$ [GeV]")
plt.yscale("log")
plt.ylim(top=1e5)
plt.savefig("ak4jet_chs_energy_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# PUPPI jet energy, PF vs MLPF, on a logarithmic y axis.
plot_pf_vs_mlpf_jet(
    jetcoll="ak4PFJetsPuppi",
    variable="energy",
    bins=np.linspace(0,2500,61),
)
plt.xlabel("ak4PFJetsPuppi $E$ [GeV]")
plt.yscale("log")
plt.ylim(top=1e5)
plt.savefig("ak4jet_puppi_energy_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# pfMet pT: PF vs MLPF on a logarithmic y axis, saved as PDF.
plt.figure(figsize=(7,7))
ax = plt.axes()

bins = np.linspace(0, 500, 41)

# One histogram per sample, filled from the flattened per-event values.
h1 = bh.Histogram(bh.axis.Variable(bins))
h2 = bh.Histogram(bh.axis.Variable(bins))
h1.fill(awkward.flatten(data_baseline["pfMet"]["pt"]))
h2.fill(awkward.flatten(data_mlpf["pfMet"]["pt"]))

for met_hist, legend_entry in ((h1, "PF"), (h2, "MLPF")):
    mplhep.histplot(met_hist, histtype="step", lw=2, label=legend_entry)

plt.xlabel("pfMet $p_T$ [GeV]")
plt.ylabel("Number of events")
plt.yscale("log")
plt.ylim(top=1e3)
plt.legend(loc=(0.8, 0.85), frameon=False)
cms_label(ax)
sample_label(ax, x=0.02)

plt.savefig("pfmet_pt_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# pfMetPuppi pT: PF vs MLPF on a logarithmic y axis, saved as PDF.
plt.figure(figsize=(7,7))
ax = plt.axes()

bins = np.linspace(0, 500, 41)

# One histogram per sample, filled from the flattened per-event values.
h1 = bh.Histogram(bh.axis.Variable(bins))
h2 = bh.Histogram(bh.axis.Variable(bins))
h1.fill(awkward.flatten(data_baseline["pfMetPuppi"]["pt"]))
h2.fill(awkward.flatten(data_mlpf["pfMetPuppi"]["pt"]))

for met_hist, legend_entry in ((h1, "PF"), (h2, "MLPF")):
    mplhep.histplot(met_hist, histtype="step", lw=2, label=legend_entry)

plt.xlabel("pfMet PUPPI $p_T$ [GeV]")
plt.ylabel("Number of events")
plt.yscale("log")
plt.ylim(top=1e3)
plt.legend(loc=(0.8, 0.85), frameon=False)
cms_label(ax)
sample_label(ax, x=0.02)

plt.savefig("pfmet_puppi_pt_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# Pasted benchmark log, one measurement per line. Each line carries four
# key=value fields: Nelem (input size), mean_time and stddev_time (both in ms)
# and mem_used (in MB). The following cell parses it into lists for the
# runtime and memory scaling plots.
timing_output = """
Nelem=1600 mean_time=4.66 ms stddev_time=2.55 ms mem_used=711 MB
Nelem=1920 mean_time=4.74 ms stddev_time=0.52 ms mem_used=711 MB
Nelem=2240 mean_time=5.53 ms stddev_time=0.63 ms mem_used=711 MB
Nelem=2560 mean_time=5.88 ms stddev_time=0.52 ms mem_used=711 MB
Nelem=2880 mean_time=6.22 ms stddev_time=0.63 ms mem_used=745 MB
Nelem=3200 mean_time=6.50 ms stddev_time=0.64 ms mem_used=745 MB
Nelem=3520 mean_time=7.07 ms stddev_time=0.61 ms mem_used=745 MB
Nelem=3840 mean_time=7.53 ms stddev_time=0.68 ms mem_used=745 MB
Nelem=4160 mean_time=7.76 ms stddev_time=0.69 ms mem_used=745 MB
Nelem=4480 mean_time=8.66 ms stddev_time=0.72 ms mem_used=745 MB
Nelem=4800 mean_time=9.00 ms stddev_time=0.57 ms mem_used=745 MB
Nelem=5120 mean_time=9.22 ms stddev_time=0.84 ms mem_used=745 MB
Nelem=5440 mean_time=9.64 ms stddev_time=0.73 ms mem_used=812 MB
Nelem=5760 mean_time=10.39 ms stddev_time=1.06 ms mem_used=812 MB
Nelem=6080 mean_time=10.77 ms stddev_time=0.69 ms mem_used=812 MB
Nelem=6400 mean_time=11.33 ms stddev_time=0.75 ms mem_used=812 MB
Nelem=6720 mean_time=12.19 ms stddev_time=0.77 ms mem_used=812 MB
Nelem=7040 mean_time=12.54 ms stddev_time=0.72 ms mem_used=812 MB
Nelem=7360 mean_time=13.08 ms stddev_time=0.78 ms mem_used=812 MB
Nelem=7680 mean_time=13.71 ms stddev_time=0.81 ms mem_used=812 MB
Nelem=8000 mean_time=14.11 ms stddev_time=0.74 ms mem_used=812 MB
Nelem=8320 mean_time=14.85 ms stddev_time=0.86 ms mem_used=812 MB
Nelem=8640 mean_time=15.36 ms stddev_time=0.79 ms mem_used=812 MB
Nelem=8960 mean_time=16.76 ms stddev_time=1.06 ms mem_used=812 MB
Nelem=9280 mean_time=17.27 ms stddev_time=0.71 ms mem_used=812 MB
Nelem=9600 mean_time=17.97 ms stddev_time=0.85 ms mem_used=812 MB
Nelem=9920 mean_time=18.73 ms stddev_time=0.94 ms mem_used=812 MB
Nelem=10240 mean_time=19.26 ms stddev_time=0.89 ms mem_used=812 MB
Nelem=10560 mean_time=19.91 ms stddev_time=0.90 ms mem_used=946 MB
Nelem=10880 mean_time=20.55 ms stddev_time=0.87 ms mem_used=946 MB
Nelem=11200 mean_time=21.82 ms stddev_time=0.78 ms mem_used=940 MB
Nelem=11520 mean_time=22.48 ms stddev_time=0.75 ms mem_used=940 MB
Nelem=11840 mean_time=23.33 ms stddev_time=0.98 ms mem_used=940 MB
Nelem=12160 mean_time=24.28 ms stddev_time=0.85 ms mem_used=940 MB
Nelem=12480 mean_time=24.85 ms stddev_time=0.67 ms mem_used=940 MB
Nelem=12800 mean_time=25.58 ms stddev_time=0.68 ms mem_used=940 MB
Nelem=13120 mean_time=26.58 ms stddev_time=0.78 ms mem_used=940 MB
Nelem=13440 mean_time=27.15 ms stddev_time=0.63 ms mem_used=940 MB
Nelem=13760 mean_time=27.72 ms stddev_time=0.85 ms mem_used=940 MB
Nelem=14080 mean_time=28.08 ms stddev_time=0.66 ms mem_used=940 MB
Nelem=14400 mean_time=28.70 ms stddev_time=0.73 ms mem_used=940 MB
Nelem=14720 mean_time=29.22 ms stddev_time=0.66 ms mem_used=940 MB
Nelem=15040 mean_time=29.73 ms stddev_time=0.80 ms mem_used=940 MB
Nelem=15360 mean_time=30.71 ms stddev_time=0.85 ms mem_used=940 MB
Nelem=15680 mean_time=31.15 ms stddev_time=0.74 ms mem_used=940 MB
Nelem=16000 mean_time=31.74 ms stddev_time=0.80 ms mem_used=940 MB
Nelem=16320 mean_time=32.27 ms stddev_time=0.77 ms mem_used=940 MB
Nelem=16640 mean_time=33.07 ms stddev_time=1.08 ms mem_used=940 MB
Nelem=16960 mean_time=33.60 ms stddev_time=0.69 ms mem_used=940 MB
Nelem=17280 mean_time=34.43 ms stddev_time=0.64 ms mem_used=940 MB
Nelem=17600 mean_time=35.34 ms stddev_time=0.75 ms mem_used=940 MB
Nelem=17920 mean_time=35.84 ms stddev_time=0.68 ms mem_used=940 MB
Nelem=18240 mean_time=36.51 ms stddev_time=0.85 ms mem_used=940 MB
Nelem=18560 mean_time=37.23 ms stddev_time=0.87 ms mem_used=940 MB
Nelem=18880 mean_time=37.72 ms stddev_time=0.78 ms mem_used=940 MB
Nelem=19200 mean_time=38.33 ms stddev_time=0.87 ms mem_used=940 MB
Nelem=19520 mean_time=38.95 ms stddev_time=0.87 ms mem_used=940 MB
Nelem=19840 mean_time=39.73 ms stddev_time=0.74 ms mem_used=940 MB
Nelem=20160 mean_time=40.27 ms stddev_time=0.81 ms mem_used=940 MB
Nelem=20480 mean_time=40.86 ms stddev_time=0.74 ms mem_used=940 MB
Nelem=20800 mean_time=41.71 ms stddev_time=0.94 ms mem_used=940 MB
Nelem=21120 mean_time=42.35 ms stddev_time=1.38 ms mem_used=1209 MB
Nelem=21440 mean_time=42.91 ms stddev_time=1.18 ms mem_used=1209 MB
Nelem=21760 mean_time=43.40 ms stddev_time=0.98 ms mem_used=1184 MB
Nelem=22080 mean_time=44.43 ms stddev_time=1.04 ms mem_used=1184 MB
Nelem=22400 mean_time=45.22 ms stddev_time=1.02 ms mem_used=1184 MB
Nelem=22720 mean_time=45.57 ms stddev_time=0.94 ms mem_used=1184 MB
Nelem=23040 mean_time=46.21 ms stddev_time=0.86 ms mem_used=1184 MB
Nelem=23360 mean_time=46.85 ms stddev_time=0.95 ms mem_used=1184 MB
Nelem=23680 mean_time=47.52 ms stddev_time=1.57 ms mem_used=1184 MB
Nelem=24000 mean_time=48.31 ms stddev_time=0.74 ms mem_used=1184 MB
Nelem=24320 mean_time=48.92 ms stddev_time=0.75 ms mem_used=1184 MB
Nelem=24640 mean_time=49.70 ms stddev_time=0.92 ms mem_used=1184 MB
Nelem=24960 mean_time=50.26 ms stddev_time=0.93 ms mem_used=1184 MB
Nelem=25280 mean_time=50.98 ms stddev_time=0.89 ms mem_used=1184 MB
"""

In [None]:
def _parse_timing_line(line):
    """Parse one benchmark log line into (Nelem, mean_time, stddev_time, mem_used).

    Fields are looked up by key rather than by token position, so the unit
    tokens ("ms", "MB") and the field order do not matter.

    Raises:
        KeyError: if one of the four expected fields is missing.
        ValueError: if a field value is not numeric.
    """
    fields = dict(token.split("=", 1) for token in line.split() if "=" in token)
    return (
        int(fields["Nelem"]),
        float(fields["mean_time"]),
        float(fields["stddev_time"]),
        float(fields["mem_used"]),
    )


# Parallel lists, one entry per benchmark line.
time_x = []       # number of input elements
time_y = []       # mean runtime [ms]
time_y_err = []   # runtime standard deviation [ms]
gpu_mem_use = []  # memory used [MB]
for line in timing_output.splitlines():
    # Skip empty and whitespace-only lines, such as those around the triple quotes.
    if not line.strip():
        continue
    n_elem, mean_ms, stddev_ms, mem_mb = _parse_timing_line(line)
    time_x.append(n_elem)
    time_y.append(mean_ms)
    time_y_err.append(stddev_ms)
    gpu_mem_use.append(mem_mb)

In [None]:
# Number of PFElements per event in the raw ttbar files, not counting elements
# of type 2 or 3 (presumably PS1/PS2 as in ELEM_NAMES_CMS -- TODO confirm).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
nelem = []
# Sorted so the list order does not depend on the filesystem listing order.
for fi in sorted(glob.glob("../data/TTbar_14TeV_TuneCUETP8M1_cfi/raw/*.pkl")):
    # Close each file right after reading instead of leaking the handle.
    with open(fi, "rb") as fobj:
        d = pickle.load(fobj)
    for elem in d:
        typ = elem["Xelem"]["typ"]
        X = elem["Xelem"][(typ != 2) & (typ != 3)]
        nelem.append(X.shape[0])

In [None]:
# Distribution of the per-event PFElement count collected in `nelem`.
plt.figure(figsize=(7, 7))
ax = plt.axes()

nelem_bin_edges = np.linspace(2000,6000,100)
ax.hist(nelem, bins=nelem_bin_edges)
ax.set_xlabel("PFElements per event")
ax.set_ylabel("Number of events / bin")

cms_label(ax)
sample_label(ax, physics_process="ttbar")

In [None]:
# Mean runtime vs. input size, with the mean +- 1 std band of `nelem` marked by dashed lines.
plt.figure(figsize=(10, 3))
ax = plt.axes()

nelem_mean = np.mean(nelem)
nelem_std = np.std(nelem)

plt.errorbar(time_x, time_y, yerr=time_y_err, marker=".", label="MLPF")
# Only the first dashed line carries the legend label.
plt.axvline(nelem_mean-nelem_std, color="black", ls="--", lw=1.0, label=r"$t\bar{t}$+PU Run 3")
plt.axvline(nelem_mean+nelem_std, color="black", ls="--", lw=1.0)
#plt.xticks(time_x, time_x);

plt.xlabel("PFElements per event")
plt.ylabel("Average runtime per event [ms]")
plt.xlim(0,30000)
plt.ylim(0,100)
plt.legend(loc=4, frameon=False)
cms_label(ax, y=0.93, x1=0.07, x2=0.99)

plt.text(4000, 20, "typical Run3 range", rotation=90)
plt.text(6000, 70, "Inference with ONNXRuntime in a single CPU thread,\nsingle GPU stream on NVIDIA RTX2060S 8GB.\nNot a production-like setup. Synthetic inputs.")

for out_name, save_opts in (
    ("runtime_scaling.pdf", dict(bbox_inches="tight")),
    ("runtime_scaling.png", dict(bbox_inches="tight", dpi=300)),
):
    plt.savefig(out_name, **save_opts)

In [None]:
# GPU memory use vs. input size, with the mean +- 1 std band of `nelem` marked by dashed lines.
plt.figure(figsize=(10, 3))
ax = plt.axes()

nelem_mean = np.mean(nelem)
nelem_std = np.std(nelem)

plt.plot(time_x, gpu_mem_use, marker=".", label="MLPF")
# Only the first dashed line carries the legend label.
plt.axvline(nelem_mean-nelem_std, color="black", ls="--", lw=1.0, label=r"$t\bar{t}$+PU Run 3")
plt.axvline(nelem_mean+nelem_std, color="black", ls="--", lw=1.0)
#plt.xticks(time_x, time_x);

plt.xlabel("PFElements per event")
plt.ylabel("Maximum GPU memory used [MB]")
plt.xlim(0,30000)
plt.ylim(0,3000)
plt.legend(loc=4, frameon=False)
cms_label(ax, y=0.93, x1=0.07, x2=0.99)

plt.text(4000, 500, "typical Run3 range", rotation=90)
plt.text(6000, 2100, "Inference with ONNXRuntime in a single CPU thread,\nsingle GPU stream on NVIDIA RTX2060S 8GB.\nNot a production-like setup. Synthetic inputs.")

for out_name, save_opts in (
    ("memory_scaling.pdf", dict(bbox_inches="tight")),
    ("memory_scaling.png", dict(bbox_inches="tight", dpi=300)),
):
    plt.savefig(out_name, **save_opts)