In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import matplotlib
import os

import mplhep
mplhep.style.use("CMS")

import sys
sys.path += ["../../mlpf/"]
sys.path += ["../../mlpf/plotting/"]
from plot_utils import cms_label, med_iqr, sample_label

### Runtimes from CMSSW

In [None]:
def load_runtimes(infile_pattern):
    """Collect per-module PF timing records from CMSSW job logs.

    Every file matching ``infile_pattern`` is scanned for lines that contain
    ``TimeModule`` and mention ``PFProducer`` or ``PFBlockProducer``
    (``MLPFProducer`` lines match too, because the name contains
    ``PFProducer``). The whitespace-separated tokens 1..5 of such a line are
    stored as event, lumi, module, producer and time.

    The first line mentioning ``MLPFProducer`` in each file is dropped,
    because the first inference run is slower due to warmup.

    Lines that are truncated or whose time token is not a number are skipped
    and reported through a single warning instead of aborting the whole load.

    NOTE(review): the third token is labelled "lumi" here; confirm against
    the CMSSW timing output format that it is not actually the run number.

    Args:
        infile_pattern: glob pattern selecting the log files to read.

    Returns:
        pandas.DataFrame with the columns ``event``, ``lumi``, ``module``,
        ``producer`` (strings exactly as found in the log) and ``time``
        (float). The frame is empty when nothing matches.
    """
    import warnings

    columns = ["event", "lumi", "module", "producer", "time"]
    records = []
    n_malformed = 0

    # sorted() gives a reproducible row order; glob's own order depends on the filesystem
    for infile in sorted(glob.glob(infile_pattern)):
        # the warmup skip is tracked per file
        warmup_pending = True
        with open(infile, "r") as fi:
            for line in fi:
                if "TimeModule" not in line:
                    continue
                if "PFProducer" not in line and "PFBlockProducer" not in line:
                    continue

                # skip the first MLPF line in each file (warmup inference)
                if warmup_pending and "MLPFProducer" in line:
                    warmup_pending = False
                    continue

                tokens = line.split()
                try:
                    ev, lumi, module, producer = tokens[1:5]
                    time = float(tokens[5])
                except (IndexError, ValueError):
                    # truncated or garbled line, e.g. from a job that was cut off mid-write
                    n_malformed += 1
                    continue

                records.append((ev, lumi, module, producer, time))

    if n_malformed:
        warnings.warn(
            "load_runtimes: skipped {} malformed TimeModule line(s) matching {!r}".format(
                n_malformed, infile_pattern
            )
        )

    df = pd.DataFrame(records, columns=columns)
    # keep the time column numeric even when no record was found
    df["time"] = df["time"].astype(float)
    return df

In [None]:
# Sample selection: exactly one (folder, physics_process) pair is active.
# `folder` picks the result directories and the output sub-directory,
# `physics_process` is the key handed to sample_label() when plotting.
#
# To process the QCD sample instead, use:
#   folder = "QCD_PU"
#   physics_process = "cms_pf_qcd"
folder, physics_process = "TTbar_PU", "cms_pf_ttbar"

# All figures are written below cmssw/<folder>; create it if it does not exist yet.
outpath = "cmssw/" + folder
os.makedirs(outpath, exist_ok=True)

In [None]:
# Parse the job logs of the "<folder>_pf" and "<folder>_mlpf" result
# directories with load_runtimes(), in that order.
times_pf, times_mlpf = (
    load_runtimes(pattern.format(folder))
    for pattern in (
        "/scratch/persistent/joosep/mlpf/results/cms/CMSSW_15_0_1/{}_pf/slurm-cmssw-el8-gpu.sh*",
        "/scratch/persistent/joosep/mlpf/results/cms/CMSSW_15_0_1/{}_mlpf/slurm-cmssw-el8-gpu.sh*",
    )
)

In [None]:
# Restrict each table to the producers being compared: the two PF producers
# for the first table, the MLPF producer for the second.
times_pf_sel = times_pf.loc[times_pf["producer"].isin(["PFProducer", "PFBlockProducer"])]
times_mlpf_sel = times_mlpf.loc[times_mlpf["producer"].eq("MLPFProducer")]

In [None]:
# One row per (event, lumi), one column per producer, times summed within each cell.
times_pf_event = times_pf_sel.pivot_table(
    values="time",
    index=["event", "lumi"],
    columns=["producer"],
    aggfunc="sum",
)
# Keep only the events for which both PF producers have a recorded time.
times_pf_event_good = times_pf_event[
    times_pf_event["PFBlockProducer"].notna() & times_pf_event["PFProducer"].notna()
]

In [None]:
# One row per (event, lumi) with the summed MLPFProducer time.
times_mlpf_event = times_mlpf_sel.pivot_table(
    values="time",
    index=["event", "lumi"],
    columns=["producer"],
    aggfunc="sum",
)
# Drop the events without a recorded MLPF time.
times_mlpf_event_good = times_mlpf_event[times_mlpf_event["MLPFProducer"].notna()]

In [None]:
# Keep only the (event, lumi) pairs present in both the PF and the MLPF table.
times_joined = pd.merge(
    times_pf_event_good,
    times_mlpf_event_good,
    how="inner",
    on=["event", "lumi"],
)

In [None]:
# 2D histogram: summed PFBlock+PFAlgo runtime vs. MLPF runtime for the joined events.
fig, ax = plt.subplots(figsize=(12.5, 11))

# 100 equal-width bins on [0, 2], used for both axes
runtime_edges = np.linspace(0, 2.0, 101)
pf_runtime = times_joined["PFBlockProducer"] + times_joined["PFProducer"]

# hist2d returns (counts, xedges, yedges, image); the image drives the colorbar
mesh = ax.hist2d(
    pf_runtime,
    times_joined["MLPFProducer"],
    bins=runtime_edges,
    norm=matplotlib.colors.LogNorm(vmin=0.1),
    cmap="turbo",
)[-1]

ax.set_xlabel("PFBlock+PFAlgo runtime on CPU [s]")
ax.set_ylabel("MLPF runtime on GPU [s]")
cms_label(ax)
sample_label(ax, physics_process)
fig.colorbar(mesh, ax=ax)

# No labelled artists exist here; the legend only carries the title text.
ax.legend(
    title="CMSSW_15_0_1\nnative ONNXRuntime\n6 jobs per GPU\n8 threads per job",
    loc=(0.5, 0.68),
)
fig.savefig("{}/cmssw_runtime_2d.pdf".format(outpath), bbox_inches="tight")

In [None]:
# 1D runtime distributions per event: PFBlock+PFAlgo vs. MLPF, drawn as step histograms.
fig, ax = plt.subplots()

# 200 equal-width bins on [0, 2], shared by both histograms
runtime_edges = np.linspace(0, 2, 201)
step_style = dict(bins=runtime_edges, histtype="step", lw=2)

# The two values returned by med_iqr() are shown as M and IQR in the legend entry.
pf_runtime = times_pf_event_good["PFBlockProducer"] + times_pf_event_good["PFProducer"]
med, iqr = med_iqr(pf_runtime)
ax.hist(
    pf_runtime,
    label="PFBlock+PFAlgo on CPU\nM={:.2f}s IQR={:.2f}s".format(med, iqr),
    **step_style
)

mlpf_runtime = times_mlpf_event_good["MLPFProducer"]
med, iqr = med_iqr(mlpf_runtime)
ax.hist(
    mlpf_runtime,
    label="MLPF on GPU\nM={:.2f}s IQR={:.2f}s".format(med, iqr),
    **step_style
)

cms_label(ax)
sample_label(ax, physics_process)
ax.legend(
    title="CMSSW_15_0_1\nnative ONNXRuntime\n6 jobs per GPU\n8 threads per job",
    loc=(0.45, 0.47),
)
ax.set_xlabel("Runtime per event [s]")
ax.set_yscale("log")
fig.savefig("{}/cmssw_runtime.pdf".format(outpath), bbox_inches="tight")