In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import matplotlib
import os

import mplhep
mplhep.style.use("CMS")

import sys
sys.path += ["../../mlpf/"]
sys.path += ["../../mlpf/plotting/"]
import plot_utils
from plot_utils import med_iqr, sample_label

# Font sizes for the figures in this notebook: the axis-label size is applied
# globally through rcParams, the others are plain constants for the plotting cells.
matplotlib.rcParams.update({'axes.labelsize': 35})
legend_fontsize, sample_label_fontsize = 30, 30
addtext_fontsize = 25

In [None]:
def cms_label(ax):
    """Draw the CMS experiment label on ``ax``.

    Forwards to ``plot_utils.experiment_label`` with the fixed tags used by
    every figure in this notebook and returns whatever that helper returns.
    """
    label_options = dict(
        experiment="CMS",
        tag1="(Preliminary)",
        tag2="Run 3 (13.6 TeV)",
        x1=0.12,
    )
    return plot_utils.experiment_label(ax, **label_options)

### Runtimes from CMSSW

In [None]:
def load_runtimes(infile_pattern):
    """Collect per-module timing rows of the PF producers from CMSSW job logs.

    Every file matching ``infile_pattern`` is scanned for lines that contain
    ``TimeModule`` and either ``PFProducer`` or ``PFBlockProducer`` (the
    ``PFProducer`` substring also matches ``MLPFProducer`` lines). A matching
    line is split on whitespace and fields 1-5 are stored as event, lumi,
    module, producer and time, with the time converted to ``float``. Lines
    that do not have those fields are printed and skipped.

    The first successfully parsed ``MLPFProducer`` line of each file is
    dropped, because the first inference run is slower due to warmup.

    NOTE(review): CMSSW's Timing service is believed to print the event number
    followed by the *run* number on ``TimeModule>`` lines, so the "lumi"
    column may actually hold the run -- confirm against the log format. The
    column name is kept because downstream cells index on it.

    Args:
        infile_pattern: glob pattern selecting the log files to read.

    Returns:
        pandas.DataFrame with the columns ``event``, ``file_index``, ``lumi``,
        ``module``, ``producer`` and ``time``, one row per kept line.
        ``file_index`` is the position of the source file in the sorted list
        of matches. The frame is empty (same columns) when nothing matched.
    """
    columns = ["event", "file_index", "lumi", "module", "producer", "time"]
    rows = []

    # glob returns matches in arbitrary order; sort them so that file_index is
    # reproducible from one run to the next.
    infiles = sorted(glob.glob(infile_pattern))
    if not infiles:
        print("No files matched {}".format(infile_pattern))

    for idx_file, infile in enumerate(infiles):
        warmup_pending = True
        with open(infile, "r") as fi:
            # Iterate lazily: the logs can be large and only a few lines are kept.
            for line in fi:
                if "TimeModule" not in line:
                    continue
                if "PFProducer" not in line and "PFBlockProducer" not in line:
                    continue

                spl = line.split()
                try:
                    ev, lumi, module, producer, time = spl[1], spl[2], spl[3], spl[4], float(spl[5])
                except (IndexError, ValueError):
                    # Truncated line or a non-numeric time field.
                    print("Could not parse line")
                    print(line)
                    continue

                # skip the first MLPF line in each file, because the first
                # inference run is slower due to warmup
                if warmup_pending and "MLPFProducer" in line:
                    warmup_pending = False
                    continue

                rows.append((ev, idx_file, lumi, module, producer, time))

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Directory (relative to the working directory) where the plotting cells
# below save their PDF figures.
outpath = "cmssw"
# exist_ok=True keeps this cell safe to re-run when the directory already exists.
os.makedirs(outpath, exist_ok=True)

In [None]:
# Root directory of the CMSSW job logs. The default is the original location;
# set the MLPF_CMSSW_LOG_DIR environment variable to read the logs from
# somewhere else without editing the notebook.
cmssw_log_dir = os.environ.get(
    "MLPF_CMSSW_LOG_DIR",
    "/scratch/local/joosep/mlpf/results/cms/CMSSW_15_0_5/logs",
)
# The two sets of logs differ only in the subdirectory ("pf" vs "mlpfpu").
cmssw_log_pattern = "{}/{}/JetMET0/slurm-cmssw-el8-gpu.sh*"

times_pf = load_runtimes(cmssw_log_pattern.format(cmssw_log_dir, "pf"))
times_mlpf = load_runtimes(cmssw_log_pattern.format(cmssw_log_dir, "mlpfpu"))

In [None]:
# Keep only the rows of the producers being compared: the two standard PF
# modules from the PF logs, and the MLPF module from the MLPF logs.
pf_producer_names = ["PFProducer", "PFBlockProducer"]
times_pf_sel = times_pf[times_pf["producer"].isin(pf_producer_names)]
times_mlpf_sel = times_mlpf[times_mlpf["producer"].eq("MLPFProducer")]

In [None]:
# One row per (event, lumi) with the summed time of each producer as columns.
times_pf_event = times_pf_sel.pivot_table(
    values="time",
    index=["event", "lumi"],
    columns=["producer"],
    aggfunc="sum",
)
# Keep only the events for which both PF producers reported a time.
times_pf_event_good = times_pf_event.dropna(subset=["PFBlockProducer", "PFProducer"])

In [None]:
# One row per (event, lumi) with the summed MLPF producer time as a column.
times_mlpf_event = times_mlpf_sel.pivot_table(
    values="time",
    index=["event", "lumi"],
    columns=["producer"],
    aggfunc="sum",
)
# Drop the events that have no MLPF time.
times_mlpf_event_good = times_mlpf_event.dropna(subset=["MLPFProducer"])

In [None]:
# Inner join: keep only the (event, lumi) pairs timed in both the PF and the MLPF logs.
times_joined = pd.merge(
    times_pf_event_good,
    times_mlpf_event_good,
    how="inner",
    on=["event", "lumi"],
)

In [None]:
# Display the joined per-event timing table as the cell output.
times_joined

In [None]:
# 2D histogram of the per-event runtimes for the events present in both log
# sets: standard PF (PFBlock + PFAlgo) on the x axis, MLPF on the y axis.
fig, ax = plt.subplots(figsize=(13, 12))

# 200 equal-width bins between 0 and 0.6 s, shared by both axes.
b = np.linspace(0, 0.6, 201)
pf_runtime = times_joined["PFBlockProducer"] + times_joined["PFProducer"]
mlpf_runtime = times_joined["MLPFProducer"]

# hist2d returns (counts, xedges, yedges, image); only the image is needed for the colorbar.
hist_image = ax.hist2d(
    pf_runtime,
    mlpf_runtime,
    bins=b,
    norm=matplotlib.colors.LogNorm(vmin=0.1),
    cmap="turbo",
)[3]
ax.set_xlabel("PFBlock+PFAlgo runtime on CPU [s]")
ax.set_ylabel("MLPF runtime on GPU [s]")
cms_label(ax)
fig.colorbar(hist_image, ax=ax)
ax.set_xlim(0, 0.6)
ax.set_ylim(0, 0.6)
ax.text(
    0.48,
    0.77,
    "CMSSW_15_0_5\nnative ONNXRuntime\n6 jobs per GPU\n8 threads per job",
    fontsize=legend_fontsize,
    transform=ax.transAxes,
)
fig.savefig("{}/cmssw_runtime_2d.pdf".format(outpath), bbox_inches="tight")

In [None]:
# Per-event runtime distributions of standard PF and MLPF, each labelled with
# the median (M) and IQR returned by med_iqr.
fig, ax = plt.subplots()
b = np.linspace(0, 2, 201)

# Empty line drawn before the histograms -- presumably to advance the color
# cycle so that the histograms do not use its first color.
ax.plot([], [])

pf_runtime_all = times_pf_event_good["PFBlockProducer"] + times_pf_event_good["PFProducer"]
m, i = med_iqr(pf_runtime_all)
ax.hist(
    pf_runtime_all,
    bins=b,
    histtype="step",
    lw=2,
    label="PFBlock+PFAlgo on CPU\nM={:.2f}s IQR={:.2f}s".format(m, i),
)

mlpf_runtime_all = times_mlpf_event_good["MLPFProducer"]
m, i = med_iqr(mlpf_runtime_all)
ax.hist(
    mlpf_runtime_all,
    bins=b,
    histtype="step",
    lw=2,
    label="MLPF on GPU\nM={:.2f}s IQR={:.2f}s".format(m, i),
)

cms_label(ax)
# sample_label(ax, physics_process)
ax.legend(
    title="CMSSW_15_0_5\nnative ONNXRuntime\n6 jobs per GPU\n8 threads per job",
    loc="best",
    fontsize=legend_fontsize,
)
ax.set_xlabel("Runtime per event [s]")
ax.set_ylabel("Events")
ax.set_yscale("log")
ax.set_ylim(1, 1e6)
fig.savefig("{}/cmssw_runtime.pdf".format(outpath), bbox_inches="tight")