In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import matplotlib

import mplhep
mplhep.style.use("CMS")

import sys
sys.path += ["../../mlpf/"]
sys.path += ["../../mlpf/plotting/"]
from plot_utils import cms_label

In [None]:
# Raw timing measurements pasted inline as text. Every non-empty line has the form
#   <file>:Nelem=<int> mean_time=<float> ms stddev_time=<float> ms mem_used=<float> MB
# which is the layout parse_str() takes apart.
# NOTE(review): the "<file>:" prefix looks like grep output over timing/*.txt - confirm.

# Lines from gpu_fp32_fused_bs2.txt. This set is parsed further down but is not
# drawn in either the runtime or the memory plot.
s_fused_bs2 = """
timing/gpu_fp32_fused_bs2.txt:Nelem=2560 mean_time=20.38 ms stddev_time=0.08 ms mem_used=1268 MB
timing/gpu_fp32_fused_bs2.txt:Nelem=5120 mean_time=49.49 ms stddev_time=0.06 ms mem_used=1537 MB
timing/gpu_fp32_fused_bs2.txt:Nelem=10240 mean_time=135.40 ms stddev_time=0.07 ms mem_used=2074 MB
"""

# Lines from gpu_fp32_fused_bs4.txt (plotted as "fused, bs4").
s_fused_bs4 = """
timing/gpu_fp32_fused_bs4.txt:Nelem=2560 mean_time=19.93 ms stddev_time=0.03 ms mem_used=1537 MB
timing/gpu_fp32_fused_bs4.txt:Nelem=5120 mean_time=48.39 ms stddev_time=0.04 ms mem_used=2074 MB
timing/gpu_fp32_fused_bs4.txt:Nelem=10240 mean_time=134.42 ms stddev_time=0.06 ms mem_used=3147 MB
"""

# Lines from gpu_fp32_fused.txt (plotted as "fused, bs1"); the file name carries no
# "_bs" suffix, unlike the two sets above.
s_fused_bs1 = """
timing/gpu_fp32_fused.txt:Nelem=2560 mean_time=18.99 ms stddev_time=0.10 ms mem_used=1134 MB
timing/gpu_fp32_fused.txt:Nelem=5120 mean_time=50.43 ms stddev_time=0.10 ms mem_used=1268 MB
timing/gpu_fp32_fused.txt:Nelem=10240 mean_time=137.60 ms stddev_time=0.14 ms mem_used=1537 MB
"""

# Lines from gpu_fp32_unfused.txt (plotted as "unfused"). Only two Nelem values are
# listed here; there is no Nelem=10240 entry.
s_unfused = """
timing/gpu_fp32_unfused.txt:Nelem=2560 mean_time=79.48 ms stddev_time=0.35 ms mem_used=2208 MB
timing/gpu_fp32_unfused.txt:Nelem=5120 mean_time=289.87 ms stddev_time=1.72 ms mem_used=6503 MB
"""

In [None]:
def parse_str(s):
    """Parse pasted timing lines into a DataFrame.

    Each non-empty line of ``s`` is expected to look like::

        <file>:Nelem=<int> mean_time=<float> ms stddev_time=<float> ms mem_used=<float> MB

    Everything up to the first ``:`` is discarded. The remaining tokens are
    matched by their ``key=`` prefix, so bare unit tokens (``ms``, ``MB``) and
    the ordering of the fields do not matter.

    Parameters
    ----------
    s : str
        Multi-line text block; leading/trailing whitespace and blank lines
        are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per input line with columns ``Nelem`` (int), ``mean_time``,
        ``stddev_time`` and ``mem_used`` (float). Empty input yields an empty
        frame that still carries these four columns.

    Raises
    ------
    ValueError
        If a non-empty line has no ``:`` separator, lacks one of the four
        fields, or holds a value that cannot be converted to a number.
    """
    columns = ["Nelem", "mean_time", "stddev_time", "mem_used"]
    records = []
    for raw_line in s.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        _, sep, payload = line.partition(":")
        # Keep only "key=value" tokens; the unit tokens carry no "=".
        fields = dict(tok.split("=", 1) for tok in payload.split() if "=" in tok)
        missing = [name for name in columns if name not in fields]
        if not sep or missing:
            raise ValueError(
                f"cannot parse timing line {line!r}; missing fields: {missing}"
            )
        records.append(
            {
                "Nelem": int(fields["Nelem"]),
                "mean_time": float(fields["mean_time"]),
                "stddev_time": float(fields["stddev_time"]),
                "mem_used": float(fields["mem_used"]),
            }
        )
    # Build the frame in one go; `columns=` pins the layout even with no rows.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Turn each pasted timing block into a DataFrame (same order as before:
# bs1, bs2, bs4, unfused).
data_fused_bs1, data_fused_bs2, data_fused_bs4, data_unfused = (
    parse_str(raw_text)
    for raw_text in (s_fused_bs1, s_fused_bs2, s_fused_bs4, s_unfused)
)

In [None]:
# Mean runtime versus Nelem for the unfused and fused (bs1, bs4) measurements,
# with the measured standard deviation as error bars; written to runtime.pdf.
plt.figure()
ax = plt.axes()
ax.errorbar(
    data_unfused["Nelem"],
    data_unfused["mean_time"],
    yerr=data_unfused["stddev_time"],
    marker="o",
    label="unfused",
)
ax.errorbar(
    data_fused_bs1["Nelem"],
    data_fused_bs1["mean_time"],
    yerr=data_fused_bs1["stddev_time"],
    marker="^",
    label="fused, bs1",
)
ax.errorbar(
    data_fused_bs4["Nelem"],
    data_fused_bs4["mean_time"],
    yerr=data_fused_bs4["stddev_time"],
    marker="v",
    label="fused, bs4",
)
# Tick exactly at the measured Nelem values.
ax.set_xticks(data_fused_bs1["Nelem"])
ax.set_ylabel("Runtime per event [ms]")
ax.set_xlabel("Tracks/clusters per event")
ax.legend(loc=1, title="MLPF-2x3x256\nONNX backend\nA100-1g.10gb")
ax.set_ylim(0, 400)
cms_label(ax)
plt.savefig("runtime.pdf")

In [None]:
# Memory usage versus Nelem for the unfused and fused (bs1, bs4) measurements;
# written to memory.pdf.
plt.figure()
ax = plt.axes()
ax.errorbar(
    data_unfused["Nelem"],
    data_unfused["mem_used"],
    marker="o",
    label="unfused",
)
ax.errorbar(
    data_fused_bs1["Nelem"],
    data_fused_bs1["mem_used"],
    marker="^",
    label="fused, bs1",
)
# The bs4 values are divided by 4 before plotting (y axis is "per event").
ax.errorbar(
    data_fused_bs4["Nelem"],
    data_fused_bs4["mem_used"] / 4,
    marker="v",
    label="fused, bs4",
)
ax.set_xticks(data_fused_bs1["Nelem"])
ax.set_ylabel("GPU memory per event [MB]")
ax.set_xlabel("Tracks/clusters per event")
# Horizontal reference line at 10200, labelled as the GPU limit.
ax.axhline(10200, color="red", ls="--", label="GPU limit")
# Optional log-scale variant, currently disabled:
#plt.yscale("log")
#ytick = [256,512,1024,2048,4096,8192]
#plt.yticks(ytick, ytick)
ax.legend(loc=1, title="MLPF-2x3x256\nONNX backend\nA100-1g.10gb")
cms_label(ax)
plt.savefig("memory.pdf")

### Runtimes from CMSSW

In [None]:
def load_runtimes(infile_pattern):
    """Collect per-module timing entries from log files matching a glob pattern.

    A line is used when it contains the text ``TimeModule`` and also contains
    ``PFProducer`` or ``PFBlockProducer``. The check is a substring match, so
    a line mentioning ``MLPFProducer`` is selected as well. The line is split
    on whitespace and tokens 1..5 are read as event, lumi, module, producer
    and time.

    Lines that match the filter but have fewer than six tokens, or whose time
    token is not a number, are skipped instead of aborting the whole load; a
    single warning reports how many were dropped.

    Parameters
    ----------
    infile_pattern : str
        Glob pattern of the log files to read. Matches are processed in
        sorted order so the row order does not depend on the filesystem.

    Returns
    -------
    pandas.DataFrame
        Columns ``event``, ``lumi``, ``module``, ``producer`` (all kept as the
        strings found in the log) and ``time`` (float). The frame is empty,
        but still has these columns, when nothing matches.
    """
    import warnings

    columns = ["event", "lumi", "module", "producer", "time"]
    records = []
    n_skipped = 0

    for infile in sorted(glob.glob(infile_pattern)):
        with open(infile, "r") as fi:
            # Iterate lazily; there is no need to hold a whole log in memory.
            for line in fi:
                if "TimeModule" not in line:
                    continue
                if "PFProducer" not in line and "PFBlockProducer" not in line:
                    continue
                tokens = line.split()
                try:
                    # Unpacking fails with ValueError on a short line, and so
                    # does float() on a garbled time token.
                    ev, lumi, module, producer, elapsed = tokens[1:6]
                    elapsed = float(elapsed)
                except ValueError:
                    n_skipped += 1
                    continue
                records.append((ev, lumi, module, producer, elapsed))

    if n_skipped:
        warnings.warn(
            f"load_runtimes: skipped {n_skipped} malformed TimeModule line(s) "
            f"for pattern {infile_pattern!r}"
        )

    return pd.DataFrame(records, columns=columns)

In [None]:
# Read the timing logs for the two sets of job directories (*_pf and *_mlpf).
times_pf, times_mlpf = (
    load_runtimes(log_pattern)
    for log_pattern in (
        "/scratch/persistent/joosep/mlpf/results/cms/CMSSW_15_0_1/*_pf/slurm-cmssw-el8-gpu.sh*",
        "/scratch/persistent/joosep/mlpf/results/cms/CMSSW_15_0_1/*_mlpf/slurm-cmssw-el8-gpu.sh*",
    )
)

In [None]:
# Keep only the rows of the producers being compared: the two PF producers
# from the PF logs and the MLPF producer from the MLPF logs.
times_pf_sel = times_pf[times_pf["producer"].isin(["PFProducer", "PFBlockProducer"])]
times_mlpf_sel = times_mlpf[times_mlpf["producer"].eq("MLPFProducer")]

In [None]:
# One row per (event, lumi), one column per producer. aggfunc="sum" adds up
# every entry that shares the same (event, lumi, producer) key.
times_pf_event = times_pf_sel.pivot_table(
    values="time",
    index=["event", "lumi"],
    columns=["producer"],
    aggfunc="sum",
)
# Keep only events for which both PF producers have a time.
times_pf_event_good = times_pf_event.dropna(subset=["PFBlockProducer", "PFProducer"])

In [None]:
# One row per (event, lumi), one column per producer. aggfunc="sum" adds up
# every entry that shares the same (event, lumi, producer) key.
times_mlpf_event = times_mlpf_sel.pivot_table(
    values="time",
    index=["event", "lumi"],
    columns=["producer"],
    aggfunc="sum",
)
# Keep only events that have an MLPFProducer time.
times_mlpf_event_good = times_mlpf_event.dropna(subset=["MLPFProducer"])

In [None]:
# Inner join on (event, lumi): only events present in both tables survive.
times_joined = pd.merge(
    times_pf_event_good,
    times_mlpf_event_good,
    how="inner",
    on=["event", "lumi"],
)

In [None]:
# Event-by-event comparison: summed PFBlockProducer+PFProducer time on x,
# MLPFProducer time on y, with a logarithmic colour scale.
b = np.linspace(0, 0.5, 61)  # same bin edges on both axes

fig = plt.figure(figsize=(13, 12))
ax = plt.axes()
# plt.hist2d (rather than ax.hist2d) registers the mesh as the current image,
# which the argument-less plt.colorbar() at the end relies on.
plt.hist2d(
    x=times_joined["PFBlockProducer"] + times_joined["PFProducer"],
    y=times_joined["MLPFProducer"],
    bins=b,
    cmap="turbo",
    norm=matplotlib.colors.LogNorm(vmin=1),
)
ax.set_xlabel("PFBlock+PFAlgo runtime on CPU [s]")
ax.set_ylabel("MLPF runtime on GPU [s]")
cms_label(ax)
plt.colorbar()

In [None]:
# Per-event runtime distributions: summed PF producers versus MLPF producer,
# drawn as step histograms on shared bins.
fig = plt.figure()
ax = plt.axes()
b = np.linspace(0, 1.0, 61)
ax.hist(
    times_pf_event_good["PFBlockProducer"] + times_pf_event_good["PFProducer"],
    bins=b,
    histtype="step",
    lw=2,
    label="PFBlock+PFAlgo on CPU",
)
ax.hist(
    times_mlpf_event_good["MLPFProducer"],
    bins=b,
    histtype="step",
    lw=2,
    label="MLPF on GPU",
)
ax.legend(title="CMSSW_15_0_1\nnative ONNXRuntime\n8 streams")
cms_label(ax)
ax.set_xlabel("Runtime per event [s]")

In [None]:
import uproot
import awkward

In [None]:
# Open the ttbar_pu_cpu output file and take its "Events" object, then read
# only the Jet_pt branch from it.
# NOTE(review): the handle returned by uproot.open() is never closed explicitly;
# consider a `with` block if ev_pf_cpu is not needed after this cell.
ev_pf_cpu = uproot.open("/local/joosep/mlpf/results/cms/CMSSW_15_0_1/timing/ttbar_pu_cpu/step3_NANO_jme.root")["Events"]
arrs_pf_cpu = ev_pf_cpu.arrays(["Jet_pt"])

In [None]:
# Open the ttbar_pu_gpu output file and take its "Events" object, then read
# only the Jet_pt branch from it.
# NOTE(review): the handle returned by uproot.open() is never closed explicitly;
# consider a `with` block if ev_mlpf_gpu is not needed after this cell.
ev_mlpf_gpu = uproot.open("/local/joosep/mlpf/results/cms/CMSSW_15_0_1/timing/ttbar_pu_gpu/step3_NANO_jme.root")["Events"]
arrs_mlpf_gpu = ev_mlpf_gpu.arrays(["Jet_pt"])

In [None]:
# Overlay the flattened Jet_pt values of the two samples on shared bins.
# The curves are labelled and a legend is drawn so they can be told apart.
b = np.linspace(10, 100, 100)
plt.hist(awkward.flatten(arrs_pf_cpu["Jet_pt"]), bins=b, histtype="step", lw=1, label="PF, CPU")
plt.hist(awkward.flatten(arrs_mlpf_gpu["Jet_pt"]), bins=b, histtype="step", lw=1, label="MLPF, GPU")
# NOTE(review): the unit assumes the NanoAOD convention of Jet_pt in GeV - confirm.
plt.xlabel("Jet $p_T$ [GeV]")
plt.ylabel("Jets / bin")
plt.legend();