In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import matplotlib
import os

import mplhep
mplhep.style.use("CMS")

import sys
sys.path += ["../../mlpf/"]
sys.path += ["../../mlpf/plotting/"]
import plot_utils
from plot_utils import med_iqr, sample_label

In [None]:
# Font sizes shared by the figures in this notebook.
matplotlib.rcParams["axes.labelsize"] = 35
legend_fontsize = sample_label_fontsize = 30
addtext_fontsize = 25

# (x, y) anchor positions for text labels. sample_label_coords is used below
# together with transform=ax.transAxes, i.e. in axes-fraction coordinates;
# the jet_label_* pairs are presumably used the same way -- TODO confirm.
jet_label_coords = (0.05, 0.82)
jet_label_coords_single = (0.05, 0.86)
sample_label_coords = (0.05, 0.96)

# One colour and one line style per algorithm (PF vs. MLPF).
pf_color, mlpf_color = "#5790FC", "#E42536"
pf_linestyle, mlpf_linestyle = "-.", "-"

In [None]:
def cms_label(ax):
    """Add the mplhep CMS label to ``ax``.

    The label is drawn with an empty status text, ``data=False`` and the
    right-hand label ``'Run 3 (13.6 TeV)'``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes that should receive the label.
    """
    # Forward the axes explicitly. Without ``ax=`` the argument was ignored
    # and the label went to whichever axes happened to be current, which is
    # wrong as soon as a figure has more than one axes.
    mplhep.cms.label("", data=False, rlabel='Run 3 (13.6 TeV)', ax=ax)

### Runtimes from CMSSW

In [None]:
def load_runtimes(infile_pattern):
    """Collect per-module timing entries from log files into a DataFrame.

    Every file matching ``infile_pattern`` is scanned line by line. A line is
    used when it contains ``TimeModule`` and also ``PFProducer`` or
    ``PFBlockProducer`` (a substring test, so e.g. ``MLPFProducer`` matches
    too). The line is split on whitespace and fields 1-5 are read as event,
    lumi, module, producer and time. Lines that cannot be parsed are printed
    and skipped.

    Parameters
    ----------
    infile_pattern : str
        Glob pattern selecting the log files.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line with the columns ``event``, ``file_index``,
        ``lumi``, ``module``, ``producer`` and ``time``. ``file_index`` is the
        position of the source file in the sorted list of matches.
    """
    columns = ["event", "file_index", "lumi", "module", "producer", "time"]
    records = []

    # glob returns matches in arbitrary order. Sorting makes file_index
    # reproducible, so that the same index refers to the same job when the
    # PF and MLPF logs are loaded with different patterns and later joined on it.
    for idx_file, infile in enumerate(sorted(glob.glob(infile_pattern))):
        with open(infile, "r") as fi:
            # Iterate lazily instead of reading the whole log into memory.
            for line in fi:
                if "TimeModule" not in line:
                    continue
                if "PFProducer" not in line and "PFBlockProducer" not in line:
                    continue

                spl = line.split()
                try:
                    # All conversions happen inside the try block so that one
                    # malformed line is skipped instead of aborting the load.
                    record = (int(spl[1]), idx_file, int(spl[2]), spl[3], spl[4], float(spl[5]))
                except (IndexError, ValueError):
                    print("Could not parse line")
                    print(line)
                    continue
                records.append(record)

    return pd.DataFrame(records, columns=columns)

In [None]:
# Directory that receives the PDF figures saved below; created on demand.
outpath = "cmssw"
os.makedirs(name=outpath, exist_ok=True)

In [None]:
# Directory with the timing logs; kept in one place so both patterns below
# stay in sync when the input location changes.
timing_dir = "/mnt/work/particleflow/CMSSW_15_0_5_mlpf_v2.6.0pre1_puppi_2372e2/timing_l4"

times_pf = load_runtimes(os.path.join(timing_dir, "log_pf_cpu*_par8.txt"))
times_mlpf = load_runtimes(os.path.join(timing_dir, "log_mlpf_gpu*_par8.txt"))

# A pattern that matches nothing yields an empty frame; fail here with a clear
# message instead of with a KeyError in the pivot cells further down.
assert len(times_pf) > 0, "no PF timing entries found in {}".format(timing_dir)
assert len(times_mlpf) > 0, "no MLPF timing entries found in {}".format(timing_dir)

In [None]:
# Restrict each log to the producers that are compared: the two PF producers
# on one side and the single MLPF producer on the other.
pf_producers = ["PFProducer", "PFBlockProducer"]
times_pf_sel = times_pf.loc[times_pf["producer"].isin(pf_producers)]
times_mlpf_sel = times_mlpf.loc[times_mlpf["producer"].eq("MLPFProducer")]

In [None]:
# One row per (event, lumi, file_index), one column per producer; the times of
# all entries that fall into the same cell are summed.
times_pf_event = times_pf_sel.pivot_table(
    values="time",
    index=["event", "lumi", "file_index"],
    columns=["producer"],
    aggfunc="sum",
)
# Keep only the events for which both PF producers have a time.
times_pf_event_good = times_pf_event.dropna(subset=["PFBlockProducer", "PFProducer"], how="any")

In [None]:
# Same per-event table for MLPF: one row per (event, lumi, file_index) with the
# summed MLPFProducer time.
times_mlpf_event = times_mlpf_sel.pivot_table(
    values="time",
    index=["event", "lumi", "file_index"],
    columns=["producer"],
    aggfunc="sum",
)
# Keep only the events that have an MLPFProducer time.
times_mlpf_event_good = times_mlpf_event.loc[times_mlpf_event["MLPFProducer"].notna()]

In [None]:
# Inner join: keep the events present in both the PF and the MLPF table,
# matched on the (event, lumi, file_index) key.
times_joined = pd.merge(
    times_pf_event_good,
    times_mlpf_event_good,
    how="inner",
    on=["event", "lumi", "file_index"],
)

In [None]:
# For each job (file_index), pick the events with the 16 largest MLPF times;
# those are related to warmup / initialization and are dropped further below.
n_warmup = 16
bad_evs = (
    times_joined
    .groupby(level="file_index")["MLPFProducer"]
    .apply(lambda job_times: job_times.sort_values().iloc[-n_warmup:])
)
# Remove the outermost index level so that the remaining index can be used
# directly with drop() on the per-event tables.
bad_evs.index = bad_evs.index.droplevel(0)

In [None]:
# Diagnostic: MLPF time of all joined events compared to the events flagged
# as warmup. Labels and a legend make the two step histograms distinguishable.
b = np.linspace(0, 0.5, 201)
plt.hist(times_joined["MLPFProducer"], bins=b, histtype="step", lw=2, label="all events")
plt.hist(bad_evs.values, bins=b, histtype="step", lw=2, label="flagged as warmup")
plt.yscale("log")
plt.xlabel("MLPF runtime per event [s]")
plt.ylabel("Counts")
plt.legend();

In [None]:
# Joined per-event table without the events flagged in bad_evs.
times_joined_drop = times_joined.drop(labels=bad_evs.index, axis="index")

In [None]:
# 2D histogram of the per-event PF time (both PF producers summed) against the
# MLPF time, after removing the flagged warmup events.
fig, ax = plt.subplots(figsize=(12, 10))

runtime_pf_2d = times_joined_drop["PFBlockProducer"] + times_joined_drop["PFProducer"]
runtime_mlpf_2d = times_joined_drop["MLPFProducer"]

b = np.linspace(0, 0.6, 201)
image = ax.hist2d(
    runtime_pf_2d,
    runtime_mlpf_2d,
    bins=b,
    norm=matplotlib.colors.LogNorm(vmin=1),
    cmap="viridis",
    edgecolor="face",
)[3]

# Diagonal where both times are equal.
ax.plot([0, 0.5], [0, 0.5], ls="--", color="black")
ax.set_xlabel("PF, $t^{CPU}_{ev}$ [s]")
ax.set_ylabel("MLPF, $t^{GPU}_{ev}$ [s]")

cms_label(ax)
sample_label(ax, "cms_pf_qcd", x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
fig.colorbar(image, ax=ax, label="Counts")

ax.set_xlim(0, 0.5)
ax.set_ylim(0, 0.5)
ax.text(
    sample_label_coords[0],
    0.74,
    "CMSSW_15_0_5\nnative ONNXRuntime\n8 jobs, 8 threads per job",
    fontsize=legend_fontsize,
    transform=ax.transAxes,
)
fig.savefig("{}/cmssw_runtime_2d.pdf".format(outpath), bbox_inches="tight")

In [None]:
from matplotlib.lines import Line2D

# 1D distributions of the per-event time for PF and MLPF on a common binning.
fig = plt.figure()
ax = plt.axes()
b = np.linspace(0, 0.5, 81)

# PF: sum of both PF producers per event, without the flagged warmup events.
runtime_pf = (times_pf_event_good["PFBlockProducer"] + times_pf_event_good["PFProducer"]).drop(index=bad_evs.index)
pf_label = "PF, $\\bar{t}^{CPU}_{ev}$=" + "{:.2f}s".format(np.mean(runtime_pf))
plt.hist(runtime_pf, bins=b, histtype="step", lw=2, ls=pf_linestyle, color=pf_color)

# MLPF: MLPFProducer time per event, without the flagged warmup events.
runtime_mlpf = times_mlpf_event_good["MLPFProducer"].drop(index=bad_evs.index)
mlpf_label = "MLPF, $\\bar{t}^{GPU}_{ev}$=" + "{:.2f}s".format(np.mean(runtime_mlpf))
plt.hist(runtime_mlpf, bins=b, histtype="step", lw=2, ls=mlpf_linestyle, color=mlpf_color)

cms_label(ax)
sample_label(ax, "cms_pf_qcd", x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
plt.text(sample_label_coords[0], 0.74, "CMSSW_15_0_5\nnative ONNXRuntime\n8 jobs, 8 threads per job", fontsize=legend_fontsize, transform=ax.transAxes)

# The legend uses line handles instead of the histogram artists so that each
# entry is drawn as a line with the colour and line style of its histogram.
custom_lines = [
    Line2D([0], [0], color=pf_color, lw=2, ls=pf_linestyle),
    Line2D([0], [0], color=mlpf_color, lw=2, ls=mlpf_linestyle),
]
plt.legend(custom_lines, [pf_label, mlpf_label], loc=(0.38, 0.48), fontsize=legend_fontsize)

plt.xlabel("Runtime per event [s]")
plt.ylabel("Counts")
plt.yscale("log")
plt.ylim(1, 1e6)
# bbox_inches="tight" matches the other figures saved by this notebook and
# keeps the axis labels inside the written PDF.
plt.savefig("{}/cmssw_runtime.pdf".format(outpath), bbox_inches="tight")

In [None]:
# GPU monitoring log, read as a header-less CSV (columns are numbered 0..N).
df_gpu = pd.read_csv(
    "/mnt/work/particleflow/CMSSW_15_0_5_mlpf_v2.6.0pre1_puppi_2372e2/timing_l4/gpu.log",
    header=None,
)

# Column 0: convert every entry to a timestamp.
df_gpu[0] = df_gpu[0].map(pd.to_datetime)

# Columns 5 and 4: keep the first whitespace-separated token of each entry as
# a float (presumably the number in front of a unit suffix -- TODO confirm
# against the log format).
for col in (5, 4):
    df_gpu[col] = df_gpu[col].map(lambda entry: float(entry.split()[0]))

In [None]:
def window(size):
    """Return a length-``size`` array whose entries all equal ``1/size``.

    Convolving a signal with this kernel gives its moving average over
    ``size`` samples.
    """
    kernel = np.ones(size)
    kernel /= float(size)
    return kernel

f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [1, 1]}, sharex=True)

# Top panel: column 5 of the GPU log, divided by 1000, against the timestamp.
plt.sca(a0)
plt.plot(df_gpu[0], df_gpu[5]/1000, marker=".")
# NOTE(review): 24152.9 is presumably the total memory of the card in the
# unit used by the log -- confirm.
plt.ylim(0,24152.9/1000)
plt.ylabel("GPU mem. (GB)")

# Bottom panel: moving average of column 4 over up to 60 samples.
plt.sca(a1)
# Clamp the window to the log length: with a kernel longer than the signal,
# np.convolve(..., 'same') returns an array of the kernel's length, which
# would no longer line up with the timestamps.
smooth_n = max(1, min(60, len(df_gpu)))
kernel = window(smooth_n)
util_sum = np.convolve(df_gpu[4], kernel, 'same')
# 'same' mode pads the signal with zeros, which biases the average low near
# both ends. Dividing by the fraction of the kernel that overlaps real samples
# removes that bias; interior points are unchanged (coverage is 1 there).
coverage = np.convolve(np.ones(len(df_gpu)), kernel, 'same')
plt.plot(df_gpu[0], util_sum/coverage, ls="", marker=".")
plt.ylim(0,100)
plt.ylabel("GPU util. (%)")
plt.xlabel("job runtime")
a0.text(sample_label_coords[0], 0.60, "Nvidia L4, CMSSW_15_0_5\nnative ONNXRuntime\n8 jobs, 8 threads per job", fontsize=legend_fontsize, transform=a0.transAxes)
plt.savefig("{}/gpu_util.pdf".format(outpath), bbox_inches="tight")