In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import glob
import tqdm
import awkward as ak
import boost_histogram as bh
import sys
import awkward
import vector
import pickle
from pathlib import Path

import mplhep
# Use the CMS style shipped with mplhep for every figure created below.
mplhep.style.use(mplhep.style.CMS)

# Extend the import path so that `plotting.plot_utils` can be resolved.
# NOTE(review): presumably the notebook is run from a directory that contains
# the `mlpf` folder -- confirm the expected working directory.
import sys
sys.path.append("mlpf")
# NOTE: `compute_jet_ratio` is imported here, but a function with the same name
# is defined in the next cell of this notebook; that local definition shadows
# the imported one for all later calls.
from plotting.plot_utils import pid_to_text, EVALUATION_DATASET_NAMES, save_img, EXPERIMENT_LABELS, labels, sample_label, med_iqr, compute_jet_ratio

In [None]:
def compute_jet_ratio(data, yvals):
    """Build flat arrays of matched-jet pt/eta and their ratios.

    For every (reference, probe) pair of jet collections listed below, the
    matching indices stored in ``data["matched_jets"]["<ref>_to_<probe>"]`` are
    used to pick jets from ``yvals["jets_<collection>_<quantity>"]``. The
    picked values are flattened along axis 1 and converted to numpy.

    Note: this definition shadows the ``compute_jet_ratio`` imported from
    ``plotting.plot_utils`` at the top of the notebook.

    Args:
        data: record providing ``data["matched_jets"][pair][collection]``
            index arrays.
        yvals: mapping providing ``jets_<collection>_pt`` and
            ``jets_<collection>_eta`` arrays.

    Returns:
        dict with, for each pair and each quantity in ("pt", "eta"):
        ``jet_<ref>_to_<probe>_<ref><quantity>``,
        ``jet_<ref>_to_<probe>_<probe><quantity>`` and
        ``jet_ratio_<ref>_to_<probe>_<quantity>`` (probe divided by reference).
    """
    matched_pairs = (
        ("gen", "pred"),
        ("gen", "cand"),
        ("gen", "target"),
        ("target", "pred"),
        ("target", "cand"),
    )

    ratios = {}
    for ref, probe in matched_pairs:
        pair = f"{ref}_to_{probe}"
        indices = data["matched_jets"][pair]
        for quantity in ("pt", "eta"):
            flat = {}
            # reference side first, then probe side, so keys are inserted in
            # the order: reference values, probe values, ratio
            for side in (ref, probe):
                picked = yvals[f"jets_{side}_{quantity}"][indices[side]]
                flat[side] = awkward.to_numpy(awkward.flatten(picked, axis=1))
                ratios[f"jet_{pair}_{side}{quantity}"] = flat[side]
            ratios[f"jet_ratio_{pair}_{quantity}"] = flat[probe] / flat[ref]
    return ratios


def load_eval_data(path, max_files=None):
    """Load evaluation parquet files and derive particle- and jet-level arrays.

    Args:
        path: glob pattern selecting the parquet files to read.
        max_files: if not None, only the first ``max_files`` files of the
            sorted match list are read.

    Returns:
        A tuple ``(yvals, X, filenames)`` where

        * ``yvals`` is a dict holding ``<typ>_<field>`` for every field of
          ``data["particles"][typ]`` (typ in target/cand/pred), the derived
          ``<typ>_phi/_px/_py/_pz``, ``jets_<typ>_<pt|eta|phi|energy>``
          (typ in gen/cand/pred/target), the entries produced by
          ``compute_jet_ratio`` and ``gen_met``;
        * ``X`` is ``data["inputs"]`` of the concatenated files;
        * ``filenames`` is the list of files that were read, in read order.

    Raises:
        FileNotFoundError: if no file is selected by ``path`` / ``max_files``.
    """
    print("path", path)

    # glob returns matches in arbitrary order; sort so that the max_files
    # truncation and the event order are reproducible between runs
    filenames = sorted(glob.glob(path))
    if max_files is not None:
        filenames = filenames[:max_files]

    # fail early with a clear message instead of an opaque concatenate error
    if not filenames:
        raise FileNotFoundError(f"no input files selected for pattern {path!r} (max_files={max_files})")

    chunks = [awkward.from_parquet(fi) for fi in tqdm.tqdm(filenames)]
    data = awkward.concatenate(chunks, axis=0)
    X = data["inputs"]

    particle_types = ["target", "cand", "pred"]

    yvals = {}
    for typ in particle_types:
        for k in data["particles"][typ].fields:
            yvals["{}_{}".format(typ, k)] = data["particles"][typ][k]

    for typ in particle_types:
        # Compute phi, px, py, pz
        yvals[typ + "_phi"] = np.arctan2(yvals[typ + "_sin_phi"], yvals[typ + "_cos_phi"])
        yvals[typ + "_px"] = yvals[typ + "_pt"] * yvals[typ + "_cos_phi"]
        yvals[typ + "_py"] = yvals[typ + "_pt"] * yvals[typ + "_sin_phi"]
        yvals[typ + "_pz"] = yvals[typ + "_pt"] * np.sinh(yvals[typ + "_eta"])

    for typ in ["gen", "cand", "pred", "target"]:
        # Get the jet vectors
        jetvec = vector.awk(data["jets"][typ])
        jetvec = awkward.Array(jetvec, with_name="Momentum4D")
        for k in ["pt", "eta", "phi", "energy"]:
            yvals["jets_{}_{}".format(typ, k)] = getattr(jetvec, k)

    # Zero the kinematics of entries whose class id is 0.
    # NOTE(review): phi, px, py and pz were derived above from the unmasked
    # values and are not masked here -- confirm this is intended.
    for typ in particle_types:
        keep = yvals["{}_cls_id".format(typ)] != 0
        for val in ["pt", "eta", "sin_phi", "cos_phi", "energy"]:
            key = "{}_{}".format(typ, val)
            yvals[key] = yvals[key] * keep

    yvals.update(compute_jet_ratio(data, yvals))
    yvals["gen_met"] = data["genmet"]
    return yvals, X, filenames

In [None]:
# Glob pattern selecting the evaluation parquet files analysed in this notebook.
EVAL_PARQUET_PATTERN = "experiments/cld_evaluation_parquets/*.parquet"
# The list of loaded file names (third return value) is not needed here.
yvals, X, _ = load_eval_data(EVAL_PARQUET_PATTERN)

In [None]:
def plot_jet_ratio(
    yvals,
    epoch=None,
    cp_dir=None,
    comet_experiment=None,
    title=None,
    bins=None,
    file_modifier="",
    logy=False,
    dataset=None,
    sample=None,
):
    """Histogram the matched-jet pt ratio for the target and PF collections.

    Args:
        yvals: mapping providing ``jet_ratio_gen_to_target_pt``,
            ``jet_ratio_gen_to_cand_pt`` and ``jets_gen_pt``.
        epoch, cp_dir, comet_experiment: forwarded unchanged to ``save_img``.
        title: optional legend title (None leaves the legend untitled).
        bins: histogram bin edges; defaults to 500 edges between 0 and 2.
        file_modifier: string inserted into the output name
            ``jet_res<file_modifier>.png``.
        logy: if True, use a logarithmic y axis starting at 10.
        dataset: key into ``EXPERIMENT_LABELS`` selecting the label function.
        sample: passed to ``sample_label``.

    Returns:
        dict keyed by ratio name, each value holding ``med``, ``iqr`` (the
        first and second item returned by ``med_iqr``) and ``match_frac``
        (number of ratio entries divided by the number of gen jets).
    """
    plt.figure()
    ax = plt.axes()

    if bins is None:
        bins = np.linspace(0, 2, 500)

    # denominator of the matched fraction, shared by both collections
    num_gen_jets = awkward.count(yvals["jets_gen_pt"])

    ret_dict = {}
    for key, legend_name in (
        ("jet_ratio_gen_to_target_pt", "target"),
        ("jet_ratio_gen_to_cand_pt", "PF"),
    ):
        ratio = yvals[key]
        p = med_iqr(ratio)
        match_frac = awkward.count(ratio) / num_gen_jets
        ret_dict[key] = {"med": p[0], "iqr": p[1], "match_frac": match_frac}
        plt.hist(
            ratio,
            bins=bins,
            histtype="step",
            lw=2,
            label="{} $M={:.2f}, IQR={:.2f}, f_m={:.2f}$".format(legend_name, p[0], p[1], match_frac),
        )

    plt.xlabel("jet $p_{\\mathrm{T}} / p_{\\mathrm{T,truth}}$")
    plt.ylabel("Matched jets [a.u.]")
    # title=None is matplotlib's default, so callers not passing a title see no change
    plt.legend(loc="best", title=title)

    EXPERIMENT_LABELS[dataset](ax)
    sample_label(ax, sample)

    plt.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))

    # leave headroom above the histograms for the legend and labels
    ylim = ax.get_ylim()
    ax.set_ylim(ylim[0], 1.2 * ylim[1])

    if logy:
        ax.set_yscale("log")
        ax.set_ylim(10, 10 * ylim[1])

    save_img(
        "jet_res{}.png".format(file_modifier),
        epoch,
        cp_dir=cp_dir,
        comet_experiment=comet_experiment,
    )

    # hand the summary statistics back instead of discarding them
    return ret_dict

In [None]:
def _response_quartiles(response, genjet_pt, lim_low, lim_hi):
    """Return the responses with gen-jet pt in (lim_low, lim_hi] and their 25/50/75 percentiles."""
    subsample = response[(genjet_pt > lim_low) & (genjet_pt <= lim_hi)]
    if len(subsample) > 0:
        quartiles = [np.percentile(subsample, q) for q in (25, 50, 75)]
    else:
        # an empty bin has no defined quartiles; NaN keeps it out of the
        # summary plots instead of drawing a misleading point at 0
        quartiles = [np.nan, np.nan, np.nan]
    return subsample, quartiles


def plot_jet_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=None, title=None, sample=None, dataset=None):
    """Plot the jet response in bins of gen-jet pt for the target and PF jets.

    One histogram figure is produced per gen-jet pt bin, followed by two
    summary figures: the response median and the response IQR divided by the
    median, both as a function of the bin centre. Every figure is passed to
    ``save_img``.

    Args:
        yvals: mapping providing ``jet_ratio_gen_to_target_pt``,
            ``jet_ratio_gen_to_cand_pt``, ``jet_gen_to_target_genpt`` and
            ``jet_gen_to_cand_genpt`` as boolean-maskable arrays.
        epoch, cp_dir, comet_experiment: forwarded unchanged to ``save_img``.
        title: legend title of the two summary figures.
        sample: passed to ``sample_label``.
        dataset: key into ``EXPERIMENT_LABELS`` selecting the label function.

    Returns:
        None.
    """
    # legend name -> (response, gen-jet pt); add an entry here to draw
    # another collection
    series = {
        "Target": (yvals["jet_ratio_gen_to_target_pt"], yvals["jet_gen_to_target_genpt"]),
        "PF": (yvals["jet_ratio_gen_to_cand_pt"], yvals["jet_gen_to_cand_genpt"]),
    }

    genjet_bins = [10, 20, 40, 60, 80, 100, 200]
    b = np.linspace(0.5, 1.5, 500)

    x_vals = []
    quartiles = {name: [] for name in series}

    for lim_low, lim_hi in zip(genjet_bins[:-1], genjet_bins[1:]):
        plt.figure()
        x_vals.append(np.mean([lim_low, lim_hi]))

        for name, (response, genjet_pt) in series.items():
            subsample, (p25, p50, p75) = _response_quartiles(response, genjet_pt, lim_low, lim_hi)
            quartiles[name].append([p25, p50, p75])
            plt.hist(
                subsample,
                bins=b,
                histtype="step",
                lw=1,
                label="{} ({:.4f}, {:.4f}, {:.4f})".format(name, p25, p50, p75),
            )

        plt.ylabel("Matched jets / bin")
        plt.xlabel(labels["reco_gen_jet_ratio"])
        plt.axvline(1.0, ymax=0.7, color="black", ls="--")
        plt.legend(loc=1, fontsize=16)
        plt.title(labels["gen_jet_range"].format(lim_low, lim_hi))
        plt.yscale("log")
        save_img(
            "jet_response_binned_{}.png".format(lim_low),
            epoch,
            cp_dir=cp_dir,
            comet_experiment=comet_experiment,
        )

    x_vals = np.array(x_vals)
    # columns: 25th percentile, median, 75th percentile
    quartiles = {name: np.array(vals) for name, vals in quartiles.items()}

    # Plot median and IQR as a function of gen pt
    plt.figure()
    ax = plt.gca()
    for name, q in quartiles.items():
        plt.plot(x_vals, q[:, 1], marker="o", label=name)
    plt.legend(loc=1, fontsize=16, title=title)
    plt.ylabel("Response median")
    plt.xlabel(labels["gen_jet"])
    plt.tight_layout()
    plt.axhline(1.0, color="black", ls="--", lw=0.5)

    EXPERIMENT_LABELS[dataset](ax)
    sample_label(ax, sample)
    plt.ylim(0.95, 1.05)
    save_img(
        "jet_response_med_pt.png",
        epoch,
        cp_dir=cp_dir,
        comet_experiment=comet_experiment,
    )

    plt.figure()
    ax = plt.gca()
    for name, q in quartiles.items():
        # a zero or undefined median gives inf/NaN, which matplotlib skips;
        # silence the floating-point warnings for those points
        with np.errstate(divide="ignore", invalid="ignore"):
            relative_iqr = (q[:, 2] - q[:, 0]) / q[:, 1]
        plt.plot(x_vals, relative_iqr, marker="o", label=name)
    plt.legend(loc=1, fontsize=16, title=title)
    plt.ylabel("Response IQR / median")
    plt.xlabel(labels["gen_jet"])
    plt.tight_layout()
    plt.ylim(bottom=0)
    EXPERIMENT_LABELS[dataset](ax)
    sample_label(ax, sample)
    plt.ylim(0.0, 0.15)
    save_img(
        "jet_response_iqr_over_med_pt.png",
        epoch,
        cp_dir=cp_dir,
        comet_experiment=comet_experiment,
    )


In [None]:
# Overall jet pt-ratio histogram for the CLD ttbar sample, on a log y axis.
plot_jet_ratio(
    yvals,
    logy=True,
    cp_dir=Path("clic-target-plots"),
    sample="cld_edm_ttbar_pf",
    dataset="cld",
)

In [None]:
# Jet response in bins of gen-jet pt for the CLD ttbar sample.
plot_jet_response_binned(
    yvals,
    cp_dir=Path("clic-target-plots"),
    sample="cld_edm_ttbar_pf",
    dataset="cld",
)

In [21]:
# Debug view: display the full dictionary of arrays returned by load_eval_data.
# NOTE(review): this dumps every entry; consider showing only the keys.
yvals

{'target_cls_id': <Array [[1, 1, 1, 1, 1, 1, ..., 0, 3, 3, 0, 0], ...] type='5000 * var * int64'>,
 'target_charge': <Array [[0, 2, 2, 2, 0, ..., 1, 1, 1, 1, 1], ...] type='5000 * var * float32'>,
 'target_pt': <Array [[1.37, 8.16, 0.369, ..., 2.16, 0, 0], ...] type='5000 * var * float32'>,
 'target_eta': <Array [[0.00435, 0.801, -2.39, ..., 0, 0], ...] type='5000 * var * float32'>,
 'target_sin_phi': <Array [[-0.0149, -0.799, -0.302, ..., 0, 0], ...] type='5000 * var * float32'>,
 'target_cos_phi': <Array [[-1, -0.602, 0.953, ..., 0.994, 0, 0], ...] type='5000 * var * float32'>,
 'target_energy': <Array [[1.38, 10.9, 2.04, ..., 2.61, 0, 0], ...] type='5000 * var * float32'>,
 'target_ispu': <Array [[0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0], ...] type='5000 * var * float32'>,
 'target_generatorStatus': <Array [[1, 1, 1, 1, 1, ..., 0, 1, 1, 0, 0], ...] type='5000 * var * float32'>,
 'target_simulatorStatus': <Array [[3.52e+08, 8.39e+07, ..., 0, 0], ...] type='5000 * var * float32'>,
 'target_