In [None]:
import pickle
import numpy as np
import awkward
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import uproot
import boost_histogram as bh
import mplhep
import glob
import os
import vector
import shutil

# Select the mplhep "CMS" plotting style for the figures made in this notebook.
mplhep.style.use("CMS")

In [None]:
import sys

# Make the project-local helper modules (plot_utils, jet_utils) importable.
# The paths are relative, so they resolve against the current working directory.
sys.path.extend(["../../mlpf/plotting/", "../../mlpf/"])

import plot_utils
import jet_utils

In [None]:
def cms_label(ax):
    """Put the experiment label used throughout this notebook on ``ax``.

    Forwards to ``plot_utils.experiment_label`` with fixed label settings and
    returns whatever that helper returns.
    """
    label_settings = {
        "experiment": "CMS",
        "tag1": "(Private Work)",
        "tag2": "Run 3 (13.6 TeV)",
        "x1": 0.12,
    }
    return plot_utils.experiment_label(ax, **label_settings)

In [None]:
# Quick look at one PF NANO file: print every branch of the "Events" tree
# whose name contains "PV".
tt = uproot.open("/local/joosep/mlpf/results/cms/CMSSW_15_0_1/JetMET0_pf/step4_NANO_24.root").get("Events")
for k in tt.keys():
    if "PV" not in k:
        continue
    print(k)

In [None]:
def load_nano(fn):
    """Read the branches used in this notebook from the ``Events`` tree of one file.

    Parameters
    ----------
    fn : str
        Path of the ROOT file to read. It is printed as a progress indicator.

    Returns
    -------
    list
        A one-element list holding a dict that maps each branch name to the
        array read for that branch. The list wrapper lets the caller
        concatenate the results of several files with plain list addition.
    """
    branches = [
        "PFMET_pt",
        "Jet_pt",
        "Jet_eta",
        "Jet_phi",
        "Jet_mass",
        "HLT_DiPFJetAve40",
        "HLT_DiPFJetAve80",
        "HLT_PFJet40",
        "HLT_PFJet80",
        "PV_npvsGood",
    ]

    print(fn)
    # Open through a context manager so the file handle is released as soon as
    # the arrays are in memory; otherwise one handle leaks per file loaded.
    with uproot.open(fn) as root_file:
        tt = root_file.get("Events")
        # Read all branches in a single call instead of one pass per branch.
        events = tt.arrays(branches)
        ret = {k: events[k] for k in branches}
    return [ret]

In [None]:
# NANO outputs of the JetMET0 dataset, one directory per reconstruction.
pf_files = glob.glob("/local/joosep/mlpf/results/cms/CMSSW_15_0_1/JetMET0_pf/step4_NANO_*.root")
mlpf_new_files = glob.glob("/local/joosep/mlpf/results/cms/CMSSW_15_0_1/JetMET0_mlpf/step4_NANO_*.root")

# Index by basename so files with the same name in the two directories can be paired.
pf_files_d = {os.path.basename(fn): fn for fn in pf_files}
mlpf_new_files_d = {os.path.basename(fn): fn for fn in mlpf_new_files}

# Keep only files present in both directories. Sorting makes the loading order
# (and therefore the event order in the concatenated arrays) reproducible;
# a bare set intersection has no defined order.
common_files = sorted(pf_files_d.keys() & mlpf_new_files_d.keys())

In [None]:
# load_nano returns a one-element list per file; the nested comprehensions chain
# those lists into one list of per-file records for each reconstruction.
data_baseline = awkward.Array([rec for fn in common_files for rec in load_nano(pf_files_d[fn])])
data_mlpf = awkward.Array([rec for fn in common_files for rec in load_nano(mlpf_new_files_d[fn])])

# Flatten axis 1 of every field so the per-file level is merged away.
flat_baseline = {}
for field in data_baseline.fields:
    flat_baseline[field] = awkward.flatten(data_baseline[field], axis=1)
data_baseline = awkward.Array(flat_baseline)

flat_mlpf = {}
for field in data_mlpf.fields:
    flat_mlpf[field] = awkward.flatten(data_mlpf[field], axis=1)
data_mlpf = awkward.Array(flat_mlpf)

In [None]:
fig, ax = plt.subplots()

bins = np.linspace(0, 20, 21)

# Jet multiplicity per event, overlaid for the two reconstructions.
for events, reco_label in ((data_baseline, "PF"), (data_mlpf, "MLPF")):
    ax.hist(
        awkward.num(events["Jet_pt"], axis=1),
        bins=bins, histtype="step", label=reco_label
    )

ax.set_yscale("log")
cms_label(ax)
ax.text(0.02, 0.98, "JetMET0, Run2024B, NANO", transform=ax.transAxes, va="top")
ax.set_ylim(1, 1e6)
ax.legend()
ax.set_xlabel("Number of jets")

In [None]:
fig, ax = plt.subplots()

bins = np.linspace(0, 100, 101)

# PV_npvsGood per event, overlaid for the two reconstructions.
for events, reco_label in ((data_baseline, "PF"), (data_mlpf, "MLPF")):
    ax.hist(
        events["PV_npvsGood"],
        bins=bins, histtype="step", label=reco_label
    )

ax.set_yscale("log")
cms_label(ax)
ax.text(0.02, 0.98, "JetMET0, Run2024B, NANO", transform=ax.transAxes, va="top")
ax.set_ylim(1, 1e6)
ax.legend()
ax.set_xlabel("Number of good PVs")

In [None]:
def get_jet_pt(data):
    """Select back-to-back dijet events and compute jet pT and the dijet mass.

    Selection, in the order it is applied:
      1. at least two jets with pT > 20 and |eta| < 2.5,
      2. exactly two jets with pT > 5 in the event (vetoes additional jets),
      3. |delta phi| between the two highest-pT jets larger than 2.7.

    Parameters
    ----------
    data
        Event record array with the per-event jet fields ``Jet_pt``,
        ``Jet_eta``, ``Jet_phi`` and ``Jet_mass``.

    Returns
    -------
    mask_2_jets
        Boolean mask with one entry per event of ``data`` for steps 1 and 2
        ONLY. It does not include the back-to-back requirement, so it selects
        a superset of the events behind the other three return values.
    leading_jet_pt, subleading_jet_pt
        pT of the highest and second-highest pT jet, one entry per event that
        passes all three steps.
    dijet_mass
        Invariant mass of the two-jet system for the same events.
    """
    # Step 1: at least two central jets above the pT threshold.
    high_pt_jets = (data["Jet_pt"] > 20) & (np.abs(data["Jet_eta"]) < 2.5)
    two_good_jets = awkward.sum(high_pt_jets, axis=1) > 1

    # Step 2: reject events with additional jets with pt > 5 GeV by requiring
    # that exactly two jets in the event are above that threshold.
    exactly_two_jets_mask = awkward.sum(data["Jet_pt"] > 5, axis=1) == 2

    mask_2_jets = two_good_jets & exactly_two_jets_mask
    evs_2_jets = data[mask_2_jets]

    # Row index of every selected event, paired below with a per-event jet
    # index to pick one jet per event.
    event_idx = np.arange(len(evs_2_jets["Jet_pt"]))
    pt_order = awkward.argsort(evs_2_jets["Jet_pt"], axis=1, ascending=False)

    def jet_kinematics(rank):
        """Return (pt, eta, phi, mass) of the jet at position ``rank`` in descending pT."""
        jet_idx = pt_order[:, rank]
        return tuple(
            evs_2_jets[field][event_idx, jet_idx]
            for field in ("Jet_pt", "Jet_eta", "Jet_phi", "Jet_mass")
        )

    leading = jet_kinematics(0)
    subleading = jet_kinematics(1)

    # Step 3: azimuthal separation folded into [0, pi], then the back-to-back cut.
    delta_phi = abs(leading[2] - subleading[2])
    delta_phi = np.minimum(delta_phi, 2 * np.pi - delta_phi)
    back_to_back_mask = delta_phi > 2.7

    leading_jet_pt, leading_jet_eta, leading_jet_phi, leading_jet_mass = (
        values[back_to_back_mask] for values in leading
    )
    subleading_jet_pt, subleading_jet_eta, subleading_jet_phi, subleading_jet_mass = (
        values[back_to_back_mask] for values in subleading
    )

    def four_momentum(pt, eta, phi, mass):
        """Return (E, px, py, pz) computed from (pt, eta, phi, mass)."""
        energy = np.sqrt(pt**2 * np.cosh(eta) ** 2 + mass**2)
        return energy, pt * np.cos(phi), pt * np.sin(phi), pt * np.sinh(eta)

    e1, px1, py1, pz1 = four_momentum(leading_jet_pt, leading_jet_eta, leading_jet_phi, leading_jet_mass)
    e2, px2, py2, pz2 = four_momentum(
        subleading_jet_pt, subleading_jet_eta, subleading_jet_phi, subleading_jet_mass
    )

    # Invariant mass from the summed four-vector. Floating-point rounding can
    # push the squared mass slightly below zero; clamp it so sqrt never yields NaN.
    mass_squared = (e1 + e2) ** 2 - (px1 + px2) ** 2 - (py1 + py2) ** 2 - (pz1 + pz2) ** 2
    dijet_mass = np.sqrt(np.maximum(mass_squared, 0))

    return mask_2_jets, leading_jet_pt, subleading_jet_pt, dijet_mass

In [None]:
# Run the dijet selection on both reconstructions. Each call returns
# (event mask, leading jet pT, subleading jet pT, dijet mass).
evmask_pf, lj_pt_pf, slj_pt_pf, dijet_mass_pf = get_jet_pt(data_baseline)
evmask_mlpf, lj_pt_mlpf, slj_pt_mlpf, dijet_mass_mlpf = get_jet_pt(data_mlpf)

In [None]:
# Annotation text drawn inside the axes of the dijet-selection plots below.
event_label = "JetMET0, Run2024B, NANO,\ndijet events, $p_T>20$ GeV"

In [None]:
fig, ax = plt.subplots()

bins = np.linspace(0, 200, 101)

# PFMET_pt of the events selected by the mask returned from get_jet_pt.
met_samples = (
    (data_baseline["PFMET_pt"][evmask_pf], "PF"),
    (data_mlpf["PFMET_pt"][evmask_mlpf], "MLPF"),
)
for values, reco_label in met_samples:
    ax.hist(values, bins=bins, histtype="step", label=reco_label)

ax.set_yscale("log")
cms_label(ax)
ax.text(0.02, 0.98, event_label, transform=ax.transAxes, va="top")
ax.set_ylim(1, 1e5)
ax.legend()
ax.set_xlabel("MET [GeV]")

In [None]:
fig, ax = plt.subplots()

bins = np.linspace(0, 400, 201)

# Leading jet pT of the selected dijet events, PF vs. MLPF.
for values, reco_label in ((lj_pt_pf, "PF"), (lj_pt_mlpf, "MLPF")):
    ax.hist(values, bins=bins, histtype="step", label=reco_label)

ax.set_yscale("log")
cms_label(ax)
ax.text(0.02, 0.98, event_label, transform=ax.transAxes, va="top")
ax.set_ylim(1, 1e5)
ax.legend()
ax.set_xlabel("Leading jet $p_T$ [GeV]")

In [None]:
fig, ax = plt.subplots()

bins = np.linspace(0, 400, 201)

# Subleading jet pT of the selected dijet events, PF vs. MLPF.
for values, reco_label in ((slj_pt_pf, "PF"), (slj_pt_mlpf, "MLPF")):
    ax.hist(values, bins=bins, histtype="step", label=reco_label)

ax.set_yscale("log")
cms_label(ax)
ax.text(0.02, 0.98, event_label, transform=ax.transAxes, va="top")
ax.set_ylim(1, 1e5)
ax.legend()
ax.set_xlabel("Subleading jet $p_T$ [GeV]")

In [None]:
fig, ax = plt.subplots()

bins = np.linspace(0, 2000, 201)

# Dijet invariant mass of the selected events, PF vs. MLPF.
for values, reco_label in ((dijet_mass_pf, "PF"), (dijet_mass_mlpf, "MLPF")):
    ax.hist(values, bins=bins, histtype="step", label=reco_label)

ax.set_yscale("log")
cms_label(ax)
ax.text(0.02, 0.98, event_label, transform=ax.transAxes, va="top")
ax.set_ylim(1, 1e5)
ax.legend()
ax.set_xlabel("Dijet mass [GeV]")