In [None]:
import sklearn
import sklearn.metrics
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas
import mplhep
import pickle
import awkward
import awkward as ak
import glob
import bz2
import os
import tqdm
import fastjet
import vector
import uproot
import numba
vector.register_awkward()
mplhep.style.use("CMS")

import sys
sys.path += ["../../mlpf/"]

import jet_utils
sys.path += ["../../mlpf/plotting/"]

from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS
from plot_utils import cms_label, sample_label
from plot_utils import pid_to_text

import boost_histogram as bh

In [None]:
# Palette for the per-particle-type curves; the plotting loops pick entries by position.
colors = [
    "#0072B2",  # Blue
    "#D55E00",  # Vermillion (Reddish-Orange)
    "#F0E442",  # Yellow
    "#56B4E9",  # Sky Blue
    "#CC79A7",  # Reddish-Purple
]

# Global axis-label size plus the font sizes reused by individual figures.
matplotlib.rcParams.update({"axes.labelsize": 35})
legend_fontsize = 30
sample_label_fontsize = 30
addtext_fontsize = 25

# (x, y) anchor positions for the text labels drawn on the figures.
jet_label_coords = (0.02, 0.82)
jet_label_coords_single = (0.02, 0.86)
sample_label_coords = (0.02, 0.96)

In [None]:
def load_tree(ttree):
    """Read the branches used in this notebook from one tree.

    Parameters:
    - ttree: object exposing ``arrays(list_of_branch_names)`` (here the
      "pfana/pftree" tree opened with uproot)

    Returns:
    - awkward.Array with one field per branch group:
      "pythia", "cp", "pf", "genjet", "genmet", "element"
    """
    branch_groups = {
        "pythia": [
            "gen_pt", "gen_eta", "gen_phi", "gen_energy",
            "gen_pdgid", "gen_status", "gen_charge", "gen_daughters",
        ],
        "cp": [
            "caloparticle_pt", "caloparticle_eta", "caloparticle_phi",
            "caloparticle_energy", "caloparticle_pid", "caloparticle_charge",
        ],
        "pf": [
            "pfcandidate_pt", "pfcandidate_eta", "pfcandidate_phi",
            "pfcandidate_energy", "pfcandidate_pdgid",
        ],
        "genjet": ["genjet_pt", "genjet_eta", "genjet_phi", "genjet_energy"],
        "genmet": ["genmet_pt"],
        "element": ["element_eta", "element_type"],
    }
    return awkward.Array(
        {group: ttree.arrays(branches) for group, branches in branch_groups.items()}
    )

def to_bh(data, bins, cumulative=False):
    """Fill a 1D boost-histogram with variable-width bins.

    Parameters:
    - data: values to fill
    - bins: bin edges passed to ``bh.axis.Variable``
    - cumulative: if True, the bin contents are replaced by
      (sum of all bin values) - (running sum of the bins)

    Returns:
    - the filled ``bh.Histogram``
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(data)
    if not cumulative:
        return hist
    hist[:] = np.sum(hist.values()) - np.cumsum(hist)
    return hist

@numba.njit
def deltaphi(phi1, phi2):
    """Return phi1 - phi2 folded back through atan2(sin, cos)."""
    dphi = phi1 - phi2
    sin_dphi = np.sin(dphi)
    cos_dphi = np.cos(dphi)
    return np.arctan2(sin_dphi, cos_dphi)

@numba.njit
def deltar(eta1, phi1, eta2, phi2):
    """Return sqrt(deta^2 + dphi^2), with dphi taken from ``deltaphi``."""
    eta_gap = eta1 - eta2
    phi_gap = deltaphi(phi1, phi2)
    return np.sqrt(eta_gap**2 + phi_gap**2)

@numba.njit
def match_particles(eta1, eta2, phi1, phi2, deltaR_cut):
    """
    Greedy, one-to-one deltaR matching between two per-event particle collections.

    For every event, the particles of the first collection are visited in index
    order; each one is paired with the closest not-yet-used particle of the second
    collection, provided that distance is below deltaR_cut.

    Parameters:
    - eta1, phi1: per-event eta/phi of the first collection (indexed [event][particle])
    - eta2, phi2: per-event eta/phi of the second collection (indexed [event][particle])
    - deltaR_cut: maximum deltaR for a pair to be accepted

    Returns:
    - three per-event lists: indices into the first collection, the matched indices
      into the second collection, and the deltaR of each accepted pair
    """
    nev = len(eta1)
    ptcl_inds_1_ev = []
    ptcl_inds_2_ev = []
    best_drs_ev = []
    for iev in range(nev):
        ptcl_inds_1 = []
        ptcl_inds_2 = []
        best_drs = []

        # loop over the first collection
        # pfs_used[ip2] == 1 marks second-collection particles that are already matched
        pfs_used = np.zeros(len(eta2[iev]))
        for ip1 in range(len(eta1[iev])):
            # compute deltaR from this particle to all particles in the other collection
            # entries stay at the 999 sentinel for particles that are skipped below
            drs = 999*np.ones(len(eta2[iev]), dtype=np.float64)

            # loop over the second collection
            for ip2 in range(len(eta2[iev])):
                if pfs_used[ip2]==1:
                    continue
                _eta1 = eta1[iev][ip1]
                _eta2 = eta2[iev][ip2]
                _phi1 = phi1[iev][ip1]
                _phi2 = phi2[iev][ip2]

                dr = deltar(_eta1, _phi1, _eta2, _phi2)
                drs[ip2] = dr

            # guard: argmin is only taken when the second collection is non-empty
            if len(drs) > 0:
                # find closest match to this particle
                min_idx_dr = np.argmin(drs)

                # has to be closer than the deltaR_cut
                if drs[min_idx_dr] < deltaR_cut:
                    ptcl_inds_1.append(ip1)
                    ptcl_inds_2.append(min_idx_dr)
                    best_drs.append(drs[min_idx_dr])
                    pfs_used[min_idx_dr] = 1
                
        ptcl_inds_1_ev.append(ptcl_inds_1)
        ptcl_inds_2_ev.append(ptcl_inds_2)
        best_drs_ev.append(best_drs)
    return ptcl_inds_1_ev, ptcl_inds_2_ev, best_drs_ev

def compute_relative_isolation(vecs, dr=0.4):
    """
    Relative isolation of every particle with respect to the other particles
    of the same event:

        RelIso = (sum of pt of neighbors with 0 < deltaR < dr) / (pt of the particle)

    No particle-type filter is applied to the neighbors.

    Parameters:
    - vecs: awkward array of per-event vectors providing ``.pt`` and ``.deltaR``
      (e.g. built with_name="Momentum4D")
    - dr: cone size; neighbors must be strictly closer than this

    Returns:
    - awkward array of RelIso values, one per particle in vecs
    """
    # nested=True keeps, for each candidate, its own inner list of partners
    cand, neighbor = ak.unzip(ak.cartesian([vecs, vecs], axis=1, nested=True))

    separation = cand.deltaR(neighbor)

    # deltaR > 0 drops the pairing of a particle with itself
    in_cone = (separation < dr) & (separation > 0)

    # pt of partners outside the cone is replaced by 0 before summing
    cone_pt = ak.sum(ak.where(in_cone, neighbor.pt, 0), axis=-1)

    # every entry in a candidate's row carries that candidate's pt; take the first
    return cone_pt / cand.pt[:, :, 0]

def midpoints(x):
    """Return the average of each pair of consecutive entries of x (e.g. bin centers from bin edges)."""
    lower, upper = x[:-1], x[1:]
    return (upper + lower) / 2

In [None]:
# First 100 ntuple files (sorted by path) and the matching bz2-compressed pickle paths,
# obtained by swapping the directory and the extension in each ROOT path.
rootfiles = sorted(
    glob.glob("/local/joosep/mlpf/cms//20250508_cmssw_15_0_5_d3c6d1/nopu/QCDForPF_14TeV_TuneCUETP8M1_cfi/root/pfntuple*.root")
)[:100]
pklfiles = [
    rootfile.replace("/root/", "/raw/").replace(".root", ".pkl.bz2")
    for rootfile in rootfiles
]

In [None]:
# Display how many ROOT files were found and how many pickle paths were derived
(len(rootfiles), len(pklfiles))

In [None]:
#Download from https://jpata.web.cern.ch/jpata/mlpf/cms/20240823_simcluster/nopu/
# Open each file in a context manager so its handle is released once the branches
# have been read (previously the opened files were never closed).
# NOTE(review): this relies on load_tree returning fully read arrays - confirm.
tts = []
for fn in tqdm.tqdm(rootfiles):
    with uproot.open(fn) as rootfile:
        tts.append(load_tree(rootfile["pfana/pftree"]))
# Merge the per-file arrays along the event axis
tts = awkward.concatenate(tts, axis=0)

In [None]:
# Collect the per-event records of all pickle files into one flat list.
# Each file is closed right after loading, and the list is grown with extend()
# rather than sum(..., []), which re-copies the accumulated list for every file.
# NOTE(review): pickle.load can execute arbitrary code - only read trusted files.
pickle_data = []
for fn in tqdm.tqdm(pklfiles):
    with bz2.BZ2File(fn) as pkl_file:
        pickle_data.extend(pickle.load(pkl_file))

In [None]:
def remap_pid(pdgid, charge):
    """Collapse PDG ids into a reduced set of particle classes.

    |pdgid| is kept for 11, 13, 22, 12, 14 and 16. Every other particle becomes
    211 when |charge| == 1 and 130 when |charge| == 0; anything else keeps |pdgid|.

    Parameters:
    - pdgid: per-event awkward array of PDG ids
    - charge: per-event awkward array of charges, same structure as pdgid

    Returns:
    - awkward array of remapped ids with the per-event structure of pdgid
    """
    abs_pdgid = ak.flatten(np.abs(pdgid))
    abs_charge = ak.flatten(np.abs(charge))
    remapped = np.abs(np.asarray(ak.flatten(pdgid)))

    # ids that are never remapped: e, mu, photon and the three neutrino flavours
    keeps_id = (
        (abs_pdgid == 11) | (abs_pdgid == 13) | (abs_pdgid == 22)
        | (abs_pdgid == 12) | (abs_pdgid == 14) | (abs_pdgid == 16)
    )
    to_remap = ~keeps_id

    remapped[to_remap & (abs_charge == 1)] = 211
    remapped[to_remap & (abs_charge == 0)] = 130
    return ak.unflatten(remapped, ak.count(pdgid, axis=1))

In [None]:
# Turn the target/candidate tables of every event into DataFrames and add a
# "phi" column reconstructed from the stored sine and cosine.
for event in pickle_data:
    for coll in ["ytarget", "ycand"]:
        frame = pandas.DataFrame(event[coll])
        frame["phi"] = np.arctan2(frame["sin_phi"], frame["cos_phi"])
        event[coll] = frame

# Pack the per-event columns into jagged awkward arrays:
#   arrs_awk  - only rows with pid != 0
#   arrs_flat - all rows
arrs_awk = {}
arrs_flat = {}
for coll in ["ytarget", "ycand"]:
    selected_feats = {}
    all_feats = {}
    for feat in ["pid", "pt", "eta", "phi", "energy", "ispu"]:
        dt = np.int64 if feat == "pid" else np.float64

        per_event = [np.array(p[coll][feat][p[coll]["pid"] != 0], dtype=dt) for p in pickle_data]
        selected_feats[feat] = ak.unflatten(ak.concatenate(per_event), [len(a) for a in per_event])

        per_event = [np.array(p[coll][feat], dtype=dt) for p in pickle_data]
        all_feats[feat] = ak.unflatten(ak.concatenate(per_event), [len(a) for a in per_event])
    arrs_awk[coll] = selected_feats
    arrs_flat[coll] = all_feats

In [None]:
# Reduced particle classes for the Pythia particles and for the CaloParticles
gen_pid = remap_pid(
    tts["pythia"]["gen_pdgid"],
    tts["pythia"]["gen_charge"],
)
cp_pid = remap_pid(
    tts["cp"]["caloparticle_pid"],
    tts["cp"]["caloparticle_charge"],
)

In [None]:
import particle
from particle import Particle
from particle import PDGID # https://github.com/scikit-hep/particle

import numba
from numba import njit
from numba.typed import Dict
from numba.core import types

@numba.njit
def get_charge_numba(pids, pid_to_charge_numbadict):
    """Look up the charge of every entry of pids in the given pid -> charge dict."""
    n_pids = len(pids)
    charges = np.zeros(n_pids)
    for idx in range(n_pids):
        charges[idx] = pid_to_charge_numbadict[pids[idx]]
    return charges
    
def get_charge_array(pdgids):
    """Return the charge of every PDG id in pdgids as an awkward array.

    The charge of each distinct id is fetched once from the `particle` package
    and stored in a numba typed dict (int64 -> float64), which the compiled
    lookup then applies to the full array.
    """
    charge_lookup = Dict.empty(key_type=types.int64, value_type=types.float64)
    for pid in np.unique(pdgids):
        charge_lookup[pid] = Particle.from_pdgid(pid).charge

    return ak.Array(get_charge_numba(pdgids, charge_lookup))

def remap_pid_gen(pdgids):
    """Collapse PDG ids into reduced classes, taking charges from the PDG table.

    |pdgid| is kept for 11, 13 and 22. Every other particle becomes 211 when its
    |charge| is 1 and 130 when it is 0; anything else keeps |pdgid|.

    Parameters:
    - pdgids: per-event awkward array of PDG ids

    Returns:
    - awkward array of remapped ids with the per-event structure of pdgids
    """
    abs_pdgid = ak.flatten(np.abs(pdgids))
    abs_charge = np.abs(get_charge_array(ak.flatten(pdgids)))
    remapped = np.abs(np.asarray(ak.flatten(pdgids)))

    # electrons, muons and photons keep their own id
    to_remap = ~((abs_pdgid == 11) | (abs_pdgid == 13) | (abs_pdgid == 22))

    remapped[to_remap & (abs_charge == 1)] = 211
    remapped[to_remap & (abs_charge == 0)] = 130
    return ak.unflatten(remapped, ak.count(pdgids, axis=1))

In [None]:
# Reduced particle classes of the target particles (rows with pid != 0)
ytarget_pid = remap_pid_gen(
    arrs_awk["ytarget"]["pid"],
)

In [None]:
# pT spectra per particle class: Pythia status-1 particles (dashed) against the
# target particles (solid), with the per-bin ratio target / Pythia underneath.
b = np.logspace(-1,3,41)
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)
pa = np.abs(arrs_awk["ytarget"]["pid"])

# Both selections below do not depend on the particle class, so build them once.
# Target particles whose original |pid| is one of these hadron codes are dropped.
msk_nohadron = ((pa!=511) & (pa!=513) & (pa!=521) & (pa!=531) & (pa!=311) & (pa!=411) & (pa!=421) & (pa!=2122) & (pa!=3122) & (pa!=4122) & (pa!=5122))
gen_sel = (
    (tts["pythia"]["gen_status"]==1)
    & (tts["pythia"]["gen_pt"]>0.1)
    & (np.abs(tts["pythia"]["gen_eta"])<5)
)
target_sel = (
    msk_nohadron
    & (arrs_awk["ytarget"]["pt"]>0.1)
    & (np.abs(arrs_awk["ytarget"]["eta"])<5)
)

for i, p in enumerate([211, 130, 22, 11, 13]):
    plt.sca(a0)
    h0 = plt.hist(
        ak.flatten(tts["pythia"]["gen_pt"][(gen_pid==p) & gen_sel]),
        bins=b, histtype="step", color=colors[i], ls="--", lw=2,
    )
    h1 = plt.hist(
        ak.flatten(arrs_awk["ytarget"]["pt"][(ytarget_pid==p) & target_sel]),
        bins=b, histtype="step", color=colors[i], lw=2, label=str(p),
    )
    # ratio target / Pythia; bins with an empty denominator are set to 0
    r = np.divide(h1[0], h0[0], out=np.zeros_like(h1[0]), where=h0[0] != 0)
    plt.sca(a1)
    plt.plot(midpoints(h0[1]), r, color=colors[i], lw=2)

plt.sca(a0)
plt.xscale("log")
plt.yscale("log")
plt.ylim(1, a0.get_ylim()[1]*100)
plt.legend(loc="best")
mplhep.cms.label("Preliminary", data=False, com=14, year='Run 3')
plt.ylabel("Count")
sample_label(a0, "cms_pf_qcd", x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)

plt.sca(a1)
plt.ylim(0,2)
plt.axhline(1.0, ls="--", color="black")
plt.xlabel("Particle $p_T$ (GeV)")
# the plotted quantity is h1 / h0, i.e. target over truth (the label used to be inverted)
plt.ylabel("target / truth")

In [None]:
# eta distributions per particle class: Pythia status-1 particles (dashed) against
# CaloParticles (solid), with the per-bin ratio CaloParticle / Pythia underneath.
b = np.linspace(-5,5,101)

f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# class-independent parts of the two selections
gen_sel = (
    (tts["pythia"]["gen_status"]==1)
    & (tts["pythia"]["gen_pt"]>1)
    & (np.abs(tts["pythia"]["gen_eta"])<5)
)
cp_sel = (
    (tts["cp"]["caloparticle_pt"]>1)
    & (np.abs(tts["cp"]["caloparticle_eta"])<5)
)

for i, p in enumerate([211, 130, 22, 11, 13]):
    plt.sca(a0)
    h0 = plt.hist(
        ak.flatten(tts["pythia"]["gen_eta"][(gen_pid==p) & gen_sel]),
        bins=b, histtype="step", color=colors[i], ls="--", lw=2,
    )
    h1 = plt.hist(
        ak.flatten(tts["cp"]["caloparticle_eta"][(cp_pid==p) & cp_sel]),
        bins=b, histtype="step", color=colors[i], lw=2, label=str(p),
    )
    # ratio CaloParticle / Pythia; bins with an empty denominator are set to 0
    r = np.divide(h1[0], h0[0], out=np.zeros_like(h1[0]), where=h0[0] != 0)
    plt.sca(a1)
    plt.plot(midpoints(h0[1]), r, color=colors[i], lw=2)

plt.sca(a0)
plt.yscale("log")
plt.ylim(1, a0.get_ylim()[1]*1000)
plt.legend(loc="best")
plt.ylabel("Count")

plt.sca(a1)
plt.ylim(0,2)
plt.axhline(1.0, ls="--", color="black")
# raw string: "\e" is not a valid escape sequence in a normal string literal
plt.xlabel(r"Particle $\eta$")
# the plotted quantity is h1 / h0, i.e. CaloParticle (target) over Pythia (truth)
plt.ylabel("target / truth")

In [None]:
# Pythia status-1 particles with pt > 1 GeV, neutrinos removed.
# The veto is applied to |pdgid| so that antineutrinos (-12, -14, -16) are
# removed as well; comparing the signed id let them through.
gen_abs_pdgid = np.abs(tts["pythia"]["gen_pdgid"])
msk_pythia = (
    (tts["pythia"]["gen_status"]==1)
    & (tts["pythia"]["gen_pt"]>1)
    & (gen_abs_pdgid!=12)
    & (gen_abs_pdgid!=14)
    & (gen_abs_pdgid!=16)
)
pythia_p4 = ak.zip(
    {
        "pt": tts["pythia"]["gen_pt"][msk_pythia],
        "eta": tts["pythia"]["gen_eta"][msk_pythia],
        "phi": tts["pythia"]["gen_phi"][msk_pythia],
        "energy": tts["pythia"]["gen_energy"][msk_pythia],
    },
    with_name="Momentum4D",
)

# CaloParticles with pt > 1 GeV
msk_cp = (tts["cp"]["caloparticle_pt"]>1)
cp_p4 = ak.zip(
    {
        "pt": tts["cp"]["caloparticle_pt"][msk_cp],
        "eta": tts["cp"]["caloparticle_eta"][msk_cp],
        "phi": tts["cp"]["caloparticle_phi"][msk_cp],
        "energy": tts["cp"]["caloparticle_energy"][msk_cp],
    },
    with_name="Momentum4D",
)

In [None]:
# Relative isolation in a dR = 0.1 cone, clipped to [0, 2] so that the tail ends
# up in the last bin of the histograms drawn from these arrays.
gen_reliso_raw = compute_relative_isolation(pythia_p4, dr=0.1)
gen_reliso = ak.unflatten(
    np.clip(ak.flatten(gen_reliso_raw), 0, 2),
    ak.count(gen_reliso_raw, axis=1),
)

cp_reliso_raw = compute_relative_isolation(cp_p4, dr=0.1)
cp_reliso = ak.unflatten(
    np.clip(ak.flatten(cp_reliso_raw), 0, 2),
    ak.count(cp_reliso_raw, axis=1),
)

In [None]:
# Normalized relative-isolation distributions of the selected Pythia particles,
# one curve per reduced particle class.
b = np.linspace(0,2,101)
gen_abs_pid_selected = np.abs(gen_pid[msk_pythia])
for class_pid, class_label in [(11, "ele"), (13, "mu"), (22, "photon"), (211, "ch.had"), (130, "n.had")]:
    plt.hist(
        ak.flatten(gen_reliso[gen_abs_pid_selected==class_pid]),
        bins=b, density=1, histtype="step", lw=2, label=class_label,
    )
plt.yscale("log")
plt.legend(title="Pythia st=1, pt>1 GeV")
plt.xlabel("Relative isolation dR=0.1")

In [None]:
# Normalized relative-isolation distributions of the selected CaloParticles,
# one curve per reduced particle class.
b = np.linspace(0,2,101)
cp_abs_pid_selected = np.abs(cp_pid[msk_cp])
for class_pid, class_label in [(11, "ele"), (13, "mu"), (22, "photon"), (211, "ch.had"), (130, "n.had")]:
    plt.hist(
        ak.flatten(cp_reliso[cp_abs_pid_selected==class_pid]),
        bins=b, density=1, histtype="step", lw=2, label=class_label,
    )
plt.yscale("log")
plt.legend(title="CaloParticle st=1, pt>1 GeV")
plt.xlabel("Relative isolation dR=0.1")

In [None]:
# Pythia particles with |status| == 1, |pdgid| == 211 and pt > 1 GeV.
# NOTE(review): the variables are called pi0_*, but the selection is |pdgid| == 211
# (charged pions), not 111 - confirm which one is intended.
msk_pi0 = (
    (np.abs(tts["pythia"]["gen_status"])==1)
    & (np.abs(tts["pythia"]["gen_pdgid"])==211)
    & (tts["pythia"]["gen_pt"]>1)
)
pi0_pt = tts["pythia"]["gen_pt"][msk_pi0]
pi0_eta = tts["pythia"]["gen_eta"][msk_pi0]
pi0_phi = tts["pythia"]["gen_phi"][msk_pi0]

# CaloParticles with pt > 2 GeV.
# NOTE: msk_cp and cp_pid are rebound here (cp_pid now holds the raw, un-remapped ids
# of the selected CaloParticles); cells above that use the earlier values must not be
# re-run after this one without re-running their inputs.
msk_cp = tts["cp"]["caloparticle_pt"]>2
cp_pid = tts["cp"]["caloparticle_pid"][msk_cp]
cp_pt = tts["cp"]["caloparticle_pt"][msk_cp]
cp_eta = tts["cp"]["caloparticle_eta"][msk_cp]
cp_phi = tts["cp"]["caloparticle_phi"][msk_cp]

In [None]:
# Match each selected CaloParticle (collection 1) to the closest unused selected
# Pythia pion (collection 2) within deltaR < 0.02.
ptcl_inds_1_ev, ptcl_inds_2_ev, best_drs_ev = match_particles(
    cp_eta,
    pi0_eta,
    cp_phi,
    pi0_phi,
    0.02,
)

In [None]:
# Per event: summed pt of the matched Pythia pions divided by the summed pt of the
# CaloParticles they were matched to.
b = np.linspace(0,5,100)
matched_gen_pt_sum = ak.sum(pi0_pt[ptcl_inds_2_ev], axis=1)
matched_cp_pt_sum = ak.sum(tts["cp"]["caloparticle_pt"][msk_cp][ptcl_inds_1_ev], axis=1)
plt.hist(matched_gen_pt_sum/matched_cp_pt_sum, bins=b);
#plt.yscale("log")

In [None]:
# Which CaloParticle ids were matched to a Pythia pion, most frequent first
matched_cp_pid = cp_pid[ptcl_inds_1_ev]
matched_cp_eta = cp_eta[ptcl_inds_1_ev]
matched_cp_pt = cp_pt[ptcl_inds_1_ev]

pids, counts = np.unique(ak.flatten(np.abs(matched_cp_pid)), return_counts=True)
pids_sorted = sorted(zip(pids, counts), key=lambda pid_count: pid_count[1], reverse=True)
print(pids_sorted)

# pt-weighted eta distribution for the six most frequent ids
b = np.linspace(-5,5,41)
for pid, count in pids_sorted[:6]:
    has_pid = matched_cp_pid==pid
    plt.hist(
        ak.flatten(matched_cp_eta[has_pid]),
        bins=b,
        weights=ak.flatten(matched_cp_pt[has_pid]),
        histtype="step", label=str(pid)
    )
plt.legend()

In [None]:
# pickle_data = sum(
#     [pickle.load(bz2.BZ2File(fn)) for fn in pklfiles],
#     [],
# )

In [None]:
# for i in range(len(pickle_data)):
#     for coll in ["ytarget", "ycand"]:
#         pickle_data[i][coll] = pandas.DataFrame(pickle_data[i][coll])
#         pickle_data[i][coll]["phi"] = np.arctan2(pickle_data[i][coll]["sin_phi"], pickle_data[i][coll]["cos_phi"])


# arrs_awk = {}

# for coll in ["ytarget", "ycand"]:
#     arrs_awk[coll] = {}
#     for feat in ["pid", "pt", "eta", "phi", "energy", "ispu"]:
#         arr = [np.array(p[coll][feat][p[coll]["pid"] != 0]) for p in pickle_data]
#         arrs_awk[coll][feat] = awkward.unflatten(awkward.concatenate(arr), [len(a) for a in arr])
        
# arrs_awk["pythia"] = {}
# arrs_awk["pythia"]["pid"] = awkward.from_regular([np.array(p["pythia"][:, 0]) for p in pickle_data])
# arrs_awk["pythia"]["pt"] = awkward.from_regular([np.array(p["pythia"][:, 1]) for p in pickle_data])
# arrs_awk["pythia"]["eta"] = awkward.from_regular([np.array(p["pythia"][:, 2]) for p in pickle_data])
# arrs_awk["pythia"]["phi"] = awkward.from_regular([np.array(p["pythia"][:, 3]) for p in pickle_data])
# arrs_awk["pythia"]["energy"] = awkward.from_regular([np.array(p["pythia"][:, 4]) for p in pickle_data])

In [None]:
# Short aliases for the three particle collections
particles_pythia = tts["pythia"]
particles_cp = tts["cp"]
particles_pf = tts["pf"]

#genjets are directly from CMSSW, have some cuts applied on the particles
#see e.g. https://github.com/cms-sw/cmssw/blob/master/RecoJets/Configuration/python/GenJetParticles_cff.py#L8
#ideally, we should apply the same cuts on the pythia particles in mask_pythia
genjets = vector.awk(awkward.Array(
    awkward.zip(
        {
            "pt": tts["genjet"]["genjet_pt"],
            "eta": tts["genjet"]["genjet_eta"],
            "phi": tts["genjet"]["genjet_phi"],
            "energy": tts["genjet"]["genjet_energy"],
        }
    ), with_name="Momentum4D"  # record name was misspelled "Mometum4D"
))
genmet = tts["genmet"]["genmet_pt"]

In [None]:
# Inclusive particle pT spectra of the three collections
b = np.logspace(-3,3,100)
plt.figure(figsize=(5,5))

# Selections: status-1 Pythia particles within |eta| < 5 (with and without
# neutrinos) and CaloParticles within |eta| < 5
abs_pid = np.abs(particles_pythia["gen_pdgid"])
is_neutrino = (abs_pid==12) | (abs_pid==14) | (abs_pid==16)
mask_pythia = (particles_pythia["gen_status"]==1) & (np.abs(particles_pythia["gen_eta"])<5)
mask_pythia_nonu = mask_pythia & ~is_neutrino
mask_cp = np.abs(particles_cp["caloparticle_eta"])<5

pt_curves = [
    (particles_pythia[mask_pythia_nonu]["gen_pt"], "Pythia"),
    (particles_pf["pfcandidate_pt"], "PF"),
    (particles_cp[mask_cp]["caloparticle_pt"], "CaloParticle"),
]
for pt_values, curve_label in pt_curves:
    plt.hist(awkward.flatten(pt_values), bins=b, label=curve_label, histtype="step")

plt.xscale("log")
plt.yscale("log")
plt.xlabel("Particle $p_T$ [GeV]")
plt.legend()

In [None]:
# Inclusive particle eta distributions of the three collections
b = np.linspace(-5,5,41)
plt.figure(figsize=(5,5))

plt.hist(awkward.flatten(particles_pythia[mask_pythia_nonu]["gen_eta"]), bins=b, label="Pythia", histtype="step")
plt.hist(awkward.flatten(particles_cp[mask_cp]["caloparticle_eta"]), bins=b, label="CaloParticle", histtype="step")
plt.hist(awkward.flatten(particles_pf["pfcandidate_eta"]), bins=b, label="PF", histtype="step")

# raw string: "\e" is not a valid escape sequence in a normal string literal
plt.xlabel(r"Particle $\eta$")
plt.legend()

In [None]:
def remap_pid(pdgid, charge):
    """Collapse PDG ids into reduced particle classes.

    NOTE: this definition replaces the function of the same name defined earlier
    in the notebook. Unlike that one, it does not exempt |pdgid| 12/14/16: only
    11, 13 and 22 keep their id. Every other particle becomes 211 when
    |charge| == 1 and 130 when |charge| == 0; anything else keeps |pdgid|.

    Parameters:
    - pdgid: per-event awkward array of PDG ids
    - charge: per-event awkward array of charges, same structure as pdgid

    Returns:
    - awkward array of remapped ids with the per-event structure of pdgid
    """
    flat_abs_pid = ak.flatten(np.abs(pdgid))
    flat_abs_charge = ak.flatten(np.abs(charge))
    out = np.abs(np.asarray(ak.flatten(pdgid)))

    is_hadron_like = ~((flat_abs_pid == 11) | (flat_abs_pid == 13) | (flat_abs_pid == 22))
    out[is_hadron_like & (flat_abs_charge == 1)] = 211
    out[is_hadron_like & (flat_abs_charge == 0)] = 130

    n_per_event = ak.count(pdgid, axis=1)
    return ak.unflatten(out, n_per_event)

#map HF to neutral hadron for consistency with gen
def remap_pid_pf(pdgid):
    """Return |pdgid| per particle, with the ids 1 and 2 replaced by 130."""
    flat_abs_pid = ak.flatten(np.abs(pdgid))
    out = np.abs(np.asarray(ak.flatten(pdgid)))
    out[(flat_abs_pid == 1) | (flat_abs_pid == 2)] = 130
    n_per_event = ak.count(pdgid, axis=1)
    return ak.unflatten(out, n_per_event)

In [None]:
# Reduced particle classes for the three collections
caloparticle_pid_remapped = remap_pid(
    particles_cp["caloparticle_pid"],
    particles_cp["caloparticle_charge"],
)
gen_pid_remapped = remap_pid(
    particles_pythia["gen_pdgid"],
    particles_pythia["gen_charge"],
)
pf_pid_remapped = remap_pid_pf(
    particles_pf["pfcandidate_pdgid"],
)

In [None]:
# b = np.logspace(-3,4,100)

# pid1 = np.abs(particles_pythia["gen_pdgid"])
# pid2 = np.abs(particles_cp["caloparticle_pid"])
# pid3 = np.abs(particles_pf["pfcandidate_pdgid"])

# mask_pythia_nonu = (particles_pythia["gen_status"]==1) & (np.abs(particles_pythia["gen_eta"])<5) & (abs_pid!=12) & (abs_pid!=14) & (abs_pid!=16)
# mask_cp = np.abs(particles_cp["caloparticle_eta"])<5

# uniq_pid = np.unique(awkward.flatten(pid2[mask_pythia_nonu]))
# fig, axs = plt.subplots(4,4, figsize=(20,20))

# iax = 0
# axs = axs.flatten()

# for pid in uniq_pid:
#     if (np.sum(pid1==pid)>0) & (np.sum(pid2==pid)>0):
#         plt.sca(axs[iax])
#         plt.hist(awkward.flatten(particles_pythia[mask_pythia_nonu & (pid1==pid)]["gen_pt"]), bins=b, label="Pythia", histtype="step")
#         plt.hist(awkward.flatten(particles_cp[mask_cp & (pid2==pid)]["caloparticle_pt"]), bins=b, label="CaloParticle", histtype="step")
#         plt.hist(awkward.flatten(particles_pf[(pid3==pid)]["pfcandidate_pt"]), bins=b, label="PF", histtype="step")
        
#         plt.xscale("log")
#         plt.yscale("log")
#         plt.xlabel("Particle $p_T$ [GeV]")
#         plt.legend(fontsize=8)
#         plt.title("pdgid {}".format(pid))
#         iax += 1
# plt.tight_layout()
# plt.savefig("TTbar_noPU_sim_particle_pt_pdgid.pdf", bbox_inches="tight")

In [None]:
# pT spectra per reduced particle class, one panel per class that is present in
# both the Pythia and the CaloParticle collection.
b = np.logspace(-3,4,100)

pid1 = np.abs(gen_pid_remapped)
pid2 = np.abs(caloparticle_pid_remapped)
pid3 = np.abs(pf_pid_remapped)

mask_pythia_nonu = (particles_pythia["gen_status"]==1) & (np.abs(particles_pythia["gen_eta"])<5) & (abs_pid!=12) & (abs_pid!=14) & (abs_pid!=16)
mask_cp = np.abs(particles_cp["caloparticle_eta"])<5

uniq_pid = np.unique(awkward.flatten(pid1[mask_pythia_nonu]))
fig, axs = plt.subplots(3,2, figsize=(2*5,3*5))
axs = axs.flatten()

iax = 0
for pid in uniq_pid:
    in_pythia = np.sum(pid1==pid)>0
    in_cp = np.sum(pid2==pid)>0
    if not (in_pythia and in_cp):
        continue

    ax = axs[iax]
    ax.hist(awkward.flatten(particles_pythia[mask_pythia_nonu & (pid1==pid)]["gen_pt"]), bins=b, label="Pythia", histtype="step")
    ax.hist(awkward.flatten(particles_cp[mask_cp & (pid2==pid)]["caloparticle_pt"]), bins=b, label="CaloParticle", histtype="step")
    ax.hist(awkward.flatten(particles_pf[(pid3==pid)]["pfcandidate_pt"]), bins=b, label="PF", histtype="step")

    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel("Particle $p_T$ [GeV]")
    ax.legend(fontsize=8)
    ax.set_title("pid {}".format(pid))

    iax += 1

# the last panel of the 3x2 grid is hidden
axs[-1].set_axis_off()
plt.tight_layout()
plt.savefig("TTbar_noPU_sim_particle_pt_pid.pdf", bbox_inches="tight")

In [None]:
# pT of |pdgid| == 211 particles: Pythia (status 1, |eta| < 5) vs. all CaloParticles
b = np.linspace(0,200,200)
pythia_is_211 = mask_pythia_nonu & (np.abs(particles_pythia["gen_pdgid"])==211)
cp_is_211 = np.abs(particles_cp["caloparticle_pid"])==211
plt.hist(awkward.flatten(particles_pythia[pythia_is_211]["gen_pt"]), bins=b, label="Pythia", histtype="step")
plt.hist(awkward.flatten(particles_cp[cp_is_211]["caloparticle_pt"]), bins=b, label="CaloParticle", histtype="step")
plt.yscale("log")

In [None]:
# pT of |pdgid| == 13 particles: Pythia (status 1, |eta| < 5) vs. all CaloParticles
b = np.linspace(0,200,200)
pythia_is_13 = mask_pythia_nonu & (np.abs(particles_pythia["gen_pdgid"])==13)
cp_is_13 = np.abs(particles_cp["caloparticle_pid"])==13
plt.hist(awkward.flatten(particles_pythia[pythia_is_13]["gen_pt"]), bins=b, label="Pythia", histtype="step")
plt.hist(awkward.flatten(particles_cp[cp_is_13]["caloparticle_pt"]), bins=b, label="CaloParticle", histtype="step")
plt.yscale("log")

In [None]:
# eta distributions per reduced particle class, one panel per class that is
# present in both the Pythia and the CaloParticle collection.
b = np.linspace(-5,5,61)

fig, axs = plt.subplots(3,2, figsize=(2*5,3*5))

iax = 0
axs = axs.flatten()

for pid in uniq_pid:
    if (np.sum(pid1==pid)>0) & (np.sum(pid2==pid)>0):
        plt.sca(axs[iax])
        plt.hist(awkward.flatten(particles_pythia[mask_pythia_nonu & (pid1==pid)]["gen_eta"]), bins=b, label="Pythia", histtype="step")
        plt.hist(awkward.flatten(particles_cp[mask_cp & (pid2==pid)]["caloparticle_eta"]), bins=b, label="CaloParticle", histtype="step")
        plt.hist(awkward.flatten(particles_pf[(pid3==pid)]["pfcandidate_eta"]), bins=b, label="PF", histtype="step")

        # raw string: "\e" is not a valid escape sequence in a normal string literal
        plt.xlabel(r"Particle $\eta$")
        plt.legend(fontsize=8)
        plt.title("pid {}".format(pid))
        plt.gca().ticklabel_format(axis="both", style="sci", scilimits=(0,0))
        iax += 1
# the last panel of the 3x2 grid is hidden
axs[-1].set_axis_off()
plt.tight_layout()
plt.savefig("TTbar_noPU_sim_particle_eta_pid.pdf", bbox_inches="tight")

In [None]:
#Sum pT in event
b = np.linspace(0,1000,100)
fig, axs = plt.subplots(1,2, figsize=(10,5))

# Extra Pythia selection for the right-hand panel: photons must have pt > 0.3 GeV,
# all other particles pass unchanged.
is_gen_photon = particles_pythia["gen_pdgid"]==22
pythia_ptcut = (is_gen_photon & (particles_pythia["gen_pt"]>0.3)) | (particles_pythia["gen_pdgid"]!=22)

# (axes, Pythia selection, panel title) for the left and right panel
sum_pt_panels = [
    (axs[0], mask_pythia_nonu, None),
    (axs[1], mask_pythia_nonu & pythia_ptcut, "$p_T>0.3$ GeV"),
]
for panel_ax, pythia_sel, panel_title in sum_pt_panels:
    plt.sca(panel_ax)
    if panel_title is not None:
        plt.title(panel_title)
    plt.hist2d(
        awkward.to_numpy(awkward.sum(particles_pythia[pythia_sel]["gen_pt"], axis=1)),
        awkward.to_numpy(awkward.sum(particles_cp[mask_cp]["caloparticle_pt"], axis=1)),
        bins=(b,b), cmap="hot_r", #norm=matplotlib.colors.LogNorm()
    )
    # diagonal: equal summed pT in both collections
    plt.plot([0,2000],[0,2000], color="black", lw=0.5)
    plt.xlabel("Pythia, no nu")
    plt.ylabel("CaloParticle")

plt.tight_layout()
plt.suptitle("sum $p_T$")

## Jets

In [None]:
jets_coll = {}
jets_coll["genjet"] = genjets
jet_constituents = {}


def _cluster_jets(particles, prefix, with_constituents=True, radius=0.4, min_pt=3):
    """Cluster one particle collection into anti-kt jets.

    Parameters:
    - particles: awkward record array with the fields "<prefix>_pt", "<prefix>_eta",
      "<prefix>_phi" and "<prefix>_energy"
    - prefix: field-name prefix, e.g. "gen", "caloparticle" or "pfcandidate"
    - with_constituents: also compute the constituent indices of the jets
    - radius: anti-kt distance parameter
    - min_pt: minimum jet pt passed to fastjet

    Returns:
    - (jets, constituent_index); constituent_index is None when not requested
    """
    vec = vector.awk(
        awkward.zip(
            {
                "pt": particles[prefix + "_pt"],
                "eta": particles[prefix + "_eta"],
                "phi": particles[prefix + "_phi"],
                "energy": particles[prefix + "_energy"],
            }
        )
    )
    jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, radius)
    cluster = fastjet.ClusterSequence(vec.to_xyzt(), jetdef)
    jets = cluster.inclusive_jets(min_pt=min_pt)
    constituents = cluster.constituent_index(min_pt=min_pt) if with_constituents else None
    return jets, constituents


# Pythia, status 1, |eta| < 5, no neutrinos
jets_coll["pythia_nonu"], jet_constituents["pythia_nonu"] = _cluster_jets(
    particles_pythia[mask_pythia_nonu], "gen"
)

# same, with the additional photon pt > 0.3 GeV requirement
jets_coll["pythia_nonu_ptcut"], jet_constituents["pythia_nonu_ptcut"] = _cluster_jets(
    particles_pythia[mask_pythia_nonu & pythia_ptcut], "gen"
)

# Pythia including neutrinos; no constituent indices are stored for this collection
jets_coll["pythia"] = _cluster_jets(
    particles_pythia[mask_pythia], "gen", with_constituents=False
)[0]

# CaloParticles within |eta| < 5
jets_coll["cp"], jet_constituents["cp"] = _cluster_jets(
    particles_cp[mask_cp], "caloparticle"
)

# all PF candidates
jets_coll["pf"], jet_constituents["pf"] = _cluster_jets(
    particles_pf, "pfcandidate"
)

# Pairwise jet matching; the last argument (0.1) is the threshold passed to jet_utils
genjet_to_pythia = jet_utils.match_two_jet_collections(jets_coll, "genjet", "pythia", 0.1)
genjet_to_pythia_nonu_ptcut = jet_utils.match_two_jet_collections(jets_coll, "genjet", "pythia_nonu_ptcut", 0.1)
genjet_to_cp = jet_utils.match_two_jet_collections(jets_coll, "genjet", "cp", 0.1)
genjet_to_pf = jet_utils.match_two_jet_collections(jets_coll, "genjet", "pf", 0.1)

pythia_to_cp = jet_utils.match_two_jet_collections(jets_coll, "pythia", "cp", 0.1)
pythia_nonu_to_cp = jet_utils.match_two_jet_collections(jets_coll, "pythia_nonu", "cp", 0.1)
pythia_nonu_ptcut_to_cp = jet_utils.match_two_jet_collections(jets_coll, "pythia_nonu_ptcut", "cp", 0.1)

In [None]:
# Jet pT spectra of the different jet collections
b = np.logspace(0,3,200)
jet_curves = [
    ("genjet", "genjet"),
    ("cp", "CaloParticle"),
    ("pythia", "Pythia"),
    ("pythia_nonu", "Pythia, NoNu"),
    ("pythia_nonu_ptcut", "Pythia, NoNu, pt>0.3 GeV"),
]
for jet_key, jet_label in jet_curves:
    plt.hist(awkward.flatten(jets_coll[jet_key].pt), bins=b, histtype="step", lw=1, label=jet_label)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("jet $p_T$ [GeV]")
plt.ylabel("number of jets")
plt.legend()

In [None]:
# Jet pT response with respect to the matched GenJet, logarithmic axes
plt.figure(figsize=(5,5))
b = np.logspace(-1,1,600)

# (collection key, matching to genjet, legend label); a Pythia curve based on
# genjet_to_pythia is not drawn here
response_curves = [
    ("cp", genjet_to_cp, "CaloParticle"),
    ("pf", genjet_to_pf, "PF"),
]
for jet_key, jet_match, jet_label in response_curves:
    matched_pt = jets_coll[jet_key][jet_match[jet_key]].pt
    matched_genjet_pt = jets_coll["genjet"][jet_match["genjet"]].pt
    plt.hist(
        awkward.flatten(matched_pt / matched_genjet_pt),
        bins=b, histtype="step", lw=1, label=jet_label
    )

plt.xscale("log")
plt.yscale("log")
plt.xlabel("jet $p_T$ / GenJet jet $p_T$")
plt.legend(loc=2, fontsize=12)
plt.axvline(1.0, color="black", ls="--", lw=0.5)

In [None]:
# Jet pT response with respect to the matched GenJet, linear axes
plt.figure(figsize=(5,5))
b = np.linspace(0,2,600)

# (collection key, matching to genjet, legend label); a Pythia curve based on
# genjet_to_pythia is not drawn here
response_curves = [
    ("cp", genjet_to_cp, "CaloParticle"),
    ("pf", genjet_to_pf, "PF"),
]
for jet_key, jet_match, jet_label in response_curves:
    matched_pt = jets_coll[jet_key][jet_match[jet_key]].pt
    matched_genjet_pt = jets_coll["genjet"][jet_match["genjet"]].pt
    plt.hist(
        awkward.flatten(matched_pt / matched_genjet_pt),
        bins=b, histtype="step", lw=1, label=jet_label
    )

#plt.xscale("log")
#plt.yscale("log")
plt.xlabel("jet $p_T$ / GenJet jet $p_T$")
plt.legend(loc=2, fontsize=12)
plt.axvline(1.0, color="black", ls="--", lw=0.5)

## MET

In [None]:
def met(pt, phi):
    """Per-event magnitude of the vector sum of the particles' transverse momenta.

    Parameters:
    - pt, phi: per-event jagged arrays of particle pt and phi

    Returns:
    - one value per event: sqrt((sum px)^2 + (sum py)^2)
    """
    sum_px = awkward.sum(pt * np.cos(phi), axis=1)
    sum_py = awkward.sum(pt * np.sin(phi), axis=1)
    return np.sqrt(sum_px**2 + sum_py**2)

# MET computed from each particle collection
pythia_nonu_sel = particles_pythia[mask_pythia_nonu]
pythia_nonu_ptcut_sel = particles_pythia[mask_pythia_nonu & pythia_ptcut]
cp_sel_particles = particles_cp[mask_cp]

met_pythia_nonu = met(pythia_nonu_sel["gen_pt"], pythia_nonu_sel["gen_phi"])
met_pythia_nonu_ptcut = met(pythia_nonu_ptcut_sel["gen_pt"], pythia_nonu_ptcut_sel["gen_phi"])
met_cp = met(cp_sel_particles["caloparticle_pt"], cp_sel_particles["caloparticle_phi"])
met_pf = met(particles_pf["pfcandidate_pt"], particles_pf["pfcandidate_phi"])

In [None]:
# MET distributions: stored gen-level MET and the values recomputed above
b = np.linspace(0,200,101)
plt.figure(figsize=(5,5))
met_curves = [
    (genmet, "genMetTrue"),
    (met_pythia_nonu, "Pythia, no nu"),
    (met_cp, "CaloParticle"),
    (met_pf, "PF"),
]
for met_values, met_label in met_curves:
    plt.hist(met_values, bins=b, histtype="step", label=met_label)
#plt.xscale("log")
plt.yscale("log")
plt.legend(loc=1, fontsize=12)
plt.xlabel("MET [GeV]")

In [None]:
# Ratio of the recomputed MET to the stored gen-level MET, for events with genMET > 1
b = np.linspace(0,5,101)
plt.figure(figsize=(5,5))
# empty plot call; presumably there to skip the first color of the cycle - confirm
plt.plot([], [])

genmet_first = genmet[:, 0]
has_genmet = genmet_first>1
ratio_curves = [
    (met_pythia_nonu, "Pythia, no nu"),
    (met_cp, "CaloParticle"),
    (met_pf, "PF"),
]
for met_values, met_label in ratio_curves:
    plt.hist((met_values/genmet_first)[has_genmet], bins=b, histtype="step", label=met_label)
#plt.xscale("log")
#plt.yscale("log")
plt.legend(loc=1, fontsize=12)
plt.title("genMET>1")
plt.xlabel("MET / genMET")

In [None]:
# 2D MET comparisons on logarithmic axes.
# plt.subplots creates its own figure, so no separate plt.figure() call is made
# (it only produced an extra, empty figure).
fig, axs = plt.subplots(1,2, figsize=(10,5))

b = np.logspace(-2,3,100)

# left: stored gen-level MET vs. MET recomputed from Pythia particles (no neutrinos)
plt.sca(axs[0])
plt.hist2d(ak.to_numpy(genmet[:, 0]), ak.to_numpy(met_pythia_nonu), bins=(b,b), cmap="Blues")
plt.xlabel("genMetTrue")
plt.ylabel("Pythia MET")
plt.xscale("log")
plt.yscale("log")

# right: Pythia MET vs. CaloParticle MET
plt.sca(axs[1])
plt.hist2d(ak.to_numpy(met_pythia_nonu), ak.to_numpy(met_cp), bins=(b,b), cmap="Blues")
plt.xlabel("Pythia MET")
plt.ylabel("CaloParticle MET")
plt.xscale("log")
plt.yscale("log")

plt.tight_layout()