In [None]:
import matplotlib.pyplot as plt
import uproot
import awkward as ak
import vector
import numpy as np
vector.register_awkward()

In [None]:
def find_pf_cones(data, target_pdgId, min_gen_pt, max_gen_pt, delta_r_cut, pf_prefix="PFCands"):
    """
    Collect the PF candidates inside a deltaR cone around each selected Gen candidate.

    Gen candidates are selected by an exact ``pdgId`` match and by
    ``min_gen_pt <= pt < max_gen_pt``. Events that have no selected Gen
    candidate, or no PF candidate at all, are skipped entirely. For every
    remaining Gen candidate, the PF candidates of the same event with
    ``deltaR < delta_r_cut`` are gathered. The computation is fully
    vectorized (no Python loop over events).

    Args:
        data (ak.Array): Awkward array with ``GenCands_*`` and ``{pf_prefix}_*``
            branches (``pt``, ``eta``, ``phi``, ``mass``, ``pdgId``).
        target_pdgId (int): PDG ID of Gen candidates to select (signed, exact match).
        min_gen_pt (float): Minimum pt (inclusive) of Gen candidates to select.
        max_gen_pt (float): Maximum pt (exclusive) of Gen candidates to select.
        delta_r_cut (float): Maximum deltaR (exclusive) for associating PF candidates.
        pf_prefix (str): Prefix for PF candidate branches (e.g., "PFCands").

    Returns:
        ak.Array: One record ``{gen: Momentum4D, pfs: [Momentum4D]}`` per selected
                  Gen candidate, in event order and flattened over events.
                  Gen candidates with no PF candidate inside the cone are kept
                  and carry an empty ``pfs`` list.
    """
    def _momentum4d(prefix):
        # Build Lorentz vectors (plus pdgId) from the "<prefix>_<field>" branches.
        return ak.zip(
            {name: data[f"{prefix}_{name}"] for name in ("pt", "eta", "phi", "mass", "pdgId")},
            with_name="Momentum4D",
        )

    # 1. Create Lorentz vectors
    gen_vecs = _momentum4d("GenCands")
    pf_vecs = _momentum4d(pf_prefix)

    # 2. Filter Gen candidates
    gen_mask = (gen_vecs.pdgId == target_pdgId) & (gen_vecs.pt >= min_gen_pt) & (gen_vecs.pt < max_gen_pt)
    selected_gen_vecs = gen_vecs[gen_mask]

    # 3. Keep only events that have both a selected Gen candidate and some PF candidate
    event_mask = (ak.num(selected_gen_vecs, axis=1) > 0) & (ak.num(pf_vecs, axis=1) > 0)
    selected_gen_vecs_filt = selected_gen_vecs[event_mask]
    pf_vecs_filt = pf_vecs[event_mask]

    # 4. All (gen, pf) pairs per event; nested=True groups the pairs by Gen candidate,
    #    giving the layout events -> gen candidates -> pf candidates.
    pairs = ak.cartesian(
        {"gen": selected_gen_vecs_filt, "pf": pf_vecs_filt}, axis=1, nested=True
    )

    # 5. Keep the PF candidates inside the cone of each Gen candidate
    cone_mask = pairs["gen"].deltaR(pairs["pf"]) < delta_r_cut
    cone_pfs = pairs["pf"][cone_mask]

    # 6. Drop the event axis so that there is one entry per Gen candidate.
    #    depth_limit=1 zips only the outer axis, leaving "pfs" as a list per Gen candidate.
    #    Unlike building the result from Python lists, this keeps a well-defined
    #    record type even when no Gen candidate passes the selection.
    return ak.zip(
        {
            "gen": ak.flatten(selected_gen_vecs_filt, axis=1),
            "pfs": ak.flatten(cone_pfs, axis=1),
        },
        depth_limit=1,
    )

In [None]:
# Shell escape: list the contents of the results directory (output is displayed only).
!ls /local/joosep/mlpf/results/cms/CMSSW_14_1_0_74d149_btvnano/

In [None]:
# Input ROOT files: one sub-directory per reconstruction ("pf" / "mlpf"), files numbered 1..6.
INPUT_DIR = "/local/joosep/mlpf/results/cms/CMSSW_14_1_0_74d149_btvnano"
FILE_INDICES = range(1, 7)

# Branches needed by find_pf_cones (identical for both reconstructions).
BRANCHES = [
    "GenCands_pt", "GenCands_eta", "GenCands_phi", "GenCands_mass", "GenCands_pdgId",
    "PFCands_pt", "PFCands_eta", "PFCands_phi", "PFCands_mass", "PFCands_pdgId",
]

# Gen selection and cone size: gen pdgId 13, 20 <= pt/GeV < 10000, dR < 0.02.
# NOTE(review): the previous comment said "gen neutral hadrons", but pdgId 13 is a
# muon (neutral hadrons would be 130) — confirm which selection is intended.
TARGET_PDGID = 13
MIN_GEN_PT = 20
MAX_GEN_PT = 10000
DELTA_R_CUT = 0.02


def _load_events(reco, ifile):
    """Read BRANCHES from the "Events" tree of one file, closing the file afterwards."""
    path = "{}/TTbar_noPU_{}/step3_NANO_btv_{}.root".format(INPUT_DIR, reco, ifile)
    # The context manager releases the file handle; arrays() has already read the data.
    with uproot.open(path) as root_file:
        return root_file["Events"].arrays(BRANCHES)


cones_pf = []
cones_mlpf = []
for ifile in FILE_INDICES:
    data_pf = _load_events("pf", ifile)
    data_mlpf = _load_events("mlpf", ifile)

    # PF / MLPF particles in a cone around each selected gen particle
    cones_pf.append(find_pf_cones(data_pf, TARGET_PDGID, MIN_GEN_PT, MAX_GEN_PT, DELTA_R_CUT))
    cones_mlpf.append(find_pf_cones(data_mlpf, TARGET_PDGID, MIN_GEN_PT, MAX_GEN_PT, DELTA_R_CUT))

# One flat array of {gen, pfs} records per reconstruction, over all files.
ret_pfs = ak.concatenate(cones_pf)
ret_mlpfs = ak.concatenate(cones_mlpf)

In [None]:
# Peek at the pt of the first eight selected gen candidates (PF sample).
ret_pfs["gen"].pt[:8]

In [None]:
# Peek at the pt of the PF candidates in the cone of the first eight gen candidates.
ret_pfs["pfs"].pt[:8]

In [None]:
# Peek at the pdgId of the PF candidates in the cone of the first eight gen candidates.
ret_pfs["pfs"].pdgId[:8]

In [None]:
# Common binning for the pt-ratio histograms (also used by the following cell).
b = np.linspace(0, 4, 100)

# For each PF particle type, histogram the summed pt of the in-cone PF candidates
# of that type divided by the gen candidate pt.
# NOTE(review): the comparison uses the signed pdgId, so antiparticles
# (e.g. -211, -13) are not counted — confirm this is intended.
for pdg_id in (130, 22, 211, 13):
    is_type = ret_pfs.pfs.pdgId == pdg_id
    plt.hist(
        ak.sum(ret_pfs.pfs.pt[is_type], axis=1) / ret_pfs.gen.pt,
        bins=b, histtype="step", label=str(pdg_id)
    )

plt.legend(title="PF pdgId")
plt.xlabel("sum of in-cone PF pt (per pdgId) / gen pt")
plt.ylabel("gen candidates / bin")
plt.yscale("log");

In [None]:
# Compare PF and MLPF: summed pt of all in-cone candidates divided by the gen pt.
# Each curve is labelled so the two reconstructions can be told apart.
for cones, reco_label in ((ret_pfs, "PF"), (ret_mlpfs, "MLPF")):
    plt.hist(
        ak.sum(cones.pfs.pt, axis=1) / cones.gen.pt,
        bins=b, histtype="step", label=reco_label
    )

plt.legend()
plt.xlabel("sum of in-cone candidate pt / gen pt")
plt.ylabel("gen candidates / bin")
plt.yscale("log");