In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import awkward
import vector
import fastjet
import numba
import particle
import bz2
import glob

In [None]:
@numba.njit
def deltaphi(phi1, phi2):
    """Return the azimuthal difference ``phi1 - phi2`` wrapped by ``arctan2``.

    The raw difference is passed through ``arctan2(sin, cos)`` so that the
    result is the equivalent angle in the principal range of ``arctan2``.
    """
    raw = phi1 - phi2
    sin_part = np.sin(raw)
    cos_part = np.cos(raw)
    return np.arctan2(sin_part, cos_part)

@numba.njit
def deltar(eta1, phi1, eta2, phi2):
    """Return the eta-phi distance ``sqrt(deta**2 + dphi**2)`` between two directions.

    The phi difference is wrapped with ``deltaphi`` before it is squared.
    """
    d_eta = eta1 - eta2
    d_phi = deltaphi(phi1, phi2)
    sum_sq = d_eta**2 + d_phi**2
    return np.sqrt(sum_sq)

@numba.njit
def match_jets(jets1, jets2, deltaR_cut):
    """Match each jet of ``jets1`` to its nearest jet of ``jets2``, event by event.

    For every jet in ``jets1[ev]`` the deltaR distance to every jet in
    ``jets2[ev]`` is computed; the closest one is kept as a match if its
    distance is strictly below ``deltaR_cut``. Nothing prevents several
    ``jets1`` jets from being matched to the same ``jets2`` jet.

    Args:
        jets1: per-event jet collections; ``jets1[ev]`` must expose ``.eta``
            and ``.phi`` that can be indexed by jet position.
        jets2: same layout as ``jets1``; it is indexed with the event numbers
            of ``jets1``.
        deltaR_cut: maximum (exclusive) deltaR for a pair to count as matched.

    Returns:
        ``(jet_inds_1_ev, jet_inds_2_ev)``: two lists with one inner list per
        event. Entry ``k`` of the first inner list is an index into
        ``jets1[ev]`` and entry ``k`` of the second is the index of its match
        in ``jets2[ev]``. Events without any match contribute empty lists.
    """
    # Number of events to loop over (taken from jets1 only).
    iev = len(jets1)
    jet_inds_1_ev = []
    jet_inds_2_ev = []
    for ev in range(iev):
        j1 = jets1[ev]
        j2 = jets2[ev]

        # Matched index pairs for this event.
        jet_inds_1 = []
        jet_inds_2 = []
        for ij1 in range(len(j1)):
            # drs[k] is the distance from jet ij1 of j1 to jet k of j2.
            drs = np.zeros(len(j2), dtype=np.float64)
            for ij2 in range(len(j2)):
                eta1 = j1.eta[ij1]
                eta2 = j2.eta[ij2]
                phi1 = j1.phi[ij1]
                phi2 = j2.phi[ij2]

                # Workaround for https://github.com/scikit-hep/vector/issues/303
                # dr = j1[ij1].deltaR(j2[ij2])
                dr = deltar(eta1, phi1, eta2, phi2)
                drs[ij2] = dr
            # Skip the argmin when this event has no jets in j2.
            if len(drs) > 0:
                min_idx_dr = np.argmin(drs)
                # Only the single closest jet is considered, and only if it passes the cut.
                if drs[min_idx_dr] < deltaR_cut:
                    jet_inds_1.append(ij1)
                    jet_inds_2.append(min_idx_dr)
        jet_inds_1_ev.append(jet_inds_1)
        jet_inds_2_ev.append(jet_inds_2)
    return jet_inds_1_ev, jet_inds_2_ev

def map_pdgid_to_candid(pdgid, charge):
    """Map a PDG particle ID to a coarse candidate class ID.

    Photons (22), electrons (11) and muons (13) keep their own ID. Everything
    else is labelled 211 (charged hadron) when ``charge`` is non-zero and
    130 (neutral hadron) otherwise.

    The sign of ``pdgid`` is ignored, so an antiparticle ID such as -11 or -13
    is assigned to the same class as its particle instead of falling through
    to the charged-hadron branch.

    Args:
        pdgid: integer PDG ID, signed or unsigned.
        charge: particle charge; only whether it is zero matters.

    Returns:
        One of 22, 11, 13, 211 or 130.
    """
    abs_pdgid = abs(pdgid)
    if abs_pdgid in (22, 11, 13):
        return abs_pdgid

    # charged hadron
    if abs(charge) > 0:
        return 211

    # neutral hadron
    return 130

In [None]:
# Single jet definition reused by the clustering calls below:
# anti-kt algorithm with the second JetDefinition argument set to 0.4.
clustering_algo = fastjet.antikt_algorithm
clustering_radius = 0.4
jetdef = fastjet.JetDefinition(clustering_algo, clustering_radius)

In [None]:
# Input ntuples: bz2-compressed pickles, each holding a list of events.
data_pattern = "/media/joosep/data/cms/v3_3/nopu/TTbar_14TeV_TuneCUETP8M1_cfi/raw/pfntuple_*.pkl.bz2"

data = []
# glob.glob gives no ordering guarantee; sorting keeps the event order (and
# therefore any fixed event index used further down) reproducible between runs.
for fn in sorted(glob.glob(data_pattern)):
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # The context manager closes the file handle as soon as the payload is read.
    with bz2.BZ2File(fn, "rb") as fi:
        this_data = pickle.load(fi)
    # For uncompressed inputs, open the file with open(fn, "rb") instead.
    # Extend in place rather than concatenating all per-file lists at the end.
    data.extend(this_data)
print(len(data))

# Label used in plot titles and output file names. Other labels used with this notebook:
#   "ttbar_PU", "QCD_PU", "SingleEle", "SingleGamma", "SingleNeutron",
#   "SinglePi0", "SinglePiMinus", "SingleProtonMinus"
sample_title = "ttbar_noPU"

In [None]:
def to_awk(label):
    """Convert ``data[i][label]`` for all events into awkward arrays.

    Entries whose ``typ`` equals 0 are dropped. The remaining entries are
    reduced to the fields listed in ``columns`` and then addressed by
    position (assumes positional index k refers to the k-th listed field --
    TODO confirm against the ntuple layout).

    Returns:
        ``(p4, other)``: ``p4`` is a vector-aware array with pt, eta, phi and
        energy, where phi is rebuilt as ``arctan2(sin_phi, cos_phi)``;
        ``other`` holds the absolute values of ``typ`` (as "type") and ``ispu``.
    """
    columns = ["typ", "pt", "eta", "sin_phi", "cos_phi", "e", "ispu"]

    selected = []
    for event in data:
        particles = event[label]
        keep = particles["typ"] != 0
        selected.append(particles[keep][columns])
    dd_mlpf = awkward.from_iter(selected)

    sin_phi = dd_mlpf[:, :, 3]
    cos_phi = dd_mlpf[:, :, 4]
    kinematics = {
        "pt": dd_mlpf[:, :, 1],
        "eta": dd_mlpf[:, :, 2],
        "phi": np.arctan2(sin_phi, cos_phi),
        "energy": dd_mlpf[:, :, 5],
    }
    p4 = vector.awk(awkward.zip(kinematics))

    labels = {
        "type": np.abs(dd_mlpf[:, :, 0]),
        "ispu": np.abs(dd_mlpf[:, :, 6]),
    }
    other = awkward.zip(labels)
    return p4, other


def jet_to_awk(jets):
    """Repackage ``jets`` as a vector-aware awkward array.

    Only the ``pt``, ``eta``, ``phi`` and ``energy`` attributes of ``jets``
    are read; they become the fields of the returned array.
    """
    components = {name: getattr(jets, name) for name in ("pt", "eta", "phi", "energy")}
    return vector.awk(awkward.zip(components))

In [None]:
# "ygen" particles: four-vectors plus per-particle type / ispu labels.
gen_mlpf_p4, gen_mlpf = to_awk("ygen")

# Selection used for the "no PU" variants: ispu strictly below 1.
gen_mlpf_not_pu = gen_mlpf["ispu"] < 1

# Per-particle transverse momentum components, shared by both MET variants.
gen_mlpf_px = gen_mlpf_p4.pt * np.cos(gen_mlpf_p4.phi)
gen_mlpf_py = gen_mlpf_p4.pt * np.sin(gen_mlpf_p4.phi)

# MET: magnitude of the per-event vector sum over all particles.
px = awkward.sum(gen_mlpf_px, axis=1)
py = awkward.sum(gen_mlpf_py, axis=1)
gen_mlpf_met = np.sqrt(px**2 + py**2)

# Same quantity restricted to the ispu < 1 particles.
px = awkward.sum(gen_mlpf_px[gen_mlpf_not_pu], axis=1)
py = awkward.sum(gen_mlpf_py[gen_mlpf_not_pu], axis=1)
gen_mlpf_met_nopu = np.sqrt(px**2 + py**2)

# Cluster jets from all particles, then from the ispu < 1 subset; keep jets with pt >= 10.
gen_mlpf_xyzt = gen_mlpf_p4.to_xyzt()

cluster = fastjet.ClusterSequence(gen_mlpf_xyzt, jetdef)
gen_mlpf_jets = jet_to_awk(cluster.inclusive_jets(min_pt=10.0))

cluster = fastjet.ClusterSequence(gen_mlpf_xyzt[gen_mlpf_not_pu], jetdef)
gen_mlpf_jets_nopu = jet_to_awk(cluster.inclusive_jets(min_pt=10.0))

In [None]:
# Distribution of the per-particle ispu value over all events (log y-axis).
fig, ax = plt.subplots(figsize=(5, 5))
ispu_values = awkward.flatten(gen_mlpf["ispu"])
ax.hist(ispu_values, bins=np.linspace(0, 1, 100))
ax.set_yscale("log")
ax.set_xlabel("MLPF truth ispu")

In [None]:
# "ycand" particles: four-vectors plus per-particle labels.
cand_pf_p4, cand_pf = to_awk("ycand")

# MET: magnitude of the per-event vector sum of the transverse momenta.
cand_pf_phi = cand_pf_p4.phi
px = awkward.sum(cand_pf_p4.pt * np.cos(cand_pf_phi), axis=1)
py = awkward.sum(cand_pf_p4.pt * np.sin(cand_pf_phi), axis=1)
cand_pf_met = np.sqrt(px**2 + py**2)

# Jets clustered from the same particles, kept when pt >= 10.
cand_pf_xyzt = cand_pf_p4.to_xyzt()
cluster = fastjet.ClusterSequence(cand_pf_xyzt, jetdef)
cand_pf_jets = jet_to_awk(cluster.inclusive_jets(min_pt=10.0))

In [None]:
# Stack the per-event "pythia" arrays into one jagged array.
dd_gen = awkward.from_iter([event["pythia"] for event in data])

# Column 0 is used as the particle ID, columns 1-4 as pt, eta, phi, energy.
gen_pythia_p4 = vector.awk(
    awkward.zip(
        {
            "pt": dd_gen[:, :, 1],
            "eta": dd_gen[:, :, 2],
            "phi": dd_gen[:, :, 3],
            "energy": dd_gen[:, :, 4],
        }
    )
)
gen_pythia = awkward.zip({"type": np.abs(dd_gen[:, :, 0])})

# Remove |ID| 12, 14 and 16 (the neutrino PDG codes) before summing momenta.
abs_type = gen_pythia["type"]
msk = abs_type != 12
for neutrino_id in (14, 16):
    msk = msk & (abs_type != neutrino_id)

gen_pythia_p4 = gen_pythia_p4[msk]
gen_pythia = gen_pythia[msk]

# Additional |eta| < 5 and pt > 1 selection, kept as a separate collection.
within_eta = np.abs(gen_pythia_p4.eta) < 5
above_pt = gen_pythia_p4.pt > 1
gen_pythia_p4_abseta5 = gen_pythia_p4[within_eta & above_pt]

# Replace the raw IDs by candidate class IDs, using the charge reported by `particle`.
all_pdgs = [
    [map_pdgid_to_candid(int(pid), pid.charge) for pid in map(particle.PDGID, ev)]
    for ev in gen_pythia["type"]
]
gen_pythia = awkward.zip({"type": all_pdgs})

# MET: magnitude of the per-event vector sum of the remaining particles.
px = awkward.sum(gen_pythia_p4.pt * np.cos(gen_pythia_p4.phi), axis=1)
py = awkward.sum(gen_pythia_p4.pt * np.sin(gen_pythia_p4.phi), axis=1)
gen_pythia_met = np.sqrt(px**2 + py**2)

# Reference MET stored in the ntuple: element [0, 0] of each event's "genmet".
gen_cmssw_met = np.array([event["genmet"][0, 0] for event in data])

# Jets clustered from the remaining particles, kept when pt >= 10.
cluster = fastjet.ClusterSequence(gen_pythia_p4.to_xyzt(), jetdef)
gen_pythia_jets = jet_to_awk(cluster.inclusive_jets(min_pt=10.0))

In [None]:
# Stack the per-event "genjet" arrays; columns 0-3 are used as pt, eta, phi, energy.
genjet_raw = awkward.from_iter([event["genjet"] for event in data])

# Keep only jets whose column 0 (pt) is above 10.
genjet_selected = genjet_raw[genjet_raw[:, :, 0] > 10]

gen_cmssw_jet = vector.awk(
    awkward.zip(
        {
            "pt": genjet_selected[:, :, 0],
            "eta": genjet_selected[:, :, 1],
            "phi": genjet_selected[:, :, 2],
            "energy": genjet_selected[:, :, 3],
        }
    )
)

In [None]:
# Event-by-event comparison of the stored generator MET with the MET
# recomputed from the Pythia particles.
plt.figure(figsize=(5,5))
plt.scatter(gen_cmssw_met, gen_pythia_met)
# Label the axes and title so the figure can be read on its own.
plt.xlabel("CMSSW genMetTrue")
plt.ylabel("Pythia MET")
plt.title(sample_title);

In [None]:
# MET distributions of all sources on log-log axes.
fig, ax = plt.subplots(figsize=(5, 5))
b = np.logspace(-1, 3, 101)

met_series = [
    (gen_cmssw_met, "CMSSW genMetTrue"),
    (gen_pythia_met, "Pythia"),
    (gen_mlpf_met, "MLPF truth"),
    (gen_mlpf_met_nopu, "MLPF truth, no PU"),
    (cand_pf_met, "PF reco"),
]
for met_values, met_label in met_series:
    ax.hist(met_values, bins=b, histtype="step", lw=1, label=met_label)

ax.set_yscale("log")
ax.set_xscale("log")
ax.legend()
ax.set_xlabel("MET")
ax.set_title(sample_title)
fig.savefig(sample_title + "_met.pdf")
fig.savefig(sample_title + "_met.png")

In [None]:
# MET distributions of all sources in the 0-200 range.
# Both axes stay linear here; the log-scale calls are disabled.
fig, ax = plt.subplots(figsize=(5, 5))
b = np.linspace(0, 200, 101)

met_series = [
    (gen_cmssw_met, "CMSSW genMetTrue"),
    (gen_pythia_met, "Pythia"),
    (gen_mlpf_met, "MLPF truth"),
    (gen_mlpf_met_nopu, "MLPF truth, no PU"),
    (cand_pf_met, "PF reco"),
]
for met_values, met_label in met_series:
    ax.hist(met_values, bins=b, histtype="step", lw=1, label=met_label)

ax.legend()
ax.set_xlabel("MET")
ax.set_title(sample_title)

In [None]:
# Event-by-event comparison of the MLPF-truth MET (with and without the
# ispu selection) against the stored generator MET.
fig, ax = plt.subplots(figsize=(5, 5))

corr_series = [
    (gen_mlpf_met, "MLPF truth"),
    (gen_mlpf_met_nopu, "MLPF truth, no PU"),
]
for met_values, met_label in corr_series:
    ax.scatter(gen_cmssw_met, met_values, label=met_label, marker=".", alpha=0.5)

ax.set_xlabel("genMetTrue")
ax.set_ylabel("MLPF truth MET")
ax.legend(loc="best")
ax.set_title(sample_title)
fig.savefig(sample_title + "_met_corr.pdf")
fig.savefig(sample_title + "_met_corr.png")

In [None]:
# Ratio of each MET estimate to the stored generator MET, for events with genMetTrue > 5.
fig, ax = plt.subplots(figsize=(5, 5))
b = np.logspace(-1, 1, 100)

# Empty plot call kept from the original; presumably it advances the colour
# cycle by one so the curves keep the colours used in the other MET plots.
ax.plot([], [])

msk = gen_cmssw_met > 5
ratio_series = [
    (gen_pythia_met, "Pythia"),
    (gen_mlpf_met, "MLPF truth"),
    (gen_mlpf_met_nopu, "MLPF truth, no PU"),
    (cand_pf_met, "CMSSW, reco PF"),
]
for met_values, met_label in ratio_series:
    ax.hist((met_values / gen_cmssw_met)[msk], bins=b, histtype="step", lw=1, label=met_label)

ax.legend()
ax.set_yscale("log")
ax.set_xscale("log")
ax.set_xlabel("MET / CMSSW genMetTrue")
ax.set_title(sample_title + ", genMetTrue>5")
fig.savefig(sample_title + "_met_ratio.pdf")
fig.savefig(sample_title + "_met_ratio.png")

In [None]:
# Jet pt spectra of every jet collection, flattened over events.
fig, ax = plt.subplots(figsize=(5, 5))
b = np.linspace(0, 200, 100)

jet_series = [
    (gen_cmssw_jet, "CMSSW"),
    (gen_pythia_jets, "Pythia"),
    (gen_mlpf_jets, "MLPF truth"),
    (gen_mlpf_jets_nopu, "MLPF truth, no PU"),
    (cand_pf_jets, "CMSSW, reco PF"),
]
for jets, jets_label in jet_series:
    ax.hist(awkward.flatten(jets.pt), bins=b, histtype="step", lw=1, label=jets_label)

ax.set_xlabel("jet pt")
ax.legend()
ax.set_yscale("log")
ax.set_title(sample_title)
fig.savefig(sample_title + "_jet_pt.pdf")
fig.savefig(sample_title + "_jet_pt.png")

In [None]:
# pt ratio of matched jets: each collection is matched to the CMSSW genjets
# with a deltaR cut of 0.01 and the ratio (collection pt / genjet pt) is histogrammed.
fig, ax = plt.subplots(figsize=(5, 5))
b = np.linspace(0, 4, 100)

# Empty plot call kept from the original; presumably it advances the colour
# cycle by one so the curves keep the colours used in the jet pt plot.
ax.plot([], [])

matched_series = [
    (gen_pythia_jets, "Pythia"),
    (gen_mlpf_jets, "MLPF truth"),
    (gen_mlpf_jets_nopu, "MLPF truth, no PU"),
    (cand_pf_jets, "PF reco"),
]
for jets, jets_label in matched_series:
    # idx1 indexes the CMSSW genjets, idx2 the matched jets of the other collection.
    idx1, idx2 = match_jets(gen_cmssw_jet, jets, 0.01)
    pt_ratio = jets[idx2].pt / gen_cmssw_jet[idx1].pt
    ax.hist(awkward.flatten(pt_ratio), bins=b, label=jets_label, histtype="step", lw=1)

ax.legend()
ax.set_yscale("log")
ax.set_xlabel("jet pt / CMSSW genjet pt")
ax.set_title(sample_title)
fig.savefig(sample_title + "_jet_pt_ratio.pdf")
fig.savefig(sample_title + "_jet_pt_ratio.png")

In [None]:
# Eta-phi display of a single event: MLPF-truth particles as dots,
# Pythia particles as crosses, marker area proportional to pt.
iev = 4
fig, ax = plt.subplots(figsize=(10, 10))

event_mlpf_p4 = gen_mlpf_p4[iev]
event_ispu = gen_mlpf["ispu"][iev]
event_pythia_p4 = gen_pythia_p4[iev]

# Mask of particles with ispu exactly 0; it is not used by the plotting calls below.
msk = event_ispu == 0

# Opacity falls as ispu rises: 1.1 - ispu, clipped to [0, 1].
mlpf_alpha = np.clip(1e-1 + 1.0 - event_ispu, 0, 1)
ax.scatter(
    event_mlpf_p4.eta,
    event_mlpf_p4.phi,
    s=10 * event_mlpf_p4.pt,
    alpha=mlpf_alpha,
)

ax.scatter(
    event_pythia_p4.eta,
    event_pythia_p4.phi,
    s=10 * event_pythia_p4.pt,
    marker="x",
)
ax.set_xlim(-7, 7)
ax.set_ylim(-4, 4)