In [None]:
import sys, os

# Make the current working directory and the local LundReweighting checkout
# importable. The checkout location can be overridden with the
# LUND_REWEIGHTING_DIR environment variable; the default is the original path.
LUND_REWEIGHTING_DIR = os.environ.get(
    "LUND_REWEIGHTING_DIR", "/home/users/rkansal/hhbbvv/LundReweighting"
)

# Guard both entries so re-running this cell does not keep growing sys.path.
if sys.path[0] != "":
    sys.path.insert(0, "")
if LUND_REWEIGHTING_DIR not in sys.path:
    sys.path.append(LUND_REWEIGHTING_DIR)

from utils.Utils import *

In [1]:
import uproot
import awkward as ak
from collections import OrderedDict
import numpy as np

from coffea import nanoevents
from coffea.nanoevents.methods.base import NanoEventsArray
from coffea.analysis_tools import Weights, PackedSelection
from coffea.nanoevents.methods import nanoaod
from coffea.nanoevents.methods import vector
from coffea.lookup_tools.dense_lookup import dense_lookup

In [2]:
import HHbbVV
from HHbbVV.processors.utils import P4
from HHbbVV import run_utils

In [3]:
# Load IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
# run_utils.add_mixins is applied to coffea's nanoevents module before any
# events are built.
run_utils.add_mixins(nanoevents)

# Input sample (read over XRootD) and the number of leading events to keep.
SIGNAL_FILE = "root://cmseos.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2018/HH/GluGluToHHTobbVV_node_cHHH1_TuneCP5_13TeV-powheg-pythia8/GluGluToHHTobbVV_node_cHHH1/220808_145933/0000/nano_mc2018_1-1.root"
MAX_EVENTS = 5000

events_factory = nanoevents.NanoEventsFactory.from_root(
    SIGNAL_FILE,
    schemaclass=nanoevents.NanoAODSchema,
)
events = events_factory.events()[:MAX_EVENTS]

In [5]:
# Keep only events with at least two fat jets (counted via their pt entries).
n_fatjets = ak.count(events.FatJet.pt, axis=1)
events = events[n_fatjets >= 2]

In [6]:
# Only the sign of the generator weight is kept as the per-event weight.
gen_weights = np.sign(events["genWeight"])
n_events = np.sum(gen_weights)

# Bookkeeping objects; the cutflow starts with the signed event count.
selection = PackedSelection()
cutflow = OrderedDict([("all", n_events)])

selection_args = (selection, cutflow, False, gen_weights)

In [7]:
# Run the HHbbVV generator-level selection on the events and their fat jets.
# The call returns a dict of gen-level variables plus a (genbb, genq) pair.
# NOTE(review): `selection` and `cutflow` are presumably updated in place — confirm.
gen_vars_dict, (genbb, genq) = HHbbVV.processors.GenSelection.gen_selection_HHbbVV(
    events, events.FatJet, selection, cutflow, gen_weights, P4
)

In [None]:
# Events whose first "ak8FatJetHVV" entry is truthy and whose
# "ak8FatJetHVVNumProngs" value equals 4.
is_hvv_leading = gen_vars_dict["ak8FatJetHVV"][:, 0].astype(bool)
has_four_prongs = gen_vars_dict["ak8FatJetHVVNumProngs"] == 4
selector = is_hvv_leading & has_four_prongs

# PF candidates attached to the fat jet with jetIdx == 0 in those events.
prefiltered_events = events[selector]
ak8_pfcands = prefiltered_events.FatJetPFCands
ak8_pfcands = ak8_pfcands[ak8_pfcands.jetIdx == 0]
pfcands = prefiltered_events.PFCands[ak8_pfcands.pFCandsIdx]

# Additionally require at least four such candidates; a missing count fails.
pfc4 = ak.fill_none(ak.count(pfcands.pt, axis=1) >= 4, False)
selector[selector] = pfc4
selector = np.ma.filled(selector, fill_value=False)

# Leading fat jet (kept as a length-1 slice) and gen quarks of the survivors.
vv_fatjets = events.FatJet[selector][:, 0:1]
gen_quarks = genq[selector]

In [10]:
# Positional arguments for get_lund_SFs, evaluated on the full selected sample.
# NOTE(review): the bare 0 and 4 are presumably the fat-jet index and the number
# of prongs — confirm against get_lund_SFs.
selected_events = events[selector]
selected_weights = gen_weights[selector]
lpsfargs = [
    "2018",
    selected_events,
    vv_fatjets,
    0,
    4,
    gen_quarks,
    selected_weights,
    "GluGluToHHTobbVV_node_cHHH1",
]

outlp = HHbbVV.processors.corrections.get_lund_SFs(*lpsfargs)

In [None]:
# Display the first entry of the second element returned by get_lund_SFs.
# NOTE(review): presumably the subjet pTs of the first jet — confirm.
outlp[1][0]

## Oz version

In [15]:
# Open the 2018 Lund-plane ratio file and build the reference reweighter from it.
# ROOT and LundReweighter are not imported by name in this notebook; they are
# presumably provided by `from utils.Utils import *`.
f_ratio_name = "../corrections/lp_ratios/ratio_2018.root"
f_ratio = ROOT.TFile.Open(f_ratio_name)

# TFile.Open can hand back a null or zombie file instead of raising when the
# file is missing or unreadable; fail here rather than inside LundReweighter.
if not f_ratio or f_ratio.IsZombie():
    raise FileNotFoundError(f"Could not open Lund-plane ratio file: {f_ratio_name}")

LP_rw = LundReweighter(f_ratio=f_ratio)

In [16]:
# Rebuild the PF candidates of the jetIdx == 0 fat jet for the selected events
# (same three steps as in the selector cell, now with the final selector).
ak8_pfcands = events[selector].FatJetPFCands
ak8_pfcands = ak8_pfcands[ak8_pfcands.jetIdx == 0]
pfcands = events[selector].PFCands[ak8_pfcands.pFCandsIdx]

# Repackage every candidate as a record holding the fields named by the keys of
# P4, tagged "PtEtaPhiMLorentzVector". This is a pure-Python loop over events
# and candidates.
# NOTE(review): a vectorised ak.zip would be faster but may change the stored
# dtypes — confirm before switching, since later cells compare values closely.
pfcands_vector_ptetaphi = ak.Array(
    [
        [{kin_key: cand[kin_key] for kin_key in P4} for cand in event_cands]
        for event_cands in pfcands
    ],
    with_name="PtEtaPhiMLorentzVector",
)

In [17]:
# Convert the awkward inputs into the plain lists / numpy arrays that are later
# handed to LP_rw.get_all_weights.

# Per event: (eta, phi) of each gen quark.
gen_parts_eta_phi = []
for genqs in gen_quarks:
    gen_parts_eta_phi.append(np.array([[gq.eta, gq.phi] for gq in genqs]))

# Per event: (px, py, pz, E) of each PF candidate.
pf_cands = []
for pcands in pfcands_vector_ptetaphi:
    pf_cands.append(np.array([[pfc.px, pfc.py, pfc.pz, pfc.E] for pfc in pcands]))

# Per event: (pt, eta, phi, mass) of the selected fat jet.
ak8_jets = np.array([[fj.pt, fj.eta, fj.phi, fj.mass] for fj in vv_fatjets])

In [None]:
# Reference ("Oz") implementation: compute all weights for every selected jet,
# with normalisation switched off.
ozlpsfs = LP_rw.get_all_weights(pf_cands, gen_parts_eta_phi, ak8_jets, normalize=False)

## Comparing

#### Check subjets

In [None]:
# First column of the reference implementation's subjet table for jet 0.
# NOTE(review): presumably the subjet pT — confirm against LundReweighter.
np.array(ozlpsfs["reclust_nom"][0].subjet)[:, 0]

In [None]:
# Counterpart from get_lund_SFs, shown for comparison with the cell above.
outlp[1][0]

Result: the subjets match between the two implementations.

#### Check LP variables

In [None]:
# Re-run the reference implementation on a single jet (index `idx`) and show its
# splittings. Note that this overwrites the all-jet `ozlpsfs` computed earlier.
# Layout of each entry in `.split`:
# [i, pseudojet.subjet_pt, pseudojet.order, delta, kt]
idx = 4
ozlpsfs = LP_rw.get_all_weights(
    pf_cands[idx : idx + 1],
    gen_parts_eta_phi[idx : idx + 1],
    ak8_jets[idx : idx + 1],
    normalize=False,
)
ozlpsfs["reclust_nom"][0].split

In [None]:
# Same get_lund_SFs arguments as for the full sample, restricted to the single
# jet at position `idx` (kept as length-1 slices).
one_jet = slice(idx, idx + 1)
lpsfargs = [
    "2018",
    events[selector][one_jet],
    vv_fatjets[one_jet],
    0,
    4,
    gen_quarks[one_jet],
    gen_weights[selector][one_jet],
    "GluGluToHHTobbVV_node_cHHH1",
]

outlp = HHbbVV.processors.corrections.get_lund_SFs(*lpsfargs)

In [None]:
# Delta and kt taken from the third element returned by get_lund_SFs, shown as
# numpy arrays for comparison with the reference splittings.
[np.array(outlp[2].Delta), outlp[2].kt.to_numpy()]

Result: the LP variables match if I remove the 1 GeV PF candidate cut.

### Check SFs

In [None]:
# Recompute the reference weights for all selected jets (the single-jet checks
# above overwrote `ozlpsfs`) and display the nominal ones.
ozlpsfs = LP_rw.get_all_weights(pf_cands, gen_parts_eta_phi, ak8_jets, normalize=False)
ozlpsfs["nom"]

In [None]:
# Recompute get_lund_SFs on the full selected sample (earlier single-jet checks
# overwrote `lpsfargs` and `outlp`).
full_events = events[selector]
full_weights = gen_weights[selector]
lpsfargs = [
    "2018",
    full_events,
    vv_fatjets,
    0,
    4,
    gen_quarks,
    full_weights,
    "GluGluToHHTobbVV_node_cHHH1",
]

outlp = HHbbVV.processors.corrections.get_lund_SFs(*lpsfargs)

In [None]:
# First column of the "lp_sf_lnN" entry from get_lund_SFs; this is the column
# compared against the reference "nom" weights in the next cell.
outlp[0]["lp_sf_lnN"][:, 0]

In [None]:
# Fraction of jets whose nominal SF agrees between the two implementations
# within a relative tolerance of 1e-5 (1.0 means all agree).
np.mean(np.isclose(outlp[0]["lp_sf_lnN"][:, 0], ozlpsfs["nom"], rtol=0.00001))

In [None]:
# Fraction of jets whose systematic up / down SFs agree between the two
# implementations within a relative tolerance of 1e-3.
for variation in ("sys_up", "sys_down"):
    mine = outlp[0][f"lp_sf_{variation}"][:, 0]
    reference = ozlpsfs[variation]
    print(np.mean(np.isclose(mine, reference, rtol=0.001)))

In [None]:
# Indices of the jets whose sys_up SF disagrees between the two implementations.
# np.argmin on the boolean mask returns 0 both when jet 0 mismatches and when
# nothing mismatches, so list the mismatching indices explicitly instead.
sys_up_agrees = np.isclose(outlp[0]["lp_sf_sys_up"][:, 0], ozlpsfs["sys_up"], rtol=0.001)
np.flatnonzero(~np.asarray(sys_up_agrees))

In [None]:
# sys_up SF from get_lund_SFs for jet 62.
# NOTE(review): 62 is hard-coded, presumably the mismatching index found above — confirm.
outlp[0]["lp_sf_sys_up"][:, 0][62]

In [None]:
# Reference sys_up weight for the same hard-coded jet index, for comparison.
ozlpsfs["sys_up"][62]

### SFs for 1 jet

In [None]:
# Re-run the reference implementation on jet `idx` alone and print its sys_up
# weight. This overwrites the all-jet `ozlpsfs`.
# Values noted from a previous run:
# oz subjets [182.61677331494036, 123.00436085912942, 110.07500015490386, 13.654364638246314]
idx = 62

ozlpsfs = LP_rw.get_all_weights(
    pf_cands[idx : idx + 1],
    gen_parts_eta_phi[idx : idx + 1],
    ak8_jets[idx : idx + 1],
    normalize=False,
)

# print("nom:", ozlpsfs['nom'])
print("sys_up:", ozlpsfs["sys_up"])


# Values noted from a previous run (NOTE(review): unlabeled in the original — confirm what they are):
# [1.37, 0.917, 1.05, 1.08, 0.991, 1.08, 1]

In [None]:
# Scratch calculation: natural log of 0.02.
# NOTE(review): its purpose is not evident from the notebook — confirm or remove.
np.log(0.02)

In [None]:
# Values noted from a previous run:
# my subjets [78.4, 76.8, 118, 138]

# get_lund_SFs evaluated on the single jet at position `idx`.
single = slice(idx, idx + 1)
lpsfargs = [
    "2018",
    events[selector][single],
    vv_fatjets[single],
    0,
    4,
    gen_quarks[single],
    gen_weights[selector][single],
    "GluGluToHHTobbVV_node_cHHH1",
]

outlp = HHbbVV.processors.corrections.get_lund_SFs(*lpsfargs)

In [None]:
# sys_up SF for the single jet. Every other cell reads the SF dict as the first
# element of the get_lund_SFs return value (outlp[0][...]); indexing the
# returned tuple directly with a string key would fail.
outlp[0]["lp_sf_sys_up"]

Result: the SFs match if I ignore unmatched subjets!

In [87]:
# Toy input for working out a vectorised "is this subjet matched" mask.
num_prongs = 4
sj_matched_idx = np.array([[3, 0, 3, 2], [1, 2, 0, 1], [2, 1, 1, 0], [0, 3, 2, 3]])

# TODO: fill a boolean array of [len(events), num_prongs] with True if the subjet is matched, parallelized
# NOTE(review): the only assignment to `sj_matched` is the commented-out line
# below, yet later cells read `sj_matched`; they fail on a fresh kernel.


# sj_matched = np.sum(sj_matched_idx[:, None, :] == sj_matched_idx[:, :, None], axis=2)

In [51]:
# (pt, eta, phi, m) of every entry in `reclust.split`, as a 2-D numpy array.
# NOTE(review): `reclust` is not assigned in any visible cell — this relies on
# leftover kernel state (or a star import) and may fail on a fresh kernel.
tpfc = np.array([[j.pt(), j.eta(), j.phi(), j.m()] for j in reclust.split])

In [None]:
# Number of rows in `tpfc` (one per entry of `reclust.split`).
len(tpfc)

In [None]:
# Print the pt column in ascending order, then the eta column rearranged into
# that same pt-sorted order.
pt_order = np.argsort(tpfc[:, 0])
sorted_pt = np.sort(tpfc[:, 0])
eta_by_pt = tpfc[:, 1][pt_order]

print("pt")
print(sorted_pt)
print("eta")
print(eta_by_pt)

In [None]:
# Re-run get_lund_SFs with the current `lpsfargs`, discarding the return value.
_ = HHbbVV.processors.corrections.get_lund_SFs(*lpsfargs)

In [None]:
# Delta and kt of `lds[2][0]`, shown as numpy arrays.
# NOTE(review): `lds` is first assigned in a cell further down the notebook, so
# this cell depends on out-of-order execution.
[np.array(lds[2][0].Delta), lds[2][0].kt.to_numpy()]

In [None]:
# First Delta value of the first entry of `lds`.
# NOTE(review): `lds` is only assigned in later cells — out-of-order dependency.
lds[0].Delta[0]

## Comparing 4 particle jet

In [None]:
# pt of candidates 1-4 (four particles) of the first selected jet.
# NOTE(review): neighbouring cells slice [:5] (five particles) instead — confirm
# which set is intended for the "4 particle" comparison.
pfcands_vector_ptetaphi[0][1:5].pt

In [None]:
# Reference splittings for the first five candidates of jet 0, with
# num_excjets=1.
# NOTE(review): this uses [:5], whereas the awkward-side cells use [1:5] — confirm.
LP_rw.get_splittings(pf_cands[0][:5], num_excjets=1)

In [44]:
import fastjet

In [97]:
# Cluster candidates 1-4 of jet 0 with the Cambridge algorithm, using a very
# large radius parameter (1000.0), then take the Lund declusterings of the
# single exclusive jet.
# NOTE(review): AwkwardClusterSequence is reached through the private
# `fastjet._pyjet` module.
jetdef = fastjet.JetDefinition(fastjet.cambridge_algorithm, 1000.0)
cluster = fastjet._pyjet.AwkwardClusterSequence(pfcands_vector_ptetaphi[0][1:5], jetdef)
lds = cluster.exclusive_jets_lund_declusterings(1)

In [45]:
# Same exercise with the kt algorithm (radius parameter 0.8) on the first five
# candidates of jet 0.
# NOTE(review): the slice is [:5] here but [1:5] in the Cambridge cell — confirm.
jet_def = fastjet.JetDefinition(fastjet.kt_algorithm, 0.8)
cs = fastjet.ClusterSequence(pfcands_vector_ptetaphi[0][:5], jet_def)
ldskt = cs.exclusive_jets_lund_declusterings(1)

In [None]:
# Delta values of the first entry of `lds`.
lds[0].Delta

In [None]:
# kt values of the first entry of the kt-algorithm declusterings.
ldskt[0].kt

In [None]:
# pt of the four-vector sum of the first five candidates of jet 0, added one by
# one in index order.
first_five = pfcands_vector_ptetaphi[0][:5]
five_sum = first_five[0] + first_five[1] + first_five[2] + first_five[3] + first_five[4]
five_sum.pt

In [None]:
# ak.sum over the same five candidates, for comparison with the explicit
# four-vector addition in the previous cell.
ak.sum(pfcands_vector_ptetaphi[0][:5])

In [None]:
# First column of the "lp_sf_lnN" entry of `lpsfs`.
# NOTE(review): `lpsfs` is only ever assigned in commented-out lines; visible
# cells store this dict as outlp[0]. This fails on a fresh kernel.
lpsfs["lp_sf_lnN"][:, 0]

In [None]:
# Nominal weights from the reference implementation, for comparison.
ozlpsfs["nom"]

## Comparing full jet

In [None]:
# pfcands_vector_ptetaphi[0]
LP_rw.get_splittings(pf_cands[0], num_excjets=4)

In [None]:
# Step-by-step version of the subjet reclustering: kt-cluster every jet's
# candidates into 4 exclusive subjets, then recluster each subjet's
# constituents and take its Lund declusterings.
jet_def = fastjet.JetDefinition(fastjet.kt_algorithm, 0.8)
# NOTE(review): `cs` and `ldskt` repeat an earlier cell and are not used below.
cs = fastjet.ClusterSequence(pfcands_vector_ptetaphi[0][:5], jet_def)
ldskt = cs.exclusive_jets_lund_declusterings(1)

# kt clustering of all selected jets at once, forced to exactly 4 subjets each
kt_clustering = fastjet.ClusterSequence(pfcands_vector_ptetaphi, jet_def)
kt_subjets = kt_clustering.exclusive_jets(4)

# rewrap the subjets' Cartesian components as LorentzVector records
kt_subjets_vec = ak.zip(
    {"x": kt_subjets.px, "y": kt_subjets.py, "z": kt_subjets.pz, "t": kt_subjets.E},
    with_name="LorentzVector",
)

# save subjet pT * JEC scaling
# NOTE(review): the factor is a literal 1 here, i.e. no scaling is applied.
kt_subjets_pt = kt_subjets_vec.pt * 1
# get constituents
kt_subjet_consts = kt_clustering.exclusive_jets_constituents(4)
# NOTE(review): `min_pt` is not defined in any visible cell — this relies on
# leftover kernel state.
kt_subjet_consts = kt_subjet_consts[kt_subjet_consts.pt > min_pt]
kt_subjet_consts = ak.flatten(kt_subjet_consts, axis=1)

# dummy particle to pad empty subjets. SF for these subjets will be 1
dummy_particle = ak.Array(
    [{kin_key: 0.0 for kin_key in P4}],
    with_name="PtEtaPhiMLorentzVector",
)

# pad empty subjets
kt_subjet_consts = ak.fill_none(ak.pad_none(kt_subjet_consts, 1, axis=1), dummy_particle[0])

# then re-cluster with CA
# won't need to flatten once https://github.com/scikit-hep/fastjet/pull/145 is released
# NOTE(review): `recluster_def` is not defined in any visible cell either.
reclustering = fastjet.ClusterSequence(kt_subjet_consts, recluster_def)
lds = reclustering.exclusive_jets_lund_declusterings(1)

## Dist hist filling

In [None]:
# Run get_lund_SFs with the current `lpsfargs`; out[1] is plotted below.
out = HHbbVV.processors.corrections.get_lund_SFs(*lpsfargs)

In [12]:
from HHbbVV.postprocessing import plotting

In [None]:
# Plot the second element of the get_lund_SFs output with the project's
# plot_lund_plane_six helper.
# Known issue: the figure is not displayed when the reference ("Oz") code has
# been imported in the same kernel; the cause is unknown.
plotting.plot_lund_plane_six(out[1], show=True)

In [None]:
# Entry at index 1 along the first axis of out[1], keeping all other axes.
out[1][1, ...]

In [195]:
# Small toy array (10, 20, 30) for trying out broadcasting below.
arr = np.arange(10, 40, 10)

In [None]:
# Turn `arr` into a column and repeat it 4 times along the second axis,
# giving a (len(arr), 4) array.
np.repeat(arr[:, None], 4, axis=1)

In [None]:
# Check whether the first masked subjet pT is a plain Python float.
# NOTE(review): `sj_matched` is never assigned in a visible cell — this fails
# on a fresh kernel.
isinstance(kt_subjets_pt[sj_matched][0], float)

In [None]:
# Display the Lund declusterings from the reclustering cell.
lds

In [None]:
# Select the declusterings with the flattened `sj_matched` mask and flatten one
# level.
# NOTE(review): `sj_matched` is never assigned in a visible cell.
ak.flatten(lds[sj_matched.reshape(-1)], axis=1)

In [None]:
# Display the subjet-matching mask.
# NOTE(review): `sj_matched` is never assigned in a visible cell.
sj_matched