In [None]:
import uproot
import awkward as ak
from coffea import nanoevents
from coffea.nanoevents.methods.base import NanoEventsArray
from coffea.analysis_tools import Weights, PackedSelection
from coffea.nanoevents.methods import nanoaod
from coffea.nanoevents.methods import vector

# Merge coffea's vector behaviors into awkward's global behavior registry.
ak.behavior.update(vector.behavior)

import pickle, json, gzip
import numpy as np

from typing import Optional, List, Dict
from copy import copy

import matplotlib.pyplot as plt
import mplhep as hep
from matplotlib import colors

from tqdm import tqdm

from pathlib import Path
import os

# Global matplotlib settings applied to every figure drawn after this cell.
# NOTE(review): the CMS style is applied after the font-size override and may
# reset "font.size" — confirm the intended order.
plt.rcParams.update({"font.size": 16})
plt.style.use(hep.style.CMS)

In [None]:
# Load IPython's autoreload extension and enable mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Directory the figures are saved to; created (including parents) if it does not exist yet.
plot_dir = Path("../../plots/VBF/24Jul18")
plot_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# One NanoAOD ROOT file (root:// URL) per coupling point, keyed by sample name.
# The dict preserves insertion order, so iterating `samples` visits the entries
# in the order listed in _SAMPLE_FILES.
_EOS_PREFIX = "root://cmseos.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2018/HH"
_DATASET_SUFFIX = "TuneCP5_13TeV-madgraph-pythia8"

# (sample name, job directory tag, file name)
_SAMPLE_FILES = [
    ("VBF_HHTobbVV_CV_1_C2V_1_C3_2", "220808_150332", "nano_mc2018_9.root"),
    ("VBF_HHTobbVV_CV_1_C2V_1_C3_0", "220808_150123", "nano_mc2018_1-1.root"),
    ("VBF_HHTobbVV_CV_1_C2V_1_C3_1", "220808_150149", "nano_mc2018_1-1.root"),
    ("VBF_HHTobbVV_CV_1_C2V_0_C3_1", "220808_150000", "nano_mc2018_1-1.root"),
    ("VBF_HHTobbVV_CV_1_C2V_2_C3_1", "220808_150239", "nano_mc2018_1-10.root"),
    ("VBF_HHTobbVV_CV_0_5_C2V_1_C3_1", "220808_150057", "nano_mc2018_1-1.root"),
]

samples = {
    name: f"{_EOS_PREFIX}/{name}_{_DATASET_SUFFIX}/{name}/{job_tag}/0000/{root_file}"
    for name, job_tag, root_file in _SAMPLE_FILES
}

In [None]:
# Factors shared by every sample's cross-section expression.
# NOTE(review): presumably BR(H->bb) and the hadronic H->VV branching fraction
# (one H->WW term plus one H->ZZ term) — confirm against the analysis reference.
_BR_BB = 5.824e-01
_BR_VV = 0.2154 * 0.676**2 + 0.02643 * 0.692**2
# Overall factor of 2 applied to every sample
# (presumably the two ways to assign bb / VV to the two Higgs bosons — confirm).
_N_ASSIGNMENTS = 2

# Sample-specific leading factor
# (presumably the inclusive production cross section in pb — confirm).
_PRODUCTION_XSECS = {
    "VBF_HHTobbVV_CV_1_C2V_1_C3_1": 0.0017260,
    "VBF_HHTobbVV_CV_1_C2V_1_C3_0": 0.0046089,
    "VBF_HHTobbVV_CV_1_C2V_1_C3_2": 0.0014228,
    "VBF_HHTobbVV_CV_1_C2V_0_C3_1": 0.0270800,
    "VBF_HHTobbVV_CV_1_C2V_2_C3_1": 0.0142178,
    "VBF_HHTobbVV_CV_0_5_C2V_1_C3_1": 0.0108237,
    "VBF_HHTobbVV_CV_1_5_C2V_1_C3_1": 0.0660185,
}

# Computed directly as floats instead of eval()-ing expression strings. The
# multiplication order matches the original expressions, so the values are
# bit-for-bit the same.
xsecs = {
    sample_name: production_xsec * _BR_BB * _BR_VV * _N_ASSIGNMENTS
    for sample_name, production_xsec in _PRODUCTION_XSECS.items()
}

In [None]:
# Open every sample's ROOT file with the NanoAOD schema and keep the resulting
# events keyed by sample name. The name is printed before each file is opened.
events_dict = {}

for sample, file_url in samples.items():
    print(sample)
    factory = nanoevents.NanoEventsFactory.from_root(
        file_url, schemaclass=nanoevents.NanoAODSchema
    )
    events_dict[sample] = factory.events()

In [None]:
# Particle ID constants (PDG Monte Carlo numbering) used to select generator-level particles.
# Quarks and gluon
d_PDGID = 1
b_PDGID = 5
g_PDGID = 21
TOP_PDGID = 6

# Charged leptons and their neutrinos
ELE_PDGID = 11
vELE_PDGID = 12
MU_PDGID = 13
vMU_PDGID = 14
TAU_PDGID = 15
vTAU_PDGID = 16

# Electroweak and Higgs bosons
Z_PDGID = 23
W_PDGID = 24
HIGGS_PDGID = 25

# NOTE(review): presumably B-meson IDs (B0, B+, B*+) — confirm
b_PDGIDS = [511, 521, 523]

# NOTE(review): presumably the graviton ID — confirm
GRAV_PDGID = 39

# Generator status flags passed to GenPart.hasFlags when selecting particles.
GEN_FLAGS = ["fromHardProcess", "isLastCopy"]

In [None]:
def get_interpolation(mhh):
    """Return a smoothed, bin-width-normalised m_HH spectrum.

    ``mhh`` is histogrammed (unweighted) into 19 log-spaced bins between 280 and
    1500, each count is divided by its bin width, and a smoothing spline
    (``UnivariateSpline`` with ``s=3``) is fitted through the bin centres.

    Parameters
    ----------
    mhh : array-like
        Values to histogram.

    Returns
    -------
    numpy.ndarray
        The spline evaluated on 1000 evenly spaced points spanning the first to
        the last bin centre.
    """
    from scipy.interpolate import UnivariateSpline

    counts, bins = np.histogram(mhh, bins=np.logspace(np.log10(280), np.log10(1500), 20))

    # Arithmetic bin centres and widths of the log-spaced bins.
    bin_centers = (bins[:-1] + bins[1:]) / 2
    bin_sizes = np.diff(bins)

    # Smooth the density (counts per unit of mhh) rather than the raw counts,
    # because the bins have unequal widths.
    interp_func = UnivariateSpline(bin_centers, counts / bin_sizes, s=3)

    # Dense grid on which the smoothed curve is evaluated.
    new_bin_centers = np.linspace(bin_centers.min(), bin_centers.max(), 1000)

    return interp_func(new_bin_centers)

In [None]:
def plot_interpolation(ax, mhh, weights, label):
    """Draw a smoothed, bin-width-normalised weighted m_HH spectrum on ``ax``.

    ``mhh`` is histogrammed with ``weights`` into 30 log-spaced bins between 280
    and 1500, each bin content is divided by its bin width, and a smoothing
    spline (``UnivariateSpline`` with ``s=3``) through the bin centres is drawn
    on 1000 evenly spaced points.

    Parameters
    ----------
    ax : object with a matplotlib-style ``plot(x, y, label=...)`` method
        Axes to draw on.
    mhh : array-like
        Values to histogram.
    weights : array-like
        Per-entry weights, forwarded to ``np.histogram``.
    label : str
        Legend label of the curve.
    """
    from scipy.interpolate import UnivariateSpline

    counts, bins = np.histogram(
        mhh, bins=np.logspace(np.log10(280), np.log10(1500), 31), weights=weights
    )

    # Arithmetic bin centres and widths of the log-spaced bins.
    bin_centers = (bins[:-1] + bins[1:]) / 2
    bin_sizes = np.diff(bins)

    # Smooth the density (weighted counts per unit of mhh), because the bins
    # have unequal widths.
    interp_func = UnivariateSpline(bin_centers, counts / bin_sizes, s=3)

    # Dense grid on which the smoothed curve is evaluated and drawn.
    new_bin_centers = np.linspace(bin_centers.min(), bin_centers.max(), 1000)
    ax.plot(new_bin_centers, interp_func(new_bin_centers), label=label)

Get coefficients for interpolation

In [None]:
import sympy

# Coupling points (CV, C2V, kl) of the generated samples. Row i of M and the
# symbol xs{i} both refer to csamples[i], so any per-sample quantity substituted
# for xs{i} must be ordered like this list.
csamples = [
    (1.0, 1.0, 1.0),
    (1.0, 1.0, 0.0),
    (1.0, 1.0, 2.0),
    (1.0, 0.0, 1.0),
    (1.0, 2.0, 1.0),
    (0.5, 1.0, 1.0),
    # (1.5, 1., 1.),
]


def _coupling_terms(cv, c2v, k_lambda):
    """Return the six coupling monomials the cross section is expanded in.

    Works for plain numbers (one row of M) and for sympy symbols (the coupling
    vector c), which keeps the two definitions in sync.
    """
    return [
        cv**2 * k_lambda**2,
        cv**4,
        c2v**2,
        cv**3 * k_lambda,
        cv * c2v * k_lambda,
        cv**2 * c2v,
    ]


# symbolic couplings, defined before use so nothing shadows them
CV, C2V, kl = sympy.symbols("CV C2V kl")

# one row of monomials per generated sample, evaluated at that sample's couplings
M = sympy.Matrix([_coupling_terms(*couplings) for couplings in csamples])

# the vector of couplings (symbolic column vector)
c = sympy.Matrix([[term] for term in _coupling_terms(CV, C2V, kl)])

# the vector of symbolic sample cross sections
s = sympy.Matrix([[sympy.Symbol(f"xs{i}")] for i in range(len(csamples))])

# actual computation, i.e., matrix inversion and multiplications with vectors
M_inv = M.pinv()
coeffs = c.transpose() * M_inv
sigma = coeffs * s

In [None]:
def get_hists(mhh, weights):
    """Histogram ``mhh`` with ``weights`` on 30 log-spaced bin edges from 260 to 1200.

    Returns the ``(counts, bin_edges)`` pair produced by ``np.histogram``.
    """
    # Linear alternative kept for reference: np.arange(260, 1200, 50)
    log_edges = np.logspace(np.log10(260), np.log10(1200), 30)
    return np.histogram(mhh, log_edges, weights=weights)


def plot_hists(ax, mhh, weights, label):
    """Draw a weighted step histogram of ``mhh`` on ``ax``.

    The bin edges are linear: ``np.arange(280, 1200, 30)``.
    """
    linear_edges = np.arange(280, 1200, 30)
    ax.hist(mhh, linear_edges, weights=weights, label=label, histtype="step")

In [None]:
def _sample_name(cv, c2v, k_lambda):
    """Build the `samples` / `xsecs` key for one coupling point.

    Example: (0.5, 1.0, 1.0) -> "VBF_HHTobbVV_CV_0_5_C2V_1_C3_1".
    """
    tags = [f"{value:g}".replace(".", "_") for value in (cv, c2v, k_lambda)]
    return "VBF_HHTobbVV_CV_{}_C2V_{}_C3_{}".format(*tags)


hists = []

# Fill `hists` in `csamples` order, NOT in `samples` dict order: get_hist_interp
# substitutes hists[j] for the symbol xs{j}, and xs{j} belongs to csamples[j].
# The `samples` dict lists the kl=2 sample first and the SM sample third, so
# iterating over it would swap those two histograms in the interpolation.
for couplings in csamples:
    sample = _sample_name(*couplings)
    events = events_dict[sample]

    # Generator-level Higgs bosons carrying all of GEN_FLAGS.
    gen_parts = events.GenPart
    is_selected_higgs = (abs(gen_parts.pdgId) == HIGGS_PDGID) & gen_parts.hasFlags(GEN_FLAGS)
    higgs = gen_parts[is_selected_higgs]

    # Invariant mass of the first two selected Higgs bosons in each event.
    mhh = (higgs[:, 0] + higgs[:, 1]).mass

    # Normalise the generator weights so they sum to the sample cross section,
    # scaled by 1e3 (presumably pb -> fb, matching the plot's fb/GeV axis — confirm).
    w = events.genWeight
    w = w * xsecs[sample] / np.sum(w) * 1e3

    # `bins` is identical for every sample and is reused by the plotting cell.
    counts, bins = get_hists(mhh, w)
    hists.append(counts)

In [None]:
def get_hist_interp(cv, c2v, Kl):
    """Interpolate the per-bin m_HH histogram to the couplings (cv, c2v, Kl).

    The symbolic interpolation coefficients ``coeffs`` (one per xs{j} symbol)
    are evaluated once at the requested couplings and then applied to every bin
    of the sample histograms as a weighted sum. This matches substituting
    hists[j][i] for xs{j} in ``sigma`` bin by bin, without the per-bin symbolic
    substitution.

    ``hists[j]`` must be the histogram of the sample whose couplings are
    ``csamples[j]``, since that is the sample xs{j} stands for.

    Parameters
    ----------
    cv, c2v, Kl : float
        Values substituted for the symbols CV, C2V and kl.

    Returns
    -------
    list of float
        Interpolated content of each histogram bin.
    """
    couplings = {CV: cv, C2V: c2v, kl: Kl}
    # coeffs is a 1 x n_samples matrix; after substitution every entry is a number.
    sample_weights = np.array([float(weight) for weight in coeffs.subs(couplings)])
    # Shape (n_samples, n_bins); all histograms share the same binning.
    sample_hists = np.asarray(hists, dtype=float)
    return list(sample_weights @ sample_hists)

In [None]:
from scipy.interpolate import Rbf

plot_interference = False

# Axis limits of the figure.
x_max = 800
y_max = 0.012

fig, ax = plt.subplots(1, 1, figsize=(12, 12 if plot_interference else 8))

# `bins` holds the bin edges returned by get_hists in the cell that fills `hists`.
bin_centers = (bins[1:] + bins[:-1]) / 2
bin_sizes = np.diff(bins)
# Dense m_HH grid on which every smoothed curve is evaluated.
new_bin_centers = np.linspace(bin_centers.min(), bin_centers.max(), 1000)
scs = []

# Named `line_colors` so the `colors` module imported from matplotlib is not overwritten.
line_colors = ["blue", "orange", "red", "purple", "green"]

# (CV, C2V, kl) coupling points to draw, with their legend labels.
coupling_points = [
    ((1, 1, 1), "SM"),
    ((1, 0, 1), r"$\kappa_{2V}=0$"),  # no HHVV diagram
    ((0, 1, 0), r"$\kappa_{V}=\kappa_\lambda=0$"),  # HHVV diagram
    ((1, 0, 0), r"$\kappa_{2V}=\kappa_\lambda=0$"),  # (HVV)^2 diagram
    ((1, 2, 1), r"$\kappa_{2V}=2$"),  # enhanced kappa_2V coupling
]

for (couplings, label), color in zip(coupling_points, line_colors):
    # Interpolated bin contents divided by bin width -> differential cross section.
    interp_counts = np.asarray(get_hist_interp(*couplings), dtype=float) / bin_sizes
    interp_func = Rbf(bin_centers, interp_counts, smooth=1)

    smooth_counts = interp_func(new_bin_centers)
    scs.append(smooth_counts)
    ax.plot(new_bin_centers, smooth_counts, label=label, color=color)

if plot_interference:
    # Difference between the SM curve and the sum of the kappa_2V=0 and
    # kappa_V=kappa_lambda=0 curves.
    ax.plot(
        new_bin_centers,
        -(scs[1] + scs[2] - scs[0]),
        label="Interference",
        color="gray",
    )

ax.set_xlim(bin_centers.min(), x_max)
ax.hlines(0, bin_centers.min(), x_max, linestyle="--", color="gray")
ax.set_ylim(-y_max if plot_interference else 0, y_max)
ax.set_ylabel(r"d$\sigma$/d$m_{HH}$ [fb/GeV]")
ax.set_xlabel(r"$m_{HH}$ [GeV]")
hep.cms.label(label="Preliminary", data=False, com="13", ax=ax)
ax.legend()
plt.savefig(plot_dir / "diagrams_prelim.pdf", bbox_inches="tight")
plt.show()