In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import mplhep as hep
import matplotlib.ticker as mticker
import numpy as np

plt.style.use(hep.style.CMS)
hep.style.use("CMS")
formatter = mticker.ScalarFormatter(useMathText=True)
formatter.set_powerlimits((-3, 3))
plt.rcParams.update({"font.size": 16})

In this file we will be testing our selection process using the vbf generator information. We will first be investigating whether gen jets make sense

In [4]:
df = pd.read_parquet("0-10.parquet")
df.columns.values.tolist()

[('GenHiggsEta', 0),
 ('GenHiggsEta', 1),
 ('GenHiggsPhi', 0),
 ('GenHiggsPhi', 1),
 ('GenHiggsMass', 0),
 ('GenHiggsMass', 1),
 ('GenHiggsPt', 0),
 ('GenHiggsPt', 1),
 ('GenHiggsChildren', 0),
 ('GenHiggsChildren', 1),
 ('GenbbEta', 0),
 ('GenbbEta', 1),
 ('GenbbPhi', 0),
 ('GenbbPhi', 1),
 ('GenbbMass', 0),
 ('GenbbMass', 1),
 ('GenbbPt', 0),
 ('GenbbPt', 1),
 ('GenVVEta', 0),
 ('GenVVEta', 1),
 ('GenVVPhi', 0),
 ('GenVVPhi', 1),
 ('GenVVMass', 0),
 ('GenVVMass', 1),
 ('GenVVPt', 0),
 ('GenVVPt', 1),
 ('Gen4qEta', 0),
 ('Gen4qEta', 1),
 ('Gen4qEta', 2),
 ('Gen4qEta', 3),
 ('Gen4qPhi', 0),
 ('Gen4qPhi', 1),
 ('Gen4qPhi', 2),
 ('Gen4qPhi', 3),
 ('Gen4qMass', 0),
 ('Gen4qMass', 1),
 ('Gen4qMass', 2),
 ('Gen4qMass', 3),
 ('Gen4qPt', 0),
 ('Gen4qPt', 1),
 ('Gen4qPt', 2),
 ('Gen4qPt', 3),
 ('ak8FatJetHbb', 0),
 ('ak8FatJetHbb', 1),
 ('ak8FatJetHVV', 0),
 ('ak8FatJetHVV', 1),
 ('ak8FatJetHVVNumProngs', 0),
 ('vbfptGen', 0),
 ('vbfptGen', 1),
 ('vbfetaGen', 0),
 ('vbfetaGen', 1),
 ('vbfphiGe

bbSkimmer modified gives us the two VBF jets which we will be able to access and manipulate. it also gives us the results of cuts on the events (there needs to be atleast 2 VBF jets). We can apply these filters and graph the kinematic variables. We can also record the before and after applying these cuts. 


In [5]:
# Apply lepton veto selections ($N_\mu =0$ and $N_e=0$)
print(np.shape(df))
df_em = df[(df[("nGoodMuons", 0)] == 0) & (df[("nGoodElectrons", 0)] == 0)]

# 2 vbf jets
df_vbf = df[(df[("nGoodVBFJetsUnsorted", 0)] >= 2)]

# lepton veto and 2 vbf jets
df_unsorted = df[
    (df[("nGoodMuons", 0)] == 0)
    & (df[("nGoodElectrons", 0)] == 0)
    & (df[("nGoodVBFJetsUnsorted", 0)] >= 2)
    & (df[("nGoodJets", 0)] == 0)
]
df_sorted_pt = df[
    (df[("nGoodMuons", 0)] == 0)
    & (df[("nGoodElectrons", 0)] == 0)
    & (df[("nGoodVBFJetsSortedpt", 0)] >= 2)
    & (df[("nGoodJets", 0)] == 0)
]
df_sorted_M = df[
    (df[("nGoodMuons", 0)] == 0)
    & (df[("nGoodElectrons", 0)] == 0)
    & (df[("nGoodVBFJetsSortedM", 0)] >= 2)
    & (df[("nGoodJets", 0)] == 0)
]
df_sorted_eta = df[
    (df[("nGoodMuons", 0)] == 0)
    & (df[("nGoodElectrons", 0)] == 0)
    & (df[("nGoodVBFJetsSortedeta", 0)] >= 2)
    & (df[("nGoodJets", 0)] == 0)
]


# generate all variables that are needed for the ak8 jet selections in VBF HH4b paper.
# df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])
# df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])
print(
    np.shape(df)[0],
    np.shape(df_unsorted)[0],
    np.shape(df_sorted_pt)[0],
    np.shape(df_sorted_M)[0],
    np.shape(df_sorted_eta)[0],
)

(2901, 407)
2901 1411 666 790 805


Graphing the variables for our matched jets:

In [8]:
import numpy as np


# Define the delta R function
def deltaR(eta1, phi1, eta2, phi2):
    deta = eta1 - eta2
    dphi = np.abs(phi1 - phi2)
    dphi[dphi > np.pi] -= 2 * np.pi  # Take care of circular limits
    return np.sqrt(deta**2 + dphi**2)


# Create a dictionary mapping sorting methods to their dataframes
sorting_methods_to_dfs = {
    "Rand": df_unsorted,
    "pt": df_sorted_pt,
    "M": df_sorted_M,
    "eta": df_sorted_eta,
}

# Iterate over each DataFrame and modify the column names
for df_name, df_method in sorting_methods_to_dfs.items():
    df_method.rename(
        columns={("vbfphiUnsortedRand", jet): ("vbfphiSortedRand", jet) for jet in [0, 1]},
        inplace=True,
    )
    df_method.rename(
        columns={("vbfMUnsortedRand", jet): ("vbfMSortedRand", jet) for jet in [0, 1]}, inplace=True
    )

for df_name, df_method in sorting_methods_to_dfs.items():
    print(f"{df_name} DataFrame:\n")
    print(df_method.columns.values.tolist())
    print("\n---\n")


# Iterate over each sorting method and calculate delta R
for method, df_method in sorting_methods_to_dfs.items():
    for jet in [0, 1]:
        eta_gen = df_method[(f"vbfetaGen", jet)]
        phi_gen = df_method[(f"vbfphiGen", jet)]

        eta_sorted = df_method[(f"vbfetaSorted{method}", jet)]
        phi_sorted = df_method[(f"vbfphiSorted{method}", jet)]

        df_method[(f"deltaR_{method}", jet)] = deltaR(eta_gen, phi_gen, eta_sorted, phi_sorted)

# Print DataFrames to check results
for df_name, df_method in sorting_methods_to_dfs.items():
    print(f"{df_name} DataFrame:\n")
    print(df_method)
    print("\n---\n")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_method.rename(columns={('vbfphiUnsortedRand', jet): ('vbfphiSortedRand', jet) for jet in [0, 1]}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_method.rename(columns={('vbfMUnsortedRand', jet): ('vbfMSortedRand', jet) for jet in [0, 1]}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_method.rename(columns={('vbfphiUnsortedRand', jet): ('vbfphiSortedRand', jet) for jet in [0, 1]}, inplace=True)
A value is trying to be set on a co

Rand DataFrame:

     GenHiggsEta           GenHiggsPhi           GenHiggsMass         \
               0         1           0         1            0      1   
3       0.485352  0.677734   -0.587891 -0.566406        125.0  125.0   
5       0.196289 -0.335938    2.843750 -1.167969        125.0  125.0   
10     -0.251953  2.039062    3.023438 -0.309570        125.0  125.0   
12     -4.125000  0.886719   -2.140625 -3.085938        125.0  125.0   
13     -0.377930 -1.132812   -1.550781  1.664062        125.0  125.0   
...          ...       ...         ...       ...          ...    ...   
2893   -2.031250  1.242188    1.464844 -1.664062        125.0  125.0   
2894   -1.460938  1.027344    2.585938 -0.071045        125.0  125.0   
2896    1.027344 -1.554688    1.925781 -1.304688        125.0  125.0   
2898   -0.589844 -0.937500   -1.777344  1.894531        125.0  125.0   
2900   -0.253906  0.446289   -1.839844  0.550781        125.0  125.0   

     GenHiggsPt        GenHiggsChildren      .

KeyError: ('vbfphiSortedRand', 0)