In [2]:
import hist
import awkward as ak
import glob
import os
import pyarrow.parquet as pq
from pathlib import Path

import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
# Apply the CMS style sheet first, then override the default font size on top of it.
plt.style.use(hep.style.CMS)
plt.rcParams['font.size'] = 20

In [3]:
# Common prefix of every sample path listed below.
lpc_fileprefix = "/eos/uscms/store/group/lpcdihiggsboost/tsievert/HiggsDNA_parquet/v2/Run3_2022_merged_v1/sim"


def _nominal_glob(era_dir, dataset_dir):
    """Build the glob pattern for the merged nominal parquet files of one dataset.

    Args:
        era_dir: Sub-directory directly under ``lpc_fileprefix`` (e.g. ``"preEE"``).
        dataset_dir: Dataset directory name inside ``era_dir``.

    Returns:
        The string ``<lpc_fileprefix>/<era_dir>/<dataset_dir>/nominal/*merged.parquet``.
    """
    return "/".join([lpc_fileprefix, era_dir, dataset_dir, "nominal", "*merged.parquet"])


# Disabled alternative layout (split per era, with the single-Higgs samples), kept for reference:
# FILEPATHS_DICT = {
#     '2022': {
#         'ggF HH': [_nominal_glob("preEE", "GluGlutoHHto2B2G_kl_1p00_kt_1p00_c2_0p00")],
#         'bbH': [_nominal_glob("preEE", "BBHto2G_M_125")],
#         'ZH': [_nominal_glob("preEE", "ZH_Hto2G_Zto2Q_M-125")],
#         'W-H': [_nominal_glob("preEE", "WminusH_Hto2G_Wto2Q_M-125")],
#         'W+H': [_nominal_glob("preEE", "WplusH_Hto2G_Wto2Q_M-125")],
#     },
#     '2022EE': {
#         'ggF HH': [_nominal_glob("postEE", "GluGluToHH")],
#         'bbH': [_nominal_glob("postEE", "BBHto2G_M_125")],
#         'ZH': [_nominal_glob("postEE", "ZH_Hto2G_Zto2Q_M-125")],
#         'W-H': [_nominal_glob("postEE", "WminusH_Hto2G_Wto2Q_M-125")],
#         'W+H': [_nominal_glob("postEE", "WplusH_Hto2G_Wto2Q_M-125")],
#     }
# }

# Sample name -> list of glob patterns (preEE and postEE) that are concatenated into one sample.
FILEPATHS_DICT = {
    'ggF HH': [
        _nominal_glob("preEE", "GluGlutoHHto2B2G_kl_1p00_kt_1p00_c2_0p00"),
        _nominal_glob("postEE", "GluGluToHH"),
    ],
    # Disabled: the two backgrounds below as separate samples (currently merged into 'non-res').
    # 'GGJets': [
    #     # GG + 3Jets
    #     _nominal_glob("preEE", "GGJets"),
    #     _nominal_glob("postEE", "GGJets"),
    # ],
    # 'GJet': [
    #     # GJet pT 20-40
    #     _nominal_glob("preEE", "GJetPt20To40"),
    #     _nominal_glob("postEE", "GJetPt20To40"),
    #     # GJet pT 40-inf
    #     _nominal_glob("preEE", "GJetPt40"),
    #     _nominal_glob("postEE", "GJetPt40"),
    # ],
    'non-res': [
        # GG + 3Jets
        _nominal_glob("preEE", "GGJets"),
        _nominal_glob("postEE", "GGJets"),
        # GJet pT 20-40
        _nominal_glob("preEE", "GJetPt20To40"),
        _nominal_glob("postEE", "GJetPt20To40"),
        # GJet pT 40-inf
        _nominal_glob("preEE", "GJetPt40"),
        _nominal_glob("postEE", "GJetPt40"),
    ],
    'ttH': [
        # ttH
        _nominal_glob("preEE", "ttHtoGG_M_125"),
        _nominal_glob("postEE", "ttHToGG"),
    ],
}

# Absolute path of the current working directory; plot folders are created beneath it.
CURRENT_DIRPATH = str(Path().absolute())
output_dirpath = os.path.join(CURRENT_DIRPATH, "march_19_pres_plots")
# exist_ok=True keeps re-runs harmless and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(output_dirpath, exist_ok=True)

In [4]:
def _load_selected_events(filepath_glob):
    """Read every parquet file matching a glob pattern and apply the event selection.

    The files are read from disk exactly once; the selection mask is then built
    from columns of the already-loaded array instead of re-reading the files
    for each flag.

    Args:
        filepath_glob: Glob pattern (string) for the parquet files of one dataset.

    Returns:
        The events for which 'nonRes_has_two_btagged_jets', 'is_nonRes' and
        'fiducialGeometricFlag' are all set.

    Raises:
        FileNotFoundError: If the pattern matches no file.
    """
    # Sorted so the concatenation order does not depend on filesystem listing order.
    matched_files = sorted(glob.glob(filepath_glob))
    if not matched_files:
        raise FileNotFoundError(f"No parquet files match {filepath_glob!r}")

    events = ak.from_parquet(matched_files)
    selection = (
        events['nonRes_has_two_btagged_jets']
        & events['is_nonRes']
        & events['fiducialGeometricFlag']
    )
    return events[selection]


# Sample name -> one array holding the selected events of all its datasets.
sample_dict = {
    sample_name: ak.concatenate(
        [_load_selected_events(filepath) for filepath in filepaths]
    )
    for sample_name, filepaths in FILEPATHS_DICT.items()
}


In [5]:
# List the field names available in the selected signal sample.
signal_fields = sample_dict['ggF HH'].fields
print(signal_fields)

['lead_seediEtaOriX', 'lead_cutBased', 'lead_electronVeto', 'lead_hasConversionTracks', 'lead_isScEtaEB', 'lead_isScEtaEE', 'lead_mvaID_WP80', 'lead_mvaID_WP90', 'lead_pixelSeed', 'lead_seedGain', 'lead_electronIdx', 'lead_jetIdx', 'lead_seediPhiOriY', 'lead_vidNestedWPBitmap', 'lead_energyRaw', 'lead_eta', 'lead_haloTaggerMVAVal', 'lead_hoe_PUcorr', 'lead_pfChargedIsoPFPV', 'lead_pfPhoIso03', 'lead_pfRelIso03_all_quadratic', 'lead_pfRelIso03_chg_quadratic', 'lead_phi', 'lead_sipip', 'lead_x_calo', 'lead_y_calo', 'lead_z_calo', 'lead_genPartFlav', 'lead_genPartIdx', 'lead_electronIdxG', 'lead_genPartIdxG', 'lead_jetIdxG', 'lead_ScEta', 'lead_rho_smear', 'lead_energy', 'lead_pt', 'lead_mvaID_run3', 'lead_mvaID_nano', 'lead_raw_r9', 'lead_r9', 'lead_raw_sieie', 'lead_sieie', 'lead_raw_etaWidth', 'lead_etaWidth', 'lead_raw_phiWidth', 'lead_phiWidth', 'lead_raw_sieip', 'lead_sieip', 'lead_raw_s4', 'lead_s4', 'lead_raw_hoe', 'lead_hoe', 'lead_raw_ecalPFClusterIso', 'lead_ecalPFClusterIso', 

In [6]:
import math
import re

# Keyword arguments shared by every axis below: no growth, no underflow/overflow bins.
_fixed_range = dict(growth=False, underflow=False, overflow=False)

# Field name -> histogram axis used when that field is plotted.
hists_axes = {
    # photon vars #
    'lead_mvaID_run3': hist.axis.Regular(
        100, -1., 1, name='var', label=r'lead $\gamma$ MVA ID', **_fixed_range
    ),
    'sublead_mvaID_run3': hist.axis.Regular(
        100, -1., 1, name='var', label=r'sublead $\gamma$ MVA ID', **_fixed_range
    ),

    # bjet vars #
    'nonRes_lead_bjet_btagPNetB': hist.axis.Regular(
        50, 0., 1, name='var', label=r'lead jet PNet btag score', **_fixed_range
    ),
    'nonRes_sublead_bjet_btagPNetB': hist.axis.Regular(
        50, 0., 1, name='var', label=r'sublead jet PNet btag score', **_fixed_range
    ),
}


# # Diphoton vars #
# hists_axes['mass'] = hist.axis.Regular(50, 25., 150., name='var', label=r'$M_{\gamma\gamma}$ [GeV]', growth=False, underflow=False, overflow=False)
# hists_axes['pt'] = hist.axis.Regular(50, 20., 2000, name='var', label=r' $\gamma\gamma p_{T}$ [GeV]', growth=False, underflow=False, overflow=False)

# # lepton vars #
# hists_axes['lepton1_pt'] = hist.axis.Regular(50, 0., 200, name='var', label=r'lead lepton $p_T$ [GeV]', growth=False, underflow=False, overflow=False)
# hists_axes['lepton2_pt'] = hist.axis.Regular(50, 0., 200, name='var', label=r'sublead lepton $p_T$ [GeV]', growth=False, underflow=False, overflow=False)

# # angular vars #
# hists_axes['nonRes_CosThetaStar_CS'] = hist.axis.Regular(50, -1, 1, name='var', label=r'cos$(\theta_{CS})$', growth=False, underflow=False, overflow=False)
# hists_axes['nonRes_CosThetaStar_jj'] = hist.axis.Regular(50, -1, 1, name='var', label=r'cos$(\theta_{jj})$', growth=False, underflow=False, overflow=False)

# # hadronic W vars #
# hists_axes['nonRes_chi_t0'] = hist.axis.Regular(70, 0., 150, name='var', label=r'$\chi_{t0}^2$', growth=False, underflow=False, overflow=False)
# hists_axes['nonRes_chi_t1'] = hist.axis.Regular(70, 0., 500, name='var', label=r'$\chi_{t1}^2$', growth=False)

# # dijet vars #
# hists_axes['dijet_PNetRegPt'] = hist.axis.Regular(100, 0., 500, name='var', label=r'dijet $p_T$ [GeV]', growth=False, underflow=False, overflow=False)
# hists_axes['dijet_PNetRegMass'] = hist.axis.Regular(50, 0., 200, name='var', label=r'$M_{jj}$ [GeV]', growth=False, underflow=False, overflow=False)

# # HH vars #
# hists_axes['HH_PNetRegPt'] = hist.axis.Regular(100, 0., 500, name='var', label=r'HH $p_T$ [GeV]', growth=False, underflow=False, overflow=False)
# hists_axes['HH_PNetRegMass'] = hist.axis.Regular(50, 150., 800, name='var', label=r'$M_{HH}$ [GeV]', growth=False, underflow=False, overflow=False)

# # bjet vars #
# hists_axes['nonRes_lead_bjet_btagPNetB'] = hist.axis.Regular(25, 0., 1., name='var', label=r'lead bjet PNet btag', growth=False, underflow=False, overflow=False)
# hists_axes['nonRes_sublead_bjet_btagPNetB'] = hist.axis.Regular(25, 0., 1., name='var', label=r'sublead bjet PNet btag', growth=False, underflow=False, overflow=False)



In [7]:
# Build one 1-D histogram per (sample, field); entries that are None are dropped before filling.
# NOTE(review): the fill passes no weight argument, so every event counts once.
# Confirm whether per-event MC weights should be applied, especially for 'non-res',
# which merges several datasets.
hists = {}
for sample_name, sample in sample_dict.items():
    sample_hists = {}
    for field, axis in hists_axes.items():
        values = sample[field]
        is_present = ~ak.is_none(values, axis=0)
        field_hist = hist.Hist(axis)
        field_hist.fill(var=values[is_present])
        sample_hists[field] = field_hist
    hists[sample_name] = sample_hists

In [8]:
import os
import sys
import argparse
import matplotlib.colors as mcolors

# Sample name -> LaTeX label shown in plot legends.
simplified_labels = {
    # signal
    'ggF HH': r'${HH \rightarrow b\bar{b}\gamma\gamma}$',
    # backgrounds (separate and merged)
    'GGJets': r'$\gamma\gamma+3j$',
    'GJet': r'$\gamma+j$, 20GeV<$p_T$',
    'ttH': r'${t\bar{t}H \rightarrow \gamma\gamma}$',
    'non-res': r'$\gamma\gamma+3j$ + $\gamma+j$, 20GeV<$p_T$',
    # other single-Higgs samples
    'bbH': r'${bbH \rightarrow \gamma\gamma}$',
    'ZH': r'${ZH \rightarrow q\bar{q}\gamma\gamma}$',
    'W-H': r'${W^{-}H \rightarrow q\bar{q}\gamma\gamma}$',
    'W+H': r'${W^{+}H \rightarrow q\bar{q}\gamma\gamma}$',
}

In [9]:
# Per-era luminosities; the plotting code prints the total with an fb^-1 unit.
_era_luminosities = {
    '2022preEE': 7.9804,
    '2022postEE': 26.6717,
    # Need to fill in lumis for other eras #
}
# 'total_lumi' is stored alongside the per-era entries and is their sum.
LUMINOSITIES = {**_era_luminosities, 'total_lumi': sum(_era_luminosities.values())}

In [11]:
# Plots from this cell go to their own folder. It must be created here:
# savefig does not create missing directories, and only the earlier
# "march_19_pres_plots" folder was created before.
output_dirpath = os.path.join(CURRENT_DIRPATH, "check_sample_plots/")
os.makedirs(output_dirpath, exist_ok=True)

# Samples overlaid on every figure, in drawing/legend order.
compared_samples = ['ggF HH', 'non-res', 'ttH']

for field in hists_axes.keys():

    fig, ax = plt.subplots(figsize=(10, 8))
    # density=True normalises each histogram, so the overlay compares shapes only.
    hep.histplot(
        [hists[name][field] for name in compared_samples], ax=ax,
        label=[simplified_labels[name] for name in compared_samples],
        linewidth=3, density=True
    )
    hep.cms.lumitext(f"{LUMINOSITIES['total_lumi']:.2f}" + r"fb$^{-1}$ (13.6 TeV)", ax=ax)
    hep.cms.text("Work in Progress", ax=ax)
    ax.set_yscale('linear')
    ax.legend()
    # os.path.join avoids the doubled separator that output_dirpath's trailing slash would cause.
    fig.savefig(os.path.join(output_dirpath, f'{field}_tth_nonres_signal.png'))
    # Close this specific figure so figures do not accumulate across the loop.
    plt.close(fig)