In [2]:
import dask_awkward as dak
import awkward as ak
from distributed import LocalCluster, Client, progress
import time
import numpy as np
import matplotlib.pyplot as plt
import json
import mplhep as hep
import glob
import pandas as pd

# Apply the CMS plotting style from mplhep to all matplotlib figures.
plt.style.use(hep.style.CMS)

# Start a Dask distributed client with hard-coded sizing: 120 worker processes,
# 2 threads each, memory_limit='8 GiB'.
# NOTE(review): re-running this cell without closing the previous client starts
# another cluster (the recorded output shows the "Perhaps you already have a
# cluster running?" warning). Presumably memory_limit is per worker, i.e. up to
# ~960 GiB in total -- confirm the host can actually provide that.
client =  Client(n_workers=120,  threads_per_worker=2, processes=True, memory_limit='8 GiB') 


Perhaps you already have a cluster running?
Hosting the HTTP server on port 45419 instead


In [44]:
"""
This code prints ggH/VBF channel yields after applying category cuts
"""

def applyVBF_cutV1(events):
    """
    Keep only the events that fall in the VBF category (V1 definition).

    An event is selected when it
      * passes the dijet requirements: jj_mass_nominal > 400,
        jj_dEta_nominal > 2.5 and jet1_pt_nominal > 35, and
      * is NOT b-tagged, i.e. it has fewer than two loose b-tags and no
        medium b-tag (b-tagged events belong to the VH / ttH categories).

    Comparisons that evaluate to None (missing jet information) count as
    "requirement not met".

    Side effect: the per-event decision is written onto the ``events`` object
    that was passed in, as a boolean field named ``vbf_filter``, before the
    selection is applied.

    Returns the subset of ``events`` that passes the selection.
    """
    # b-tag veto ingredients; a missing count is treated as "not tagged".
    has_two_loose = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    has_medium = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False)
    is_btagged = has_two_loose | has_medium

    # Dijet VBF requirements; None (e.g. no dijet system) means "fail".
    passes_dijet = ak.fill_none(
        (events.jj_mass_nominal > 400)
        & (events.jj_dEta_nominal > 2.5)
        & (events.jet1_pt_nominal > 35),
        value=False,
    )

    selection = passes_dijet & ~is_btagged

    # Store the decision as a boolean column shaped like dimuon_mass.
    mass = events.dimuon_mass
    all_true = ak.ones_like(mass, dtype="bool")
    all_false = ak.zeros_like(mass, dtype="bool")
    events["vbf_filter"] = ak.where(selection, all_true, all_false)

    return events[selection]

def applyGGH_cutV1(events):
    """
    Keep only the events that fall in the ggH category (V1 definition).

    An event is kept when it is neither VBF-like nor b-tagged:
      * VBF-like: jj_mass_nominal > 400, jj_dEta_nominal > 2.5 and
        jet1_pt_nominal > 35,
      * b-tagged: at least two loose b-tags or at least one medium b-tag
        (the b-tag cut is for the VH and ttH categories).

    Comparisons that evaluate to None count as False, so events with missing
    jet information are neither VBF-like nor b-tagged and are therefore kept.

    Only the columns used by the cuts are read; ``dimuon_mass`` is not needed.

    Returns the subset of ``events`` that passes the selection.
    """
    btag_cut = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False) | ak.fill_none(
        events.nBtagMedium_nominal >= 1, value=False
    )
    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400) & (events.jj_dEta_nominal > 2.5) & (events.jet1_pt_nominal > 35),
        value=False,
    )
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]


def applyGGH_new(events, verbose=True):
    """
    Keep only the events that fall in the ggH category (updated definition).

    An event is kept when it is neither VBF-like nor b-tagged:
      * VBF-like: jj_mass_nominal > 400, jj_dEta_nominal > 2.5 and
        jet1_pt_nominal > 35,
      * b-tagged: at least two loose b-tags, OR at least one medium b-tag
        together with njets_nominal >= 2 (the b-tag cut is for the VH and
        ttH categories).

    Comparisons that evaluate to None count as False, so events with missing
    jet information are neither VBF-like nor b-tagged and are therefore kept.

    Parameters
    ----------
    events : record array exposing the ``*_nominal`` columns named above.
    verbose : bool, default True
        When True (the historical behaviour) the cut inputs and the resulting
        masks are printed for debugging. Pass False to silence the output.

    Returns the subset of ``events`` that passes the selection.
    """
    btagLoose_filter = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    btagMedium_filter = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False) & ak.fill_none(
        events.njets_nominal >= 2, value=False
    )
    btag_cut = btagLoose_filter | btagMedium_filter
    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400) & (events.jj_dEta_nominal > 2.5) & (events.jet1_pt_nominal > 35),
        value=False,
    )
    ggH_filter = ~vbf_cut & ~btag_cut

    if verbose:
        # Debug dump of every quantity entering the decision.
        print(f"events.jet1_pt_nominal : {events.jet1_pt_nominal }")
        print(f"events.jj_mass_nominal: {events.jj_mass_nominal}")
        print(f"events.jj_dEta_nominal: {events.jj_dEta_nominal}")
        print(f"events.nBtagLoose_nominal: {events.nBtagLoose_nominal}")
        print(f"events.nBtagMedium_nominal: {events.nBtagMedium_nominal}")
        print(f"events.njets_nominal: {events.njets_nominal}")
        print(f"not btag_cut: {~btag_cut}")
        print(f"ggH_filter: {ggH_filter}")

    return events[ggH_filter]

def applyGGH_cutflow(events):
    """
    Keep only the events that fall in the ggH category (cut-flow definition).

    An event is kept when it is neither VBF-like nor b-tagged:
      * VBF-like: jj_mass_nominal > 400, jj_dEta_nominal > 2.5 and
        jet1_pt_nominal > 35,
      * b-tagged: at least two loose b-tags, OR at least one medium b-tag
        together with njets_nominal >= 2 (the b-tag cut is for the VH and
        ttH categories).

    Comparisons that evaluate to None count as False, so events with missing
    jet information are neither VBF-like nor b-tagged and are therefore kept.

    Only the columns used by the cuts are read; ``dimuon_mass`` is not needed.

    Returns the subset of ``events`` that passes the selection.
    """
    btagLoose_filter = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    btagMedium_filter = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False) & ak.fill_none(
        events.njets_nominal >= 2, value=False
    )
    btag_cut = btagLoose_filter | btagMedium_filter
    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400) & (events.jj_dEta_nominal > 2.5) & (events.jet1_pt_nominal > 35),
        value=False,
    )
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]

def applyGGH_noJetPt(events):
    """
    ggH category selection (V1 b-tag veto) without the leading-jet pT cut.

    An event is kept when it is neither VBF-like nor b-tagged:
      * VBF-like: jj_mass_nominal > 400 and jj_dEta_nominal > 2.5
        (no jet1_pt_nominal requirement, unlike applyGGH_cutV1),
      * b-tagged: at least two loose b-tags or at least one medium b-tag
        (the b-tag cut is for the VH and ttH categories).

    Comparisons that evaluate to None count as False, so events with missing
    jet information are neither VBF-like nor b-tagged and are therefore kept.

    Only the columns used by the cuts are read; ``dimuon_mass`` is not needed.

    Returns the subset of ``events`` that passes the selection.
    """
    btag_cut = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False) | ak.fill_none(
        events.nBtagMedium_nominal >= 1, value=False
    )
    vbf_cut = ak.fill_none(
        (events.jj_mass_nominal > 400) & (events.jj_dEta_nominal > 2.5),
        value=False,
    )
    ggH_filter = ~vbf_cut & ~btag_cut
    return events[ggH_filter]

def veto_ttH_VH(events):
    """
    Remove b-tagged events (the b-tag cut is for the VH and ttH categories).

    An event counts as b-tagged when it has at least two loose b-tags, or at
    least one medium b-tag together with njets_nominal >= 2. Comparisons that
    evaluate to None count as False, so events with missing information are
    kept.

    Returns the subset of ``events`` that is not b-tagged.
    """
    two_loose_tags = ak.fill_none(events.nBtagLoose_nominal >= 2, value=False)
    one_medium_tag = ak.fill_none(events.nBtagMedium_nominal >= 1, value=False)
    at_least_two_jets = ak.fill_none(events.njets_nominal >= 2, value=False)

    is_btagged = two_loose_tags | (one_medium_tag & at_least_two_jets)
    return events[~is_btagged]


def veto_nJetGeq3(events):
    """
    Remove events with three or more jets.

    Events are kept when njets_nominal <= 2. Where the comparison evaluates
    to None (missing njets_nominal) the event is kept as well, because the
    missing result is filled with True.

    Returns the subset of ``events`` that survives the veto.
    """
    at_most_two_jets = ak.fill_none(events.njets_nominal <= 2, value=True)
    return events[at_most_two_jets]

def filterRegion(events, region="h-peak"):
    """
    Select the events whose dimuon mass lies in a named mass window.

    Parameters
    ----------
    events : table-like object exposing a ``dimuon_mass`` column and
        supporting boolean-mask indexing (``events[mask]``).
    region : str, default "h-peak"
        One of:
          "h-peak"      : 115.03 <  m <  135.03
          "h-sidebands" : 110 < m < 115.03  or  135.03 < m < 150
          "signal"      : 110    <= m <= 150
          "z-peak"      : 70     <= m <= 110
          "combined"    : 70     <= m <= 150

    Returns
    -------
    The subset of ``events`` inside the requested window.

    Raises
    ------
    ValueError
        If ``region`` is not one of the names above. Previously an unknown
        name fell through and was used as ``events[region]``, i.e. a field
        lookup by name instead of a mass selection.

    Notes
    -----
    "h-peak" and "h-sidebands" use strict inequalities while "signal" is
    inclusive, so a mass exactly equal to 110, 115.03, 135.03 or 150 is in
    "signal" but in neither of the two sub-regions.
    """
    dimuon_mass = events.dimuon_mass
    if region == "h-peak":
        mask = (dimuon_mass > 115.03) & (dimuon_mass < 135.03)
    elif region == "h-sidebands":
        mask = ((dimuon_mass > 110) & (dimuon_mass < 115.03)) | (
            (dimuon_mass > 135.03) & (dimuon_mass < 150)
        )
    elif region == "signal":
        mask = (dimuon_mass >= 110) & (dimuon_mass <= 150.0)
    elif region == "z-peak":
        mask = (dimuon_mass >= 70) & (dimuon_mass <= 110.0)
    elif region == "combined":
        mask = (dimuon_mass >= 70) & (dimuon_mass <= 150.0)
    else:
        raise ValueError(
            f"Unknown region {region!r}; expected one of "
            "'h-peak', 'h-sidebands', 'signal', 'z-peak', 'combined'"
        )
    return events[mask]

# Columns that the yield cells pull out of the parquet input and zip into a
# single in-memory record before the region / category cuts are applied.
# It must contain every column those cuts read: the b-tag counts,
# njets_nominal, the jj_* and jet1_pt_nominal quantities, and dimuon_mass.
V1_fields_2compute = [
    "wgt_nominal",
    "nBtagLoose_nominal",
    "nBtagMedium_nominal",
    "mu1_pt",
    "mu2_pt",
    "mu1_eta",
    "mu2_eta",
    "mu1_phi",
    "mu2_phi",
    "dimuon_pt",
    "dimuon_eta",
    "dimuon_phi",
    "dimuon_mass",
    "jet1_phi_nominal",
    "jet1_pt_nominal",
    "jet2_pt_nominal",
    "jet2_phi_nominal",
    "jet1_eta_nominal",
    "jet2_eta_nominal",
    "jj_mass_nominal",
    "jj_dEta_nominal",
    # "region",  (currently not requested)
    "event",
    "njets_nominal",
]
 
#

In [45]:
# year = "2018"
# # year="*"
# # year = "2017"
# # label="V2_Jan29_JecOn_TrigMatchFixed_2016UlJetIdFix"

# # label="DYamcNLO_11Apr2025"
# # label="UpdatedDY_100_200_CrossSection_24Feb_jetpuidOff"
# # label="test_test"
# # label="DYMiNNLO_30Mar2025"
# # label="DYMiNNLO_11Apr2025"
# label="DYMiNNLO_HemVetoOff_17Apr2025"
# # label="DYMiNNLO_HemVetoOff_18Apr2025_singleMuTrigMatch"

# # year = "2022preEE"
# # label="Run3_nanoAODv12_TEST"
# load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}/f1_0"


# # # events_data = dak.from_parquet(f"{load_path}/data_D/*.parquet")
# # # events_data = dak.from_parquet(f"{load_path}/data_F/*.parquet")
# # # filelist = glob.glob(f"{load_path}/data_F")
# # # filelist = glob.glob(f"{load_path}/data_*")
# # filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/data_D")

# # filelist = glob.glob(f"{load_path}/dy*")
# total_integral = 0
# for file in filelist:
#     print(f"file: {file}")
#     events_data = dak.from_parquet(f"{file}/*/*.parquet")

#     events_data = ak.zip({field: events_data[field] for field in V1_fields_2compute}).compute()
#     events_data = filterRegion(events_data, region="signal")
#     # events_data = applyGGH_cutV1(events_data)
#     events_data = applyGGH_new(events_data)
#     # events_data = veto_ttH_VH(events_data)
    
    
#     # events_data = applyVBF_cutV1(events_data)

#     # jj_dEta = events_data.jj_dEta_nominal
#     # print(events_data.jj_dEta_nominal)
#     # print(events_data.jet1_pt_nominal)
#     # print(events_data.jet2_pt_nominal)
#     # jj_dEta_filter = ak.fill_none((jj_dEta > 5), value=False)
#     # jj_dEtaSum = ak.sum(jj_dEta_filter)
#     # print(f"jj_dEta_filter: {jj_dEta_filter}")
#     # print(f"jj_dEtaSum: {jj_dEtaSum}")
#     # print(f"jj_dEta: {jj_dEta[jj_dEta_filter]}")
#     # jj_dEta_filter_yield = ak.sum(events_data.wgt_nominal[jj_dEta_filter])
#     # print(events_data.wgt_nominal[jj_dEta_filter])
#     # print(f"jj_dEta_filter_yield: {jj_dEta_filter_yield}")
#     # # events_data = applyVBF_cutV1(events_data)
#     # # events_data = veto_ttH_VH(events_data)
#     # print(f"ak.any(ak.is_none(events_data.wgt_nominal)): {ak.any(ak.is_none(events_data.wgt_nominal))}")
    
#     # data_yield = ak.sum(events_data.wgt_nominal, axis=0)
#     wgts = ak.fill_none(events_data.wgt_nominal, value=1.0)
#     data_yield = ak.sum(wgts)
#     df = pd.DataFrame({field: ak.fill_none(events_data[field], value=-999.9) for field in events_data.fields})
#     print(f"data_yield for {file}: {data_yield}")
#     total_integral += data_yield
# total_integral


In [46]:
# --- Configuration: which stage-1 output to read -----------------------------
year = "2018"
# year = "*"
# year = "2016"
# label="V2_Jan29_JecOn_TrigMatchFixed_2016UlJetIdFix"
# label="test_test"
# label="DYMiNNLO_30Mar2025"
# label="rereco_yun_Dec05_btagSystFixed_JesJerUncOn"
# label="test_cutflow"
# label="test_cutflow_applyAllMuCorrection_17Apr2025"
# label="test_cutflow_applyEcalGapVeto_17Apr2025"
label="test"


# year = "2022preEE"
# label="Run3_nanoAODv12_TEST"
# NOTE(review): absolute, user-specific path; consider a configurable base dir.
load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}"
# load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/*"


# # events_data = dak.from_parquet(f"{load_path}/data_D/*.parquet")
# # events_data = dak.from_parquet(f"{load_path}/data_F/*.parquet")
# # filelist = glob.glob(f"{load_path}/data_F")
# # filelist = glob.glob(f"{load_path}/data_*")
# filelist = glob.glob(f"{load_path}/data_*")
# Only the data_B dataset directory is processed here. glob returns an empty
# list when nothing matches, in which case the loop is skipped and the total
# stays 0.
filelist = glob.glob(f"{load_path}/data_B")

# filelist = glob.glob(f"{load_path}/dy*")
# --- Per-dataset yield --------------------------------------------------------
total_integral = 0
for file in filelist:
    print(f"file: {file}")
    # Lazily open every parquet file directly inside the dataset directory.
    events_data = dak.from_parquet(f"{file}/*.parquet")

    # Keep only the needed columns and materialise them in memory.
    events_data = ak.zip({field: events_data[field] for field in V1_fields_2compute}).compute()
    # Mass window 110 <= dimuon_mass <= 150.
    events_data = filterRegion(events_data, region="signal")
    # events_data = applyGGH_cutV1(events_data)
    # ggH category selection (also prints its debug information).
    events_data = applyGGH_new(events_data)
    # events_data = veto_ttH_VH(events_data)
    # events_data = veto_nJetGeq3(events_data)
    
    # events_data = applyGGH_noJetPt(events_data)
    # events_data = applyVBF_cutV1(events_data)
    # events_data = veto_ttH_VH(events_data)
    
    
    # Unweighted yield: the number of events left after the selection.
    data_yield = ak.num(events_data.dimuon_mass, axis=0)
    # Weighted alternative (missing weights counted as 1.0):
    # wgts = ak.fill_none(events_data.wgt_nominal, value=1.0)
    # data_yield = ak.sum(wgts)
    # Flat pandas copy of the selected events with None replaced by -999.9.
    # It is rebuilt on every iteration and not used again inside this cell.
    df = pd.DataFrame({field: ak.fill_none(events_data[field], value=-999.9) for field in events_data.fields})
    print(f"data_yield for {file}: {data_yield}")
    total_integral += data_yield
# Cell output: the yield summed over all processed datasets.
total_integral


file: /depot/cms/users/yun79/hmm/copperheadV1clean/test/stage1_output/2018/data_B
events.jet1_pt_nominal : [74.6, 35.3, 25.6, None, 40.4, None, 32.5, 41.2, None, None, None]
events.jj_mass_nominal: [126, None, None, None, None, None, None, None, None, None, None]
events.jj_dEta_nominal: [1.44, None, None, None, None, None, None, None, None, None, None]
events.nBtagLoose_nominal: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
events.nBtagMedium_nominal: [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
events.njets_nominal: [2, 1, 1, None, 1, None, 1, 1, None, None, None]
not btag_cut: [False, True, True, True, True, True, True, True, True, True, True]
ggH_filter: [False, True, True, True, True, True, True, True, True, True, True]
data_yield for /depot/cms/users/yun79/hmm/copperheadV1clean/test/stage1_output/2018/data_B: 10


np.int64(10)

In [26]:
events_data.jet1_pt_nominal
# events_data.njets_nominal

In [18]:
# Sanity check of the None handling used by veto_nJetGeq3, on the first 50
# events: the raw comparison stays None where njets_nominal is missing, and
# fill_none(..., value=True) turns those entries into True.
print(events_data.njets_nominal[:50] <=2)
print(ak.fill_none(events_data.njets_nominal[:50] <=2, value=True))

[True, True, None, None, None, True, ..., None, None, True, True, None, True]
[True, True, True, True, True, True, ..., True, True, True, True, True, True]


In [4]:
# --- Configuration: which stage-1 output to read -----------------------------
year = "2017"
# label="V2_Jan29_JecOn_TrigMatchFixed_2016UlJetIdFix"
label="DYMiNNLO_30Mar2025"
# label="test_test"
# year = "2022preEE"
# label="Run3_nanoAODv12_TEST"
# NOTE(review): absolute, user-specific path; consider a configurable base dir.
load_path =f"/depot/cms/users/yun79/hmm/copperheadV1clean/{label}/stage1_output/{year}/f1_0"

# filelist = glob.glob(f"{load_path}/dy*")
filelist = glob.glob(f"{load_path}/dy_M-50_MiNNLO")

# NOTE(review): total_integral is initialised but never updated in this cell.
total_integral = 0
for file in filelist:
    print(f"file: {file}")
    # Lazily open the parquet files one directory level below the dataset
    # directory. events_data stays a lazy dask-awkward array here; only
    # individual columns are computed below (and in the cells that follow).
    events_data = dak.from_parquet(f"{file}/*/*.parquet")
    # events_data = filterRegion(events_data, region="signal")
    # Mass window 70 <= dimuon_mass <= 110.
    events_data = filterRegion(events_data, region="z-peak")
    wgt = events_data.wgt_nominal.compute()
    # print(f"wgt sum: {wgt}")
    print(f"wgt sum: {ak.sum(wgt)}")
    # Running product of every column whose name contains "separate",
    # starting from an array of ones shaped like the nominal weight.
    # The product is only built here; the comparison against wgt is
    # commented out below.
    comp = ak.ones_like(wgt)
    for field in events_data.fields:
        if "separate" in field:
            value = events_data[field].compute()
            print(f"{field} arr: {value}")
            comp = comp*value
            # print(f"{field} curent wgt: {comp}")
    # diff = comp- wgt
    # print(f"comp : {comp}")
    # print(f"wgt : {wgt}")
    # print(f"sum wgt : {ak.sum(wgt)}")
    # print(f"difference : {diff}")
            # print(f"{field} max val: {ak.max(value)}")

file: /depot/cms/users/yun79/hmm/copperheadV1clean/DYMiNNLO_30Mar2025/stage1_output/2017/f1_0/dy_M-50_MiNNLO
wgt sum: 26022170.15837729
separate_wgt_genWeight arr: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..., 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
separate_wgt_genWeight curent wgt: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..., 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1]
separate_wgt_genWeight_normalization arr: [7.46e-09, 7.46e-09, 7.46e-09, 7.46e-09, ..., 7.46e-09, 7.46e-09, 7.46e-09]
separate_wgt_genWeight_normalization curent wgt: [7.46e-09, 7.46e-09, 7.46e-09, 7.46e-09, ..., 7.46e-09, 7.46e-09, 7.46e-09]
separate_wgt_xsec arr: [1.98e+03, 1.98e+03, 1.98e+03, 1.98e+03, ..., 1.98e+03, 1.98e+03, 1.98e+03]
separate_wgt_xsec curent wgt: [1.47e-05, 1.47e-05, 1.47e-05, 1.47e-05, ..., 1.47e-05, 1.47e-05, 1.47e-05]
separate_wgt_lumi arr: [4.15e+04, 4.15e+04, 4.15e+04, 4.15e+04, ..., 4.15e+04, 4.15e+04, 4.15e+04]
separate_wgt_lumi curent wgt: [0.611, 0.611, 0.611, 0.611, 0.611, ..., 0.611, 0.611, 0.611, 0.61

In [7]:
2.36e+03 * 228348879
41,158,111.73464724
191,709,872

538903354440.0

In [None]:
2.5292969635125805e+20 

In [29]:
# Materialise the nominal per-event weight and display its sum.
wgt_nominal = events_data["wgt_nominal"].compute()
ak.sum(wgt_nominal)

13492336.522684855

In [30]:
# Divide the "separate_wgt_qgl_wgt" column out of the nominal weight and sum
# the result, to see how much that single factor changes the total.
# Presumably this column is the QGL weight component -- TODO confirm.
test = wgt_nominal/ events_data["separate_wgt_qgl_wgt"].compute()
ak.sum(test)

13470176.805324513

In [5]:
# Materialise the "separate_wgt_genWeight" column and display its sum.
gen_wgt = events_data["separate_wgt_genWeight"].compute()
ak.sum(gen_wgt)

160148244540.72314

In [16]:
# NOTE(review): 7.1e-12 is an unexplained hand-typed scale factor; name it or
# read it from the data (e.g. a normalisation column) -- TODO confirm origin.
ak.sum(gen_wgt)*7.1e-12

0.13252662922514083

In [13]:
events_data["separate_wgt_genWeight_normalization"].compute()

In [18]:
ak.sum(events_data["wgt_nominal"].compute())

13492336.522684855