In [1]:
import hist
import awkward as ak

In [2]:
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
# Apply the CMS style sheet shipped with mplhep first, then override the
# default font size on top of it (order matters: the style sheet is applied
# before the font-size override).
plt.style.use(hep.style.CMS)
plt.rcParams['font.size'] = 20

In [3]:
import glob
import pyarrow.parquet as pq


def compile_data(data_era, luminosities, cross_sections, mass_window=(90, 160), btag_wp='L'):
    """Compute the expected yield of every MC sample of one era at four selection stages.

    For each key of ``cross_sections`` the parquet files matching
    ``./v1/<sample>/nominal/*.parquet`` (era ``'postEE'``) or
    ``./v1_<data_era>/<sample>/nominal/*.parquet`` (any other era) are read.
    The era name and every sample name are printed as progress output.

    Parameters
    ----------
    data_era : str
        Era name; key into ``luminosities`` and into the b-tag working-point
        table defined below ('preEE' or 'postEE').
    luminosities : dict
        Era name -> integrated luminosity.
    cross_sections : dict
        Sample name -> cross section. Its keys define which samples are processed.
    mass_window : tuple, optional
        ``(low, high)`` exclusive bounds applied to both the ``dijet_mass`` and
        the ``mass`` column. Defaults to ``(90, 160)``.
    btag_wp : str, optional
        Working point ('L', 'M', 'T', 'XT' or 'XXT') whose threshold both
        ``lead_bjet_btagPNetB`` and ``sublead_bjet_btagPNetB`` must exceed.
        Defaults to ``'L'``.

    Returns
    -------
    tuple of five dicts, each keyed by sample name
        ``sumGenWeights``: sum of the ``sum_genw_presel`` schema-metadata
        values over the sample's files;
        ``bare_yields``: cross section * luminosity;
        ``preSelection_yields``: sum of ``genWeight`` over all stored events,
        scaled by ``bare_yield / sumGenWeights``;
        ``massWindow_yields``: same, restricted to events inside the mass window;
        ``massWindow_bScore_yields``: same, additionally requiring both b-tag scores
        above the chosen working point.

    Raises
    ------
    KeyError
        If ``data_era`` is missing from ``luminosities`` or from the
        working-point table, or ``btag_wp`` is not a known working point.
    FileNotFoundError
        If no parquet file matches the pattern of a sample.
    """
    print(20*'=')
    print(data_era)
    print(20*'=')

    # WPs for single b-tagging PN in 2022 (preEE) and 2022EE (postEE)
    single_b_WPs = {
        'preEE': {'L': 0.047, 'M': 0.245, 'T': 0.6734, 'XT': 0.7862, 'XXT': 0.961},
        'postEE': {'L': 0.0499, 'M': 0.2605, 'T': 0.6915, 'XT': 0.8033, 'XXT': 0.9664}
    }

    # Era-level quantities are looked up once, before any file is touched, so a
    # bad era / working point fails immediately instead of after the heavy I/O.
    luminosity = luminosities[data_era]
    btag_threshold = single_b_WPs[data_era][btag_wp]
    mass_low, mass_high = mass_window
    era_dir = './v1' if data_era == 'postEE' else f'./v1_{data_era}'

    # Only these columns are needed below; skipping the rest keeps loading cheap.
    needed_columns = [
        'genWeight', 'dijet_mass', 'mass',
        'lead_bjet_btagPNetB', 'sublead_bjet_btagPNetB',
    ]

    def _sum_weights(weights, mask=None):
        """Sum ``weights`` in float64, zeroing the entries where ``mask`` is False."""
        if mask is not None:
            weights = ak.where(mask, weights, 0)
        # Cast first so the accumulation is done in double precision whatever
        # the stored dtype is.
        return float(ak.sum(ak.values_astype(weights, 'float64')))

    sumGenWeights = {}

    bare_yields = {}
    preSelection_yields = {}
    massWindow_yields = {}
    massWindow_bScore_yields = {}

    for sample_name, cross_section in cross_sections.items():
        print(sample_name)

        files = sorted(glob.glob(f"{era_dir}/{sample_name}/nominal/*.parquet"))
        if not files:
            raise FileNotFoundError(
                f"No parquet files found for sample '{sample_name}' in "
                f"'{era_dir}/{sample_name}/nominal/'"
            )

        # Obtain sum of genWeights before pre-selection. Only the schema is
        # read here, so the event data is not loaded a second time.
        sumGenWeights[sample_name] = sum(
            float(pq.read_schema(file).metadata[b'sum_genw_presel']) for file in files
        )

        # Compute bare process yields
        bare_yields[sample_name] = cross_section * luminosity

        # Load process parquet for more in-depth yield calculations
        sample_pq = ak.from_parquet(files, columns=needed_columns)
        gen_weights = sample_pq['genWeight']

        # Compute pre-selection process yields
        preSelection_yields[sample_name] = (
            _sum_weights(gen_weights) * bare_yields[sample_name] / sumGenWeights[sample_name]
        )

        # Compute mass window process yields
        in_mass_window = (
            (sample_pq['dijet_mass'] > mass_low) & (sample_pq['dijet_mass'] < mass_high)
            & (sample_pq['mass'] > mass_low) & (sample_pq['mass'] < mass_high)
        )
        massWindow_yields[sample_name] = (
            _sum_weights(gen_weights, in_mass_window)
            * bare_yields[sample_name] / sumGenWeights[sample_name]
        )

        # Compute mass window + b-tag score process yields
        passes_btag = (
            (sample_pq['lead_bjet_btagPNetB'] > btag_threshold)
            & (sample_pq['sublead_bjet_btagPNetB'] > btag_threshold)
        )
        massWindow_bScore_yields[sample_name] = (
            _sum_weights(gen_weights, in_mass_window & passes_btag)
            * bare_yields[sample_name] / sumGenWeights[sample_name]
        )

    return sumGenWeights, bare_yields, preSelection_yields, massWindow_yields, massWindow_bScore_yields

# MC Era: total era luminosity [fb^-1] #
luminosities = {'preEE': 7.874, 'postEE': 26.337}

# Name: cross section [fb] @ sqrt{s}=13.6 TeV & m_H=125.09 GeV #
#   -> Do we not need to care about other HH processes? https://arxiv.org/pdf/1910.00012.pdf
# Each entry is preceded by the URL its number was taken from.
# NOTE(review): the GluGluHToGG / ttHToGG / VBFHToGG / VHToGG entries cite the
#   "CERNYellowReportPageAt13TeV" page while the header above says 13.6 TeV --
#   confirm these values correspond to the intended centre-of-mass energy.
# NOTE(review): the factors 0.0026 and 0.00228 are presumably branching ratios
#   (HH -> bb gamma gamma and H -> gamma gamma) -- TODO confirm and cite.
cross_sections = {
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/LHCHWGHH?redirectedfrom=LHCPhysics.LHCHXSWGHH#Current_recommendations_for_HH_c
    'GluGluToHH': 34.43*0.0026,
    # https://xsdb-temp.app.cern.ch/xsdb/?columns=37748736&currentPage=0&pageSize=10&searchQuery=DAS%3DGG-Box-3Jets_MGG-80_13p6TeV_sherpa
    'GGJets': 88.75, 
    # https://xsdb-temp.app.cern.ch/xsdb/?columns=37748736&currentPage=0&pageSize=10&searchQuery=DAS%3DGJet_PT-20to40_DoubleEMEnriched_MGG-80_TuneCP5_13p6TeV_pythia8
    'GJetPt20To40': 242.5, 
    # https://xsdb-temp.app.cern.ch/xsdb/?columns=37748736&currentPage=0&pageSize=10&searchQuery=DAS%3DGJet_PT-40_DoubleEMEnriched_MGG-80_TuneCP5_13p6TeV_pythia8
    'GJetPt40': 919.1, 
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt13TeV#gluon_gluon_Fusion_Process
    'GluGluHToGG': 48520*0.00228,
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt13TeV#ttH_Process
    'ttHToGG': 506.5*0.00228,
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt13TeV#VBF_Process
    'VBFHToGG': 3779*0.00228,
    # https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt13TeV#WH_Process + https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt13TeV#ZH_Process
    # WH and ZH cross sections are added before applying the common factor.
    'VHToGG': (1369 + 882.4)*0.00228,
}

# Load preEE samples: compile_data returns five dicts keyed by sample name
# (sum of gen weights, bare yields, and the yields after each selection stage).
preEE_sumGenWeights, preEE_bare_yields, preEE_preSelection_yields, preEE_massWindow_yields, preEE_massWindow_bScore_yields = compile_data(
    'preEE', luminosities, cross_sections)
# Load postEE samples: same five dicts, computed with the postEE luminosity and files.
postEE_sumGenWeights, postEE_bare_yields, postEE_preSelection_yields, postEE_massWindow_yields, postEE_massWindow_bScore_yields = compile_data(
    'postEE', luminosities, cross_sections)


preEE
GluGluToHH
GGJets
GJetPt20To40
GJetPt40
GluGluHToGG
ttHToGG
VBFHToGG
VHToGG
postEE
GluGluToHH
GGJets
GJetPt20To40
GJetPt40
GluGluHToGG
ttHToGG
VBFHToGG
VHToGG


In [4]:
# Print the per-era yields, one selection stage after another.
#
# The headings describe the cuts applied by compile_data: the mass window there
# is 90 < M < 160 for both the di-jet and the di-photon mass (the previous
# headings still quoted the older 100-150 window), and the b-tag requirement is
# the loose working point of the respective era.
separator = 20*'='
mass_window_cut = '($90<M_{bb}<160$ and $90<M_{gg}<160$)'
loose_wp_by_era = {'preEE': 0.047, 'postEE': 0.0499}

# One entry per selection stage, in the order the cuts are applied.
yields_by_stage = [
    ('bare', {'preEE': preEE_bare_yields, 'postEE': postEE_bare_yields}),
    ('preSelection', {'preEE': preEE_preSelection_yields, 'postEE': postEE_preSelection_yields}),
    ('massWindow', {'preEE': preEE_massWindow_yields, 'postEE': postEE_massWindow_yields}),
    ('massWindow_bScore', {'preEE': preEE_massWindow_bScore_yields, 'postEE': postEE_massWindow_bScore_yields}),
]


def describe_stage(stage, era):
    """Return the heading printed above the yields of one selection stage and era."""
    if stage == 'bare':
        return f'{era} bare yields'
    heading = f'{era} yields with pre-selection'
    if stage in ('massWindow', 'massWindow_bScore'):
        heading += f' and {mass_window_cut}'
    if stage == 'massWindow_bScore':
        heading += f' and ($lead_PN_bTag > loose WP = {loose_wp_by_era[era]}$ $sublead_PN_bTag > loose WP$)'
    return heading


for stage_index, (stage, yields_by_era) in enumerate(yields_by_stage):
    if stage_index > 0:
        # Double rule between two selection stages.
        print(separator)
        print(separator)

    for era, era_yields in yields_by_era.items():
        print(separator)
        print(describe_stage(stage, era))
        print(separator)
        for sample_name, yield_ in era_yields.items():
            print(f'{sample_name}: {yield_}')

preEE bare yields
GluGluToHH: 0.704864732
GGJets: 698.8175
GJetPt20To40: 1909.445
GJetPt40: 7236.9934
GluGluHToGG: 871.0659743999998
ttHToGG: 9.09305268
VBFHToGG: 67.84332888
VHToGG: 40.418753808
postEE bare yields
GluGluToHH: 2.357635566
GGJets: 2337.40875
GJetPt20To40: 6386.7225
GJetPt40: 24206.3367
GluGluHToGG: 2913.5464272
ttHToGG: 30.414494339999997
VBFHToGG: 226.92275244
VHToGG: 135.192877704
preEE yields with pre-selection
GluGluToHH: 0.24143747264757512
GGJets: 13.69369763824784
GJetPt20To40: 0.6499902204840362
GJetPt40: 15.297503615166367
GluGluHToGG: 44.84161344243253
ttHToGG: 3.642533808236749
VBFHToGG: 3.1788043248402027
VHToGG: 6.69920888132631
postEE yields with pre-selection
GluGluToHH: 0.7988853527036979
GGJets: 46.19867405993185
GJetPt20To40: 2.218918876358823
GJetPt40: 51.14279384938104
GluGluHToGG: 149.34260577742262
ttHToGG: 11.971635099367111
VBFHToGG: 10.753879006503004
VHToGG: 21.841069437694742
preEE yields with pre-selection and ($100<M_{bb}<150$ and $100<M_{gg

In [5]:
# Print the era-combined (preEE + postEE) yields for every selection stage.
#
# The two GJet pT slices are merged into a single 'GJetPt20' line. The headings
# quote the cuts applied by compile_data: a 90 < M < 160 window on both
# masses (the previous headings still quoted 100-150) and the loose b-tag
# working point, which differs between the two eras.
separator = 20*'='
mass_window_cut = '($90<M_{bb}<160$ and $90<M_{gg}<160$)'
btag_cut = '($lead_PN_bTag > loose WP = 0.047 (preEE) / 0.0499 (postEE)$ $sublead_PN_bTag > loose WP$)'

# (heading, preEE yields, postEE yields) for each stage, in cut order.
combined_stages = [
    ('bare yields', preEE_bare_yields, postEE_bare_yields),
    ('yields with pre-selection', preEE_preSelection_yields, postEE_preSelection_yields),
    (f'yields with pre-selection and {mass_window_cut}',
     preEE_massWindow_yields, postEE_massWindow_yields),
    (f'yields with pre-selection and {mass_window_cut} and {btag_cut}',
     preEE_massWindow_bScore_yields, postEE_massWindow_bScore_yields),
]

for heading, preEE_yields, postEE_yields in combined_stages:
    print(separator)
    print(heading)
    print(separator)
    for sample_name in preEE_yields.keys():
        if sample_name == 'GJetPt40':
            # Already included in the merged 'GJetPt20' line below.
            continue
        if sample_name == 'GJetPt20To40':
            # Same summation order as before so the printed value is unchanged.
            merged_gjet = (
                preEE_yields['GJetPt40'] + postEE_yields['GJetPt40']
                + preEE_yields[sample_name] + postEE_yields[sample_name]
            )
            print(f'GJetPt20: {merged_gjet}')
        else:
            print(f'{sample_name}: {preEE_yields[sample_name] + postEE_yields[sample_name]}')



bare yields
GluGluToHH: 3.062500298
GGJets: 3036.22625
GJetPt20: 39739.4976
GluGluHToGG: 3784.6124016
ttHToGG: 39.50754702
VBFHToGG: 294.76608132
VHToGG: 175.611631512
yields with pre-selection
GluGluToHH: 1.040322825351273
GGJets: 59.89237169817969
GJetPt20: 69.30920656139028
GluGluHToGG: 194.18421921985515
ttHToGG: 15.61416890760386
VBFHToGG: 13.932683331343206
VHToGG: 28.540278319021052
yields with pre-selection and ($100<M_{bb}<150$ and $100<M_{gg}<150$)
GluGluToHH: 0.7785170331626169
GGJets: 24.724545632086294
GJetPt20: 29.585459539739357
GluGluHToGG: 111.72756793943532
ttHToGG: 9.955937824763021
VBFHToGG: 7.9817314364873635
VHToGG: 14.163729794505706
yields with pre-selection and ($100<M_{bb}<150$ and $100<M_{gg}<150$) and ($lead_PN_bTag > loose WP = 0.047$ $sublead_PN_bTag > loose WP$)
GluGluToHH: 0.5670077420471464
GGJets: 1.4649031368752037
GJetPt20: 1.5065709552818025
GluGluHToGG: 5.109998291509605
ttHToGG: 5.785561854568329
VBFHToGG: 0.45183796682839134
VHToGG: 1.63998215413