This notebook outputs the cutflow data for each sample as a YAML file, in roughly the same format as the one used on HEPData. The format will need a few more iterations, but this is a good starting point.

In [263]:
import uproot
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Image
import yaml


In [264]:
# Directory holding the v9p3 full Run-2 histogram files. Kept in one place so
# that the two glob patterns below cannot drift apart.
HIST_DIR = '/data/hnl/histograms/v9p3_histograms'

# glob.glob returns paths in arbitrary (filesystem-dependent) order; sorting
# makes the order in which samples are processed reproducible between runs.
hist_files_all = sorted(glob.glob(f'{HIST_DIR}/histograms_fullrun2_*'))
# Subset of the above restricted to the 10G_10mm benchmark files.
hist_files_10_10 = sorted(glob.glob(f'{HIST_DIR}/histograms_fullrun2_10G_10mm_*'))

# Names of the cutflow histograms read from every file. 'CutFlow' must stay in
# the list: the statistical errors and bin labels are derived from it.
cutflow_types = [
    'CutFlow',
    'CutFlow_LNC_raw_counts',
    'CutFlow_LNV_raw_counts',
    'CutFlow_weighted_one_hnl_majorana',
    'CutFlow_weighted_one_hnl_majorana_LNV_only',
    'CutFlow_weighted_one_hnl_majorana_LNC_only',
    'CutFlow_weighted_one_hnl_dirac',
    'CutFlow_weighted_majorana_limit_ih',
    'CutFlow_weighted_majorana_limit_nh',
    'CutFlow_weighted_dirac_limit_ih',
    'CutFlow_weighted_dirac_limit_nh',
]
# cutflow_bins_mu_mumu = ['all', 'trigger', '4-filter', 'medium prompt muon', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'mumu DV', 'cosmic veto', 'lepton pt', '2-medium-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_e_emu = ['all', 'trigger', '4-filter', 'medium prompt electron', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'medium-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_mu_mue = ['all', 'trigger', '4-filter', 'medium prompt muon', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'medium-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_e_ee = ['all', 'trigger', '4-filter', 'medium prompt electron', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'mat. veto', '2-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_mu_ee = ['all', 'trigger', '4-filter', 'medium prompt muon', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'mat. veto', '2-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_e_mumu = ['all', 'trigger', '4-filter', 'medium prompt electron', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'mumu DV', 'cosmic veto', 'lepton pt', '2-medium-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]

In [265]:
# Maps each sample name to a dict holding its cutflow histograms (as plain
# lists) plus, for the 'CutFlow' histogram, the 'stat_err' and 'labels' entries.
samples = {}
# Loop over hist_files_all instead to process every sample rather than only
# the 10G 10mm benchmark.
for filename in hist_files_10_10:
    # The sample name is whatever follows the common prefix, minus the extension.
    sample = filename.split('histograms_fullrun2_')[1].replace('.root', '')
    cutflow_dir = uproot.open(filename)['nominal']['VSI_LeptonsMod']['CutFlow']
    cutflow_dict = {}
    for cutflow_type in cutflow_types:
        bin_values = cutflow_dir[cutflow_type].numpy()[0]
        cutflow_dict[cutflow_type] = bin_values.tolist()
        if cutflow_type != "CutFlow":
            continue
        # Only the 'CutFlow' histogram provides the errors and the labels.
        # The error is the square root of each bin value.
        cutflow_dict['stat_err'] = np.sqrt(bin_values).tolist()
        bin_labels = cutflow_dir[cutflow_type].xlabels
        # Annoying feature of the cutflows: the second-to-last bin is empty,
        # so a placeholder label is slotted in just before the final label.
        bin_labels.insert(-1, 'empty')
        cutflow_dict['labels'] = bin_labels
    samples[sample] = cutflow_dict


In [266]:
# Preview one benchmark sample as YAML; sort_keys=False keeps the keys in the
# order in which they were inserted into the dict.
eeu_yaml = yaml.dump(samples['10G_10mm_eeu'], sort_keys=False)
print(eeu_yaml)

CutFlow:
- 50000.0
- 15037.0
- 15037.0
- 9355.0
- 7490.0
- 7450.0
- 2423.0
- 2243.0
- 2148.0
- 2121.0
- 2071.0
- 2071.0
- 1684.0
- 1519.0
- 1494.0
- 1449.0
- 818.0
- 817.0
- 817.0
- 0.0
- 736.0
stat_err:
- 223.60679774997897
- 122.62544597268545
- 122.62544597268545
- 96.7212489580237
- 86.54478609367523
- 86.31338250816034
- 49.22397789695587
- 47.36032094485847
- 46.34652090502587
- 46.05431575867782
- 45.50824101193101
- 45.50824101193101
- 41.036569057366385
- 38.97435053981015
- 38.65229618017538
- 38.06573262134856
- 28.600699292150182
- 28.583211855912904
- 28.583211855912904
- 0.0
- 27.129319932501073
labels:
- all
- trigger
- PV
- 4-filter
- medium prompt electron
- plep overlap
- DV
- fiducial
- 2-track DV
- OS DV
- emu DV
- cosmic veto
- lepton pt
- medium-veryveryloose-lepton DV
- trig. match
- m_{lll}
- B-hadron veto
- Z mass veto
- m_{HNL}
- empty
- truth matched
CutFlow_LNC_raw_counts:
- 25187.0
- 7558.0
- 7558.0
- 4693.0
- 3789.0
- 3771.0
- 1208.0
- 1123.0
- 1082.0
- 10

In [267]:
# Display labels for the 22 rows of the harmonised cutflow, in row order.
cutflow_row_labels = [
    'All', 'Trigger', 'PV', '4-filter', 'Medium prompt lep.', 'P.lep. overlap',
    'DV', 'Fiducial', '2-track DV', 'OS DV', 'Lep. type DV', 'Cosmic veto',
    'Lepton pt', 'Mat. veto', 'Lep. qual. DV', 'Trig. match', 'm_{lll}',
    'B-hadron veto', 'Z mass veto', 'm_{HNL}', 'empty', 'truth matched',
]
# Row index of the 'Mat. veto' entry in cutflow_row_labels.
mat_veto_row = cutflow_row_labels.index('Mat. veto')
# Samples that are NOT padded below, i.e. whose cutflow is taken to contain
# the material-veto row already.
samples_with_mat_veto = ['10G_10mm_uee', '10G_10mm_eee']

# One DataFrame per sample, all with the same 22 rows so that their columns
# can be placed side by side in a single table.
dataframes = {}
for k, cutflow in samples.items():
    # Copy the lists so that `samples` itself is left untouched.
    columns = {name: list(values) for name, values in cutflow.items()}
    if k not in samples_with_mat_veto:
        # Pad with NaN at the material-veto position. Doing this on the plain
        # lists (rather than concatenating an all-None placeholder frame) keeps
        # the numeric columns float64 without relying on pd.concat ignoring
        # all-NA entries, a behaviour pandas has deprecated.
        for values in columns.values():
            values.insert(mat_veto_row, np.nan)
    df = pd.DataFrame(columns)
    # Replace the per-sample histogram labels with the common display labels.
    # This raises ValueError if a sample does not end up with 22 rows.
    df['labels'] = cutflow_row_labels
    dataframes[k] = df


In [268]:
# Cutflow histogram to tabulate; any entry of `cutflow_types` can be used here.
model = 'CutFlow_weighted_one_hnl_dirac'

# Caption wording for each weighted histogram, so that the caption always
# matches `model` instead of being hard-coded for one of them.
# NOTE(review): only the 'majorana_limit_ih' wording comes from the original
# caption; the other entries are inferred from the histogram names -- confirm.
model_descriptions = {
    'CutFlow_weighted_one_hnl_majorana': 'single-HNL Majorana model',
    'CutFlow_weighted_one_hnl_majorana_LNV_only': 'single-HNL Majorana model (LNV only)',
    'CutFlow_weighted_one_hnl_majorana_LNC_only': 'single-HNL Majorana model (LNC only)',
    'CutFlow_weighted_one_hnl_dirac': 'single-HNL Dirac model',
    'CutFlow_weighted_majorana_limit_ih': 'Majorana limit inverted-hierarchy model',
    'CutFlow_weighted_majorana_limit_nh': 'Majorana limit normal-hierarchy model',
    'CutFlow_weighted_dirac_limit_ih': 'Dirac limit inverted-hierarchy model',
    'CutFlow_weighted_dirac_limit_nh': 'Dirac limit normal-hierarchy model',
}
if model in model_descriptions:
    caption_detail = f'Weighted number of expected events based on {model_descriptions[model]}.'
else:
    # Unknown or unweighted histogram: fall back to its (LaTeX-escaped) name.
    escaped_model = model.replace('_', r'\_')
    caption_detail = rf'Event counts from the \texttt{{{escaped_model}}} histogram.'

# Plain-text substitutions applied to the generated LaTeX. Raw strings are used
# because sequences such as '\h', '\m', '\_' and '\{' are invalid escapes in
# ordinary string literals.
replacements = {
    r'm\_\{HNL\}': r'$m_{\mathrm{HNL}}$',
    r'm\_\{lll\}': r'$m_{\mathrm{lll}}$',
    'veryveryloose': 'VVL',
    'truth matched': r'\hline truth matched',
    'nan': '--',
    'uuu': r'$\mu\mu\mu$',
}


def _channel_table(channels):
    """Return a table with a 'Selection' column and one `model` column per channel.

    `channels` is a sequence of channel suffixes such as 'uuu'; each is looked
    up in `dataframes` under the key '10G_10mm_<channel>'. The row labels are
    taken from the first channel listed.
    """
    table = pd.DataFrame()
    table['Selection'] = dataframes[f'10G_10mm_{channels[0]}']['labels']
    for channel in channels:
        table[channel] = dataframes[f'10G_10mm_{channel}'][model]
    return table


prompt_el_df = _channel_table(['eee', 'eeu', 'euu'])
prompt_mu_df = _channel_table(['uuu', 'uue', 'uee'])
df = _channel_table(['uuu', 'uue', 'uee', 'eee', 'eeu', 'euu'])

# Only the first 20 rows are tabulated; later rows are left out of the table.
latex_table = df[:20].to_latex(index=False, float_format="{:0.3f}".format, na_rep='--')
for old, new in replacements.items():
    latex_table = latex_table.replace(old, new)

# \label comes after \caption: the caption is what sets the table number that
# \ref picks up, so a label placed before it points at the wrong counter.
print(rf'''
\begin{{table}}[!htbp]
\centering % center the table

{latex_table}

\caption{{Cutflow for prompt muon and prompt electron samples.
{caption_detail}}}
\label{{tab:cutflow_prompt_muon}}
\end{{table}}
'''
)

# prompt_el_latex_table = prompt_el_df[:20].to_latex(index=False, float_format="{:0.2f}".format, na_rep='--')
# for i, j in replacements.items():
#     prompt_el_latex_table = prompt_el_latex_table.replace(i, j)

# print(f'''
# \\begin{{table}}[!htbp]
# \centering % center the table
# {prompt_el_latex_table}

# \caption{{Cutflow for prompt electron samples. 
# Weighted number of expected events based on Majorana limit inverted-hierarchy model.}}
# \label{{tab:cutflow_prompt_electron}}

# \end{{table}}
# '''
# )


\begin{table}[!htbp]
\centering % center the table
\label{tab:cutflow_prompt_muon}

\begin{tabular}{lrrrrrr}
\toprule
          Selection &    $\mu\mu\mu$ &     uue &   uee &    eee &     eeu &   euu \\
\midrule
                All & 59.927 & 175.111 & 2.781 & 61.300 & 179.616 & 2.813 \\
            Trigger & 22.980 &  66.681 & 1.005 & 18.052 &  53.910 & 0.887 \\
                 PV & 22.980 &  66.681 & 1.005 & 18.052 &  53.910 & 0.887 \\
           4-filter & 14.523 &  40.744 & 0.556 &  9.948 &  30.920 & 0.565 \\
 Medium prompt lep. & 13.674 &  38.126 & 0.519 &  7.953 &  26.056 & 0.464 \\
     P.lep. overlap & 13.658 &  38.069 & 0.518 &  7.924 &  25.959 & 0.462 \\
                 DV &  4.561 &   9.762 & 0.127 &  2.032 &   8.133 & 0.167 \\
           Fiducial &  4.279 &   9.145 & 0.121 &  1.927 &   7.382 & 0.153 \\
         2-track DV &  4.156 &   8.710 & 0.115 &  1.791 &   7.083 & 0.148 \\
              OS DV &  4.123 &   8.605 & 0.113 &  1.780 &   7.018 & 0.147 \\
       Lep. type 

In [269]:
# NOTE: this cell rebinds `samples`, `dataframes` and `df` from the cells above.
samples = {}
# Sorted so that the files are processed in a reproducible order
# (glob.glob returns them in arbitrary order).
hist_files_data = sorted(glob.glob('/data/hnl/v8_histograms/jul29_unblind_SR_v8p0_histograms/fullrun2_histograms_mc_*.root'))
for filename in hist_files_data:
    # The sample name is whatever follows the common prefix, minus the extension.
    sample = filename.split('fullrun2_histograms_mc_')[1].replace('.root', '')
    cutflow_dir = uproot.open(filename)['VSI_LeptonsMod']['CutFlow']
    cutflow_dict = {'CutFlow': cutflow_dir['CutFlow'].numpy()[0].tolist()}
    bin_labels = cutflow_dir['CutFlow'].xlabels
    # Pad the label list with names for the last two bins.
    # NOTE(review): presumably xlabels is two entries shorter than the number
    # of bins in these files -- confirm.
    bin_labels.append('empty')
    bin_labels.append('truth matched')
    cutflow_dict['labels'] = bin_labels
    samples[sample] = cutflow_dict

# Display labels for the 22 rows of the harmonised cutflow, in row order.
row_labels = [
    'All', 'Trigger', 'PV', '4-filter', 'Medium prompt lep.', 'P.lep. overlap',
    'DV', 'Fiducial', '2-track DV', 'OS DV', 'Lep. type DV', 'Cosmic veto',
    'Lepton pt', 'Mat. veto', 'Lep. qual. DV', 'Trig. match', 'm_{lll}',
    'B-hadron veto', 'Z mass veto', 'm_{HNL}', 'empty', 'truth matched',
]
mat_veto_position = row_labels.index('Mat. veto')

dataframes = {}
for k, cutflow in samples.items():
    # Copy the lists so that `samples` itself is left untouched.
    columns = {name: list(values) for name, values in cutflow.items()}
    if k not in ['uee', 'eee']:
        # Every sample except 'uee' and 'eee' gets a NaN placeholder at the
        # material-veto position so that all frames have the same rows.
        # Padding the plain lists keeps the numeric column float64 without
        # relying on pd.concat ignoring all-NA entries, which pandas has
        # deprecated.
        for values in columns.values():
            values.insert(mat_veto_position, np.nan)
    frame = pd.DataFrame(columns)
    # Raises ValueError if a sample does not end up with 22 rows.
    frame['labels'] = row_labels
    dataframes[k] = frame

# Side-by-side table of the 'CutFlow' column for all six channels.
df = pd.DataFrame()
df['Selection'] = dataframes['uuu']['labels']
for channel in ['uuu', 'uue', 'uee', 'eee', 'eeu', 'euu']:
    df[channel] = dataframes[channel]['CutFlow']

df

Unnamed: 0,Selection,uuu,uue,uee,eee,eeu,euu
0,All,101921425.0,101921425.0,101921425.0,101629496.0,101921425.0,101921425.0
1,Trigger,22002921.0,22002921.0,22002921.0,21924697.0,22002921.0,22002921.0
2,PV,22002921.0,22002921.0,22002921.0,21924697.0,22002921.0,22002921.0
3,4-filter,16294834.0,16294834.0,16294834.0,16236924.0,16294834.0,16294834.0
4,Medium prompt lep.,8023702.0,8023702.0,8023702.0,6427469.0,6449805.0,6449805.0
5,P.lep. overlap,6259871.0,6259871.0,6259871.0,4664256.0,4680330.0,4680330.0
6,DV,6259871.0,6259871.0,6259871.0,4664256.0,4680330.0,4680330.0
7,Fiducial,3205961.0,3205961.0,3205961.0,2437219.0,2445529.0,2445529.0
8,2-track DV,1975904.0,1975904.0,1975904.0,1508527.0,1513732.0,1513732.0
9,OS DV,1166717.0,1166717.0,1166717.0,921697.0,924865.0,924865.0


In [270]:

# Plain-text substitutions applied to the generated LaTeX. Raw strings are used
# because sequences such as '\h', '\m', '\_' and '\{' are invalid escapes in
# ordinary string literals.
replacements = {
    r'm\_\{HNL\}': r'$m_{\mathrm{HNL}}$',
    r'm\_\{lll\}': r'$m_{\mathrm{lll}}$',
    'veryveryloose': 'VVL',
    'truth matched': r'\hline truth matched',
    'nan': '--',
    'NAN': '--',
    'uuu': r'$\mu\mu\mu$',
}


def _format_sci(value):
    """Format a float as e.g. '1.019e+8': 3-decimal mantissa, unpadded exponent.

    This replaces the former 'E+0' -> 'e+' text substitution, which only
    handled positive exponents below 10 and left e.g. 'E+10' or 'E-05' as-is.
    """
    mantissa, marker, exponent = f'{value:.3E}'.partition('E')
    if not marker:
        # 'NAN' / 'INF' carry no exponent part; return them unchanged.
        return mantissa
    return f'{mantissa}e{exponent[0]}{int(exponent[1:])}'


# Only the first 20 rows are tabulated; later rows are left out of the table.
latex_table = df[:20].to_latex(index=False, float_format=_format_sci, na_rep='--')
for old, new in replacements.items():
    latex_table = latex_table.replace(old, new)

# The caption describes what `df` holds at this point (the plain 'CutFlow'
# histogram of each channel) and the label is distinct from the one used for
# the weighted signal table, which avoids a multiply-defined LaTeX label.
# \label comes after \caption so that \ref picks up the table number.
print(rf'''
\begin{{table}}[!htbp]
\centering % center the table

{latex_table}

\caption{{Cutflow event counts per channel, taken from the \texttt{{CutFlow}} histogram.}}
\label{{tab:cutflow_event_counts}}
\end{{table}}
'''
)


\begin{table}[!htbp]
\centering % center the table
\label{tab:cutflow_prompt_muon}

\begin{tabular}{lrrrrrr}
\toprule
          Selection &       $\mu\mu\mu$ &       uue &       uee &       eee &       eeu &       euu \\
\midrule
                All & 1.019e+8 & 1.019e+8 & 1.019e+8 & 1.016e+8 & 1.019e+8 & 1.019e+8 \\
            Trigger & 2.200e+7 & 2.200e+7 & 2.200e+7 & 2.192e+7 & 2.200e+7 & 2.200e+7 \\
                 PV & 2.200e+7 & 2.200e+7 & 2.200e+7 & 2.192e+7 & 2.200e+7 & 2.200e+7 \\
           4-filter & 1.629e+7 & 1.629e+7 & 1.629e+7 & 1.624e+7 & 1.629e+7 & 1.629e+7 \\
 Medium prompt lep. & 8.024e+6 & 8.024e+6 & 8.024e+6 & 6.427e+6 & 6.450e+6 & 6.450e+6 \\
     P.lep. overlap & 6.260e+6 & 6.260e+6 & 6.260e+6 & 4.664e+6 & 4.680e+6 & 4.680e+6 \\
                 DV & 6.260e+6 & 6.260e+6 & 6.260e+6 & 4.664e+6 & 4.680e+6 & 4.680e+6 \\
           Fiducial & 3.206e+6 & 3.206e+6 & 3.206e+6 & 2.437e+6 & 2.446e+6 & 2.446e+6 \\
         2-track DV & 1.976e+6 & 1.976e+6 & 1.976e+6 & 1.