This notebook exports the cutflow data for each sample to YAML, in a format loosely modelled on the one used on HEPData. The format will need further iteration, but this is a good starting point.

In [29]:
import uproot
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Image
import yaml


In [30]:
# Configuration: location of the histogram files and the cutflow histograms to export.
HIST_DIR = '/data/hnl/histograms/v9p3_histograms'

# glob.glob returns matches in arbitrary order; sort them so that every run
# processes the files (and fills downstream dicts) in the same order.
hist_files_all = sorted(glob.glob(HIST_DIR + '/histograms_fullrun2_*'))
# Subset of the files above whose names start with the '10G_10mm' sample tag.
hist_files_10_10 = sorted(glob.glob(HIST_DIR + '/histograms_fullrun2_10G_10mm_*'))

# Names of the cutflow histograms that are read from each file.
cutflow_types = [
    'CutFlow',
    'CutFlow_LNC_raw_counts',
    'CutFlow_LNV_raw_counts',
    'CutFlow_weighted_one_hnl_majorana',
    'CutFlow_weighted_one_hnl_majorana_LNV_only',
    'CutFlow_weighted_one_hnl_majorana_LNC_only',
    'CutFlow_weighted_one_hnl_dirac',
    'CutFlow_weighted_majorana_limit_ih',
    'CutFlow_weighted_majorana_limit_nh',
    'CutFlow_weighted_dirac_limit_ih',
    'CutFlow_weighted_dirac_limit_nh',
]
# cutflow_bins_mu_mumu = ['all', 'trigger', '4-filter', 'medium prompt muon', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'mumu DV', 'cosmic veto', 'lepton pt', '2-medium-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_e_emu = ['all', 'trigger', '4-filter', 'medium prompt electron', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'medium-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_mu_mue = ['all', 'trigger', '4-filter', 'medium prompt muon', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'medium-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_e_ee = ['all', 'trigger', '4-filter', 'medium prompt electron', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'mat. veto', '2-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_mu_ee = ['all', 'trigger', '4-filter', 'medium prompt muon', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'emu DV', 'cosmic veto', 'lepton pt', 'mat. veto', '2-veryveryloose-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]
# cutflow_bins_e_mumu = ['all', 'trigger', '4-filter', 'medium prompt electron', 'plep overlap', 'DV', 'fiducial', '2-track DV', 'OS DV', 'mumu DV', 'cosmic veto', 'lepton pt', '2-medium-lepton DV', 'trig match', 'm_lll', 'B-hadron veto', 'Z mass veto', 'm_HNL', ]

In [31]:
# Build `samples`: one record per histogram file, keyed by sample name.
# Each record maps a cutflow histogram name to the list of its bin contents.
# The 'CutFlow' histogram additionally provides:
#   'stat_err' - sqrt of each bin content
#                (presumably the Poisson counting error on raw counts - confirm)
#   'labels'   - the x-axis bin labels, padded with an 'empty' entry (see below)
samples = {}
# pick all or the benchmark 10G 10mm
# for filename in hist_files_all:
for filename in hist_files_10_10:
    sample = filename.split('histograms_fullrun2_')[1].replace('.root','') # extract sample name from filename
    cutflow_dict = {}
    cutflow_dir = uproot.open(filename)['nominal']['VSI_LeptonsMod']['CutFlow']
    for cutflow_type in cutflow_types:
        cutflow_hist = cutflow_dir[cutflow_type]
        # Decode the histogram once; element 0 of .numpy() is the bin contents.
        bin_contents = cutflow_hist.numpy()[0]
        cutflow_dict[cutflow_type] = bin_contents.tolist()
        if cutflow_type == "CutFlow":
            cutflow_dict['stat_err'] = np.sqrt(bin_contents).tolist()
            # Copy before inserting so the list owned by the histogram object is never mutated.
            labels = list(cutflow_hist.xlabels)
            labels.insert(-1, 'empty') # annoying feature of cutflows, second to last bin is empty
            cutflow_dict['labels'] = labels
    samples[sample] = cutflow_dict


In [28]:
# Preview the YAML serialisation of a single sample record.
benchmark_yaml = yaml.dump(samples['10G_10mm_eeu'])
print(benchmark_yaml)

CutFlow:
- 50000.0
- 15037.0
- 15037.0
- 9355.0
- 7490.0
- 7450.0
- 2423.0
- 2243.0
- 2148.0
- 2121.0
- 2071.0
- 2071.0
- 1684.0
- 1519.0
- 1494.0
- 1449.0
- 818.0
- 817.0
- 817.0
- 0.0
- 736.0
CutFlow_LNC_raw_counts:
- 25187.0
- 7558.0
- 7558.0
- 4693.0
- 3789.0
- 3771.0
- 1208.0
- 1123.0
- 1082.0
- 1065.0
- 1038.0
- 1038.0
- 851.0
- 761.0
- 747.0
- 723.0
- 405.0
- 404.0
- 404.0
- 0.0
- 363.0
CutFlow_LNV_raw_counts:
- 24813.0
- 7479.0
- 7479.0
- 4662.0
- 3701.0
- 3679.0
- 1215.0
- 1120.0
- 1066.0
- 1056.0
- 1033.0
- 1033.0
- 833.0
- 758.0
- 747.0
- 726.0
- 413.0
- 413.0
- 413.0
- 0.0
- 373.0
CutFlow_weighted_dirac_limit_ih:
- 958.1144119493015
- 535.6241399272109
- 535.6241399272109
- 96.76794048187261
- 62.426435096124024
- 62.19852719545918
- 20.92928563030362
- 19.447206791676628
- 18.711609357509232
- 18.626604488417144
- 18.4192251877979
- 18.4192251877979
- 15.659231797925283
- 14.766202023928916
- 14.878042129346142
- 14.639007405959086
- 11.648588064532095
- 11.64116551685736
