In [1]:
import glob
import os
import json
import pickle
import yaml

import math
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
import pyarrow.parquet as pq
from sklearn.metrics import auc, roc_curve
from scipy.special import softmax

import hist as hist2
import matplotlib.pyplot as plt
import mplhep as hep

plt.style.use(hep.style.CMS)

import utils
plt.rcParams.update({"font.size": 20})

In [2]:
! ls ../Feb22_2017

[34mDYJetsToLL_Pt-100To250[m[m                  [34mTTToHadronic[m[m
[34mDYJetsToLL_Pt-250To400[m[m                  [34mTTToSemiLeptonic[m[m
[34mDYJetsToLL_Pt-400To650[m[m                  [34mVBFHToTauTau[m[m
[34mDYJetsToLL_Pt-50To100[m[m                   [34mVBFHToWWToLNuQQ_M-125_withDipoleRecoil[m[m
[34mDYJetsToLL_Pt-650ToInf[m[m                  [34mWJetsToLNu_HT-100To200[m[m
[34mGluGluHToTauTau[m[m                         [34mWJetsToLNu_HT-1200To2500[m[m
[34mGluGluHToWWToLNuQQ[m[m                      [34mWJetsToLNu_HT-200To400[m[m
[34mGluGluHToWW_Pt-200ToInf_M-125[m[m           [34mWJetsToLNu_HT-2500ToInf[m[m
[34mHWminusJ_HToWW_M-125[m[m                    [34mWJetsToLNu_HT-400To600[m[m
[34mHWplusJ_HToWW_M-125[m[m                     [34mWJetsToLNu_HT-600To800[m[m
[34mHZJ_HToWW_M-125[m[m                         [34mWJetsToLNu_HT-70To100[m[m
[34mQCD_Pt_1000to1400[m[m                       [34mWJetsT

In [3]:
combine_samples = {
    # data
    "SingleElectron_": "SingleElectron",    
    "SingleMuon_": "SingleMuon",  
    "EGamma_": "EGamma",    
    
    # signal
    "GluGluHToWW_Pt-200ToInf_M-125": "HWW",    
    "HToWW_M-125": "VH",
    "VBFHToWWToLNuQQ_M-125_withDipoleRecoil": "VBF",
    "ttHToNonbb_M125": "ttHToNonbb_M125",
    
    # bkg
    "QCD_Pt": "QCD",
    "DYJets": "DYJets",    
    "WJetsToLNu_": "WJetsLNu",
    "JetsToQQ": "WZQQ",
    "TT": "TTbar",
    "ST_": "SingleTop",
    "WW": "Diboson",
    "WZ": "Diboson",
    "ZZ": "Diboson",
}

In [4]:
weights = {
    "mu": {
        "weight_genweight": 1,
        "weight_L1Prefiring": 1,
        "weight_pileup": 1,
        "weight_trigger_iso_muon": 1,
        "weight_trigger_noniso_muon": 1,
        "weight_isolation_muon": 1,
        "weight_id_muon": 1,
        "weight_vjets_nominal": 1,
    },
    "ele":{
        "weight_genweight": 1,
        "weight_L1Prefiring": 1,
        "weight_pileup": 1,
        "weight_trigger_electron": 1,
        "weight_reco_electron": 1,
        "weight_id_electron": 1,
        "weight_vjets_nominal": 1,   
    }
}

signals = [
        "HWW",        
        "ttHToNonbb_M125",
        "VH",
        "VBF"]

data_by_ch = {
    "ele": "SingleElectron",
    "mu": "SingleMuon",
}

nice_channel = {
    "mu": "Muon",
    "ele": "Electron"
}

In [5]:
def disc_score(df, sigs, bkgs):
    num = df[sigs].sum(axis=1) 
    den = df[sigs].sum(axis=1) + df[bkgs].sum(axis=1)
    return num/den

In [6]:
# scores definition
hwwev = ["fj_PN_probHWqqWev0c","fj_PN_probHWqqWev1c","fj_PN_probHWqqWtauev0c","fj_PN_probHWqqWtauev1c"]
hwwmv = ["fj_PN_probHWqqWmv0c","fj_PN_probHWqqWmv1c","fj_PN_probHWqqWtauev0c","fj_PN_probHWqqWtaumv1c"]
qcd = ["fj_PN_probQCDbb","fj_PN_probQCDcc","fj_PN_probQCDb","fj_PN_probQCDc","fj_PN_probQCDothers"]

tope = ["fj_PN_probTopbWev","fj_PN_probTopbWtauev"]
topm = ["fj_PN_probTopbWmv","fj_PN_probTopbWtaumv"]
tophad = ["fj_PN_probTopbWqq0c","fj_PN_probTopbWqq1c","fj_PN_probTopbWq0c","fj_PN_probTopbWq1c","fj_PN_probTopbWtauhv"]

top = tope+topm+tophad

# s/b

In [7]:
selections = {
    "mu": {
        ### pre-selection must include lepton isolation
        "pre-selection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",        

        # selections we find useful
        "mini-iso": "( (lep_pt < 55) |  ( (lep_misolation < 0.2) & (lep_pt >= 55) ) )",
        "bjet_ophem": "( (fj_bjets_ophem < 0.3040) )", 

        # tagger cuts
        "QCD>0.99": "( QCD>0.99 )",
        "Top>0.90": "( Top>0.90 )",
        
        ### inclusive tagger cuts
        "inclusive>0.84": "( inclusive>0.84 )",
        "inclusive>0.86": "( inclusive>0.86 )",
        "inclusive>0.88": "( inclusive>0.88 )",
        "inclusive>0.90": "( inclusive>0.90 )",
        "inclusive>0.92": "( inclusive>0.92 )",
        "inclusive>0.94": "( inclusive>0.94 )",
        "inclusive>0.96": "( inclusive>0.96 )",        
    },
    "ele": {
        ### pre-selection must include lepton isolation
        "pre-selection": "( ( (lep_pt < 55) & (lep_isolation<0.15)) |  (lep_pt >= 55) )",
        
        # selections we find useful
        "mini-iso": "( fj_pt>0 )",  # trivial for electrons
        "bjet_ophem": "( (fj_bjets_ophem < 0.3040) )", 

        # tagger cuts
        "QCD>0.99": "( QCD>0.99 )",
        "Top>0.90": "( Top>0.90 )",
        
        ### inclusive tagger cuts
        "inclusive>0.84": "( inclusive>0.84 )",
        "inclusive>0.86": "( inclusive>0.86 )",
        "inclusive>0.88": "( inclusive>0.88 )",
        "inclusive>0.90": "( inclusive>0.90 )",
        "inclusive>0.92": "( inclusive>0.92 )",
        "inclusive>0.94": "( inclusive>0.94 )",
        "inclusive>0.96": "( inclusive>0.96 )",     
    },
}

In [8]:
# how we landed on (1) using inclusive score and (2) not using bveto
cuts = {
    "pre-selection": ["pre-selection"],
    "pre-selection + mini-iso": ["pre-selection", "mini-iso"],
    "pre-selection + QCD>0.99": ["pre-selection", "QCD>0.99"],    
    "pre-selection + mini-iso + QCD>0.99": ["pre-selection", "mini-iso", "QCD>0.99"],
    "pre-selection + mini-iso + Top>0.9": ["pre-selection", "mini-iso", "Top>0.90"],
    "pre-selection + mini-iso + inclusive>0.9": ["pre-selection", "mini-iso", "inclusive>0.90"],    
    "pre-selection + mini-iso + bjet_ophem + Top>0.9": ["pre-selection", "mini-iso", "bjet_ophem", "Top>0.90"],
    "pre-selection + mini-iso + bjet_ophem + inclusive>0.9": ["pre-selection", "mini-iso", "bjet_ophem", "inclusive>0.90"],

}

# # testing different tagger scores
# cuts = {
#     "pre-selection + mini-iso": ["pre-selection", "mini-iso"],
#     "pre-selection + mini-iso + inclusive>0.84": ["pre-selection", "mini-iso", "inclusive>0.84"],    
#     "pre-selection + mini-iso + inclusive>0.86": ["pre-selection", "mini-iso", "inclusive>0.86"],    
#     "pre-selection + mini-iso + inclusive>0.88": ["pre-selection", "mini-iso", "inclusive>0.88"],    
#     "pre-selection + mini-iso + inclusive>0.90": ["pre-selection", "mini-iso", "inclusive>0.90"],    
#     "pre-selection + mini-iso + inclusive>0.92": ["pre-selection", "mini-iso", "inclusive>0.92"],    
#     "pre-selection + mini-iso + inclusive>0.94": ["pre-selection", "mini-iso", "inclusive>0.94"],    
#     "pre-selection + mini-iso + inclusive>0.96": ["pre-selection", "mini-iso", "inclusive>0.96"],
# }

In [9]:
samples_dir = "../Feb22_2017"
samples = os.listdir(samples_dir)

channels = ["mu", "ele"]

sigs = {
    "ele": hwwev,
    "mu": hwwmv,
}

qcd_bkg = [b.replace("PN","ParT") for b in qcd]
top_bkg = [b.replace("PN","ParT") for b in tope+topm+tophad]
inclusive_bkg = [b.replace("PN","ParT") for b in qcd+tope+topm+tophad]

is_data = False

events_dict = {}
for ch in channels:
    events_dict[ch] = {}

    for cut in cuts:
        events_dict[ch][cut] = {}

    new_sig = [s.replace("PN","ParT") for s in sigs[ch]]

    # get lumi
    with open("../fileset/luminosity.json") as f:
        luminosity = json.load(f)[ch]["2017"]

    for sample in samples:

        ### get a combined label to combine samples of the same process
        for key in combine_samples:
            if key in sample:
                sample_to_use = combine_samples[key]
                break
            else:
                sample_to_use = sample

        print(f"Finding {sample} samples and should combine them under {sample_to_use}")

        out_files = f"{samples_dir}/{sample}/outfiles/"
        parquet_files = glob.glob(f"{out_files}/*_{ch}.parquet")
        pkl_files = glob.glob(f"{out_files}/*.pkl")

        if not parquet_files:
            print(f"No parquet file for {sample}")
            continue                

        data = pd.read_parquet(parquet_files)
        empty = len(data) == 0
        if empty:
            continue

        # get event_weight
    #     print(f"---> Retrieving event weight.")                
        event_weight = utils.get_xsecweight(pkl_files, "2017", sample, is_data, luminosity)
        for w in weights[ch]:
            if w not in data.keys():
    #             print(f"{w} weight is not stored in parquet")
                continue
            event_weight *= data[w]

        data["event_weight"] = event_weight
        
        # add QCD score 
        data["QCD"] = disc_score(data,new_sig,qcd_bkg)
        # add TOP score 
        data["Top"] = disc_score(data,new_sig,top_bkg)    
        # add inclusive score
        data["inclusive"] = disc_score(data,new_sig,inclusive_bkg)
            
        print(f"---> We have {len(data)} events.")        
        # apply selection
        
        for cut in cuts:
            print(f"Check {cut}")
            for i, selection in enumerate(cuts[cut]):
                if i!=0:
                    data1 = data1.query(selections[ch][selection]) 
                else:
                    data1 = data.query(selections[ch][selection])           
                print(f"---> Applied {selection}... Only {len(data1)} events survived.")        
            
            ### specefy columns to keep
            columns_ = ["fj_ParT_mass", "lep_fj_m", "event_weight"]

            # fill the big dataframe
            if sample_to_use not in events_dict[ch][cut]:
                events_dict[ch][cut][sample_to_use] = data1[columns_]
            else:
                events_dict[ch][cut][sample_to_use] = pd.concat([events_dict[ch][cut][sample_to_use], data1[columns_]])

Finding WJetsToLNu_HT-100To200 samples and should combine them under WJetsLNu
---> We have 964 events.
Check pre-selection
---> Applied pre-selection... Only 949 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 949 events survived.
---> Applied mini-iso... Only 938 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 949 events survived.
---> Applied QCD>0.99... Only 861 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 949 events survived.
---> Applied mini-iso... Only 938 events survived.
---> Applied QCD>0.99... Only 852 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 949 events survived.
---> Applied mini-iso... Only 938 events survived.
---> Applied Top>0.90... Only 90 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 949 events survived.
---> Applied mini-iso... Only 938 events sur

---> We have 885873 events.
Check pre-selection
---> Applied pre-selection... Only 861475 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 861475 events survived.
---> Applied mini-iso... Only 836187 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 861475 events survived.
---> Applied QCD>0.99... Only 496251 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 861475 events survived.
---> Applied mini-iso... Only 836187 events survived.
---> Applied QCD>0.99... Only 489542 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 861475 events survived.
---> Applied mini-iso... Only 836187 events survived.
---> Applied Top>0.90... Only 32722 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 861475 events survived.
---> Applied mini-iso... Only 836187 events survived.
---> Applied inclusive>0.90..

---> We have 95757 events.
Check pre-selection
---> Applied pre-selection... Only 93767 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 93767 events survived.
---> Applied mini-iso... Only 92403 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 93767 events survived.
---> Applied QCD>0.99... Only 88773 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 93767 events survived.
---> Applied mini-iso... Only 92403 events survived.
---> Applied QCD>0.99... Only 87786 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 93767 events survived.
---> Applied mini-iso... Only 92403 events survived.
---> Applied Top>0.90... Only 16102 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 93767 events survived.
---> Applied mini-iso... Only 92403 events survived.
---> Applied inclusive>0.90... Only 16023 

---> Applied mini-iso... Only 2 events survived.
---> Applied bjet_ophem... Only 2 events survived.
---> Applied Top>0.90... Only 0 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 8 events survived.
---> Applied mini-iso... Only 2 events survived.
---> Applied bjet_ophem... Only 2 events survived.
---> Applied inclusive>0.90... Only 0 events survived.
Finding QCD_Pt_3200toInf samples and should combine them under QCD
---> We have 1144 events.
Check pre-selection
---> Applied pre-selection... Only 948 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 948 events survived.
---> Applied mini-iso... Only 56 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 948 events survived.
---> Applied QCD>0.99... Only 10 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 948 events survived.
---> Applied mini-iso... Only 56 events s

---> Applied pre-selection... Only 5341 events survived.
---> Applied mini-iso... Only 298 events survived.
---> Applied Top>0.90... Only 21 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 5341 events survived.
---> Applied mini-iso... Only 298 events survived.
---> Applied inclusive>0.90... Only 17 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 5341 events survived.
---> Applied mini-iso... Only 298 events survived.
---> Applied bjet_ophem... Only 225 events survived.
---> Applied Top>0.90... Only 18 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 5341 events survived.
---> Applied mini-iso... Only 298 events survived.
---> Applied bjet_ophem... Only 225 events survived.
---> Applied inclusive>0.90... Only 14 events survived.
Finding WW samples and should combine them under Diboson
---> We have 3100 events.
Check 

---> Applied Top>0.90... Only 7163 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 20815 events survived.
---> Applied mini-iso... Only 20310 events survived.
---> Applied bjet_ophem... Only 17489 events survived.
---> Applied inclusive>0.90... Only 7141 events survived.
Finding GluGluHToTauTau samples and should combine them under GluGluHToTauTau
No parquet file for GluGluHToTauTau
Finding ZJetsToQQ_HT-400to600 samples and should combine them under WZQQ
---> We have 2603 events.
Check pre-selection
---> Applied pre-selection... Only 1997 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 1997 events survived.
---> Applied mini-iso... Only 259 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 1997 events survived.
---> Applied QCD>0.99... Only 61 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 1997 events survived

---> We have 15116 events.
Check pre-selection
---> Applied pre-selection... Only 12339 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 12339 events survived.
---> Applied mini-iso... Only 777 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 12339 events survived.
---> Applied QCD>0.99... Only 452 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 12339 events survived.
---> Applied mini-iso... Only 777 events survived.
---> Applied QCD>0.99... Only 213 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 12339 events survived.
---> Applied mini-iso... Only 777 events survived.
---> Applied Top>0.90... Only 167 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 12339 events survived.
---> Applied mini-iso... Only 777 events survived.
---> Applied inclusive>0.90... Only 143 events survived.

---> Applied pre-selection... Only 75732 events survived.
---> Applied QCD>0.99... Only 38661 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 75732 events survived.
---> Applied mini-iso... Only 73054 events survived.
---> Applied QCD>0.99... Only 37430 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 75732 events survived.
---> Applied mini-iso... Only 73054 events survived.
---> Applied Top>0.90... Only 7459 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 75732 events survived.
---> Applied mini-iso... Only 73054 events survived.
---> Applied inclusive>0.90... Only 7176 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 75732 events survived.
---> Applied mini-iso... Only 73054 events survived.
---> Applied bjet_ophem... Only 58045 events survived.
---> Applied Top>0.90... Only 5949 events s

---> We have 10424 events.
Check pre-selection
---> Applied pre-selection... Only 8670 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 8670 events survived.
---> Applied mini-iso... Only 553 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 8670 events survived.
---> Applied QCD>0.99... Only 147 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 8670 events survived.
---> Applied mini-iso... Only 553 events survived.
---> Applied QCD>0.99... Only 71 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 8670 events survived.
---> Applied mini-iso... Only 553 events survived.
---> Applied Top>0.90... Only 32 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 8670 events survived.
---> Applied mini-iso... Only 553 events survived.
---> Applied inclusive>0.90... Only 26 events survived.
Check pr

---> We have 86319 events.
Check pre-selection
---> Applied pre-selection... Only 86138 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 86138 events survived.
---> Applied mini-iso... Only 86138 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 86138 events survived.
---> Applied QCD>0.99... Only 56667 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 86138 events survived.
---> Applied mini-iso... Only 86138 events survived.
---> Applied QCD>0.99... Only 56667 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 86138 events survived.
---> Applied mini-iso... Only 86138 events survived.
---> Applied Top>0.90... Only 13367 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 86138 events survived.
---> Applied mini-iso... Only 86138 events survived.
---> Applied inclusive>0.90... Only 13151 

---> We have 646239 events.
Check pre-selection
---> Applied pre-selection... Only 645251 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 645251 events survived.
---> Applied mini-iso... Only 645251 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 645251 events survived.
---> Applied QCD>0.99... Only 365716 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 645251 events survived.
---> Applied mini-iso... Only 645251 events survived.
---> Applied QCD>0.99... Only 365716 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 645251 events survived.
---> Applied mini-iso... Only 645251 events survived.
---> Applied Top>0.90... Only 57866 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 645251 events survived.
---> Applied mini-iso... Only 645251 events survived.
---> Applied inclusive>0.90..

---> Applied pre-selection... Only 8198 events survived.
---> Applied mini-iso... Only 8198 events survived.
---> Applied Top>0.90... Only 588 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 8198 events survived.
---> Applied mini-iso... Only 8198 events survived.
---> Applied inclusive>0.90... Only 562 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 8198 events survived.
---> Applied mini-iso... Only 8198 events survived.
---> Applied bjet_ophem... Only 4348 events survived.
---> Applied Top>0.90... Only 220 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 8198 events survived.
---> Applied mini-iso... Only 8198 events survived.
---> Applied bjet_ophem... Only 4348 events survived.
---> Applied inclusive>0.90... Only 209 events survived.
Finding WplusHToTauTau samples and should combine them under WplusHToTauTau
No 

---> Applied mini-iso... Only 86 events survived.
---> Applied inclusive>0.90... Only 5 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 86 events survived.
---> Applied mini-iso... Only 86 events survived.
---> Applied bjet_ophem... Only 41 events survived.
---> Applied Top>0.90... Only 1 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 86 events survived.
---> Applied mini-iso... Only 86 events survived.
---> Applied bjet_ophem... Only 41 events survived.
---> Applied inclusive>0.90... Only 1 events survived.
Finding ST_s-channel_4f_leptonDecays samples and should combine them under SingleTop
---> We have 25645 events.
Check pre-selection
---> Applied pre-selection... Only 25542 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 25542 events survived.
---> Applied mini-iso... Only 25542 events survived.
Check pre-selection + QCD>0.99


---> We have 615755 events.
Check pre-selection
---> Applied pre-selection... Only 612818 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 612818 events survived.
---> Applied mini-iso... Only 612818 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 612818 events survived.
---> Applied QCD>0.99... Only 230872 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 612818 events survived.
---> Applied mini-iso... Only 612818 events survived.
---> Applied QCD>0.99... Only 230872 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 612818 events survived.
---> Applied mini-iso... Only 612818 events survived.
---> Applied Top>0.90... Only 13512 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 612818 events survived.
---> Applied mini-iso... Only 612818 events survived.
---> Applied inclusive>0.90..

---> Applied pre-selection... Only 1792 events survived.
---> Applied mini-iso... Only 1792 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 1792 events survived.
---> Applied QCD>0.99... Only 237 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 1792 events survived.
---> Applied mini-iso... Only 1792 events survived.
---> Applied QCD>0.99... Only 237 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 1792 events survived.
---> Applied mini-iso... Only 1792 events survived.
---> Applied Top>0.90... Only 168 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 1792 events survived.
---> Applied mini-iso... Only 1792 events survived.
---> Applied inclusive>0.90... Only 114 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 1792 events survived.
---> Applied mini-iso... 

---> Applied mini-iso... Only 385 events survived.
---> Applied QCD>0.99... Only 60 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 385 events survived.
---> Applied mini-iso... Only 385 events survived.
---> Applied Top>0.90... Only 39 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 385 events survived.
---> Applied mini-iso... Only 385 events survived.
---> Applied inclusive>0.90... Only 29 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 385 events survived.
---> Applied mini-iso... Only 385 events survived.
---> Applied bjet_ophem... Only 365 events survived.
---> Applied Top>0.90... Only 37 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 385 events survived.
---> Applied mini-iso... Only 385 events survived.
---> Applied bjet_ophem... Only 365 events survived.
---> Ap

---> Applied pre-selection... Only 833 events survived.
---> Applied mini-iso... Only 833 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 833 events survived.
---> Applied QCD>0.99... Only 112 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 833 events survived.
---> Applied mini-iso... Only 833 events survived.
---> Applied QCD>0.99... Only 112 events survived.
Check pre-selection + mini-iso + Top>0.9
---> Applied pre-selection... Only 833 events survived.
---> Applied mini-iso... Only 833 events survived.
---> Applied Top>0.90... Only 135 events survived.
Check pre-selection + mini-iso + inclusive>0.9
---> Applied pre-selection... Only 833 events survived.
---> Applied mini-iso... Only 833 events survived.
---> Applied inclusive>0.90... Only 83 events survived.
Check pre-selection + mini-iso + bjet_ophem + Top>0.9
---> Applied pre-selection... Only 833 events survived.
---> Applied mini-iso... Only 833 ev

---> Applied bjet_ophem... Only 9057 events survived.
---> Applied Top>0.90... Only 3035 events survived.
Check pre-selection + mini-iso + bjet_ophem + inclusive>0.9
---> Applied pre-selection... Only 12517 events survived.
---> Applied mini-iso... Only 12517 events survived.
---> Applied bjet_ophem... Only 9057 events survived.
---> Applied inclusive>0.90... Only 2995 events survived.
Finding WZ samples and should combine them under Diboson
---> We have 1186 events.
Check pre-selection
---> Applied pre-selection... Only 1175 events survived.
Check pre-selection + mini-iso
---> Applied pre-selection... Only 1175 events survived.
---> Applied mini-iso... Only 1175 events survived.
Check pre-selection + QCD>0.99
---> Applied pre-selection... Only 1175 events survived.
---> Applied QCD>0.99... Only 733 events survived.
Check pre-selection + mini-iso + QCD>0.99
---> Applied pre-selection... Only 1175 events survived.
---> Applied mini-iso... Only 1175 events survived.
---> Applied QCD>0.99

In [10]:
for cut in cuts:
    print(cut)

pre-selection
pre-selection + mini-iso
pre-selection + QCD>0.99
pre-selection + mini-iso + QCD>0.99
pre-selection + mini-iso + Top>0.9
pre-selection + mini-iso + inclusive>0.9
pre-selection + mini-iso + bjet_ophem + Top>0.9
pre-selection + mini-iso + bjet_ophem + inclusive>0.9


In [11]:
events_dict.keys()

dict_keys(['mu', 'ele'])

In [12]:
events_dict["mu"].keys()

dict_keys(['pre-selection', 'pre-selection + mini-iso', 'pre-selection + QCD>0.99', 'pre-selection + mini-iso + QCD>0.99', 'pre-selection + mini-iso + Top>0.9', 'pre-selection + mini-iso + inclusive>0.9', 'pre-selection + mini-iso + bjet_ophem + Top>0.9', 'pre-selection + mini-iso + bjet_ophem + inclusive>0.9'])

# Get s/b

In [13]:
cutflows = {}
samples = events_dict["mu"][cut]
for sample in samples:
    cutflows[sample] = {}
    for cut in cuts:
        cutflows[sample][cut] = 0
    
s, b = {}, {}
for cut in events_dict["mu"]:
    s[cut], b[cut] = 0, 0
    
    for sample in samples:
        for ch in channels:
            
            df = events_dict[ch][cut][sample]
            
            ### cutflows
            cutflows[sample][cut] += len(df)

            ### s/b
            ev_weight = df["event_weight"]

            regressed_mass =df["fj_ParT_mass"]     
            lep_fj_m = df["lep_fj_m"]     

#             mass_window = (regressed_mass>100) & (regressed_mass<150)
            mass_window = (lep_fj_m>0) & (lep_fj_m<150)

            if sample in signals:
                s[cut] += ev_weight[mass_window].sum()
            else:
                b[cut] += ev_weight[mass_window].sum()            

In [14]:
print("In the 0<lep_fj_m<150 window")
for cut in s:
    print(f"- {cut} : s/b = {s[cut]/math.sqrt(b[cut]):2f}")

In the 0<lep_fj_m<150 window
- pre-selection : s/b = 0.498451
- pre-selection + mini-iso : s/b = 0.597053
- pre-selection + QCD>0.99 : s/b = 0.615851
- pre-selection + mini-iso + QCD>0.99 : s/b = 0.608473
- pre-selection + mini-iso + Top>0.9 : s/b = 0.708517
- pre-selection + mini-iso + inclusive>0.9 : s/b = 0.721729
- pre-selection + mini-iso + bjet_ophem + Top>0.9 : s/b = 0.629265
- pre-selection + mini-iso + bjet_ophem + inclusive>0.9 : s/b = 0.642641


In [15]:
# plt.rcParams.update({"font.size": 20})

# lab_ = []
# for lab in cuts:
#     lab_.append("X>" + lab[37:])
# lab_[0] = "pre-selection"

# fig, ax = plt.subplots(figsize=(16,10))
# ax.grid() 
# for cut in s:
#     ax.bar(range(len(cuts)), s_over_b, tick_label=lab_)
# ax.legend(title="Semi-leptonic Channel")
# ax.set_title("X = inclusive tagger score")
# ax.set_ylabel(r"s \ $\sqrt{b}$")

# Get Cutflows

In [16]:
for sample in cutflows:
    if sample in ["HWW", "WJetsLNu", "QCD", "TTbar"]:
        print(sample)    
        for cut in cutflows[sample]:
            print(f"{cut} : {cutflows[sample][cut]}")
        print("---------------------------------")            

WJetsLNu
pre-selection : 1511403
pre-selection + mini-iso : 1489830
pre-selection + QCD>0.99 : 1018909
pre-selection + mini-iso + QCD>0.99 : 1005990
pre-selection + mini-iso + Top>0.9 : 276147
pre-selection + mini-iso + inclusive>0.9 : 268317
pre-selection + mini-iso + bjet_ophem + Top>0.9 : 243551
pre-selection + mini-iso + bjet_ophem + inclusive>0.9 : 236960
---------------------------------
TTbar
pre-selection : 2793057
pre-selection + mini-iso : 2724885
pre-selection + QCD>0.99 : 1320127
pre-selection + mini-iso + QCD>0.99 : 1309236
pre-selection + mini-iso + Top>0.9 : 84772
pre-selection + mini-iso + inclusive>0.9 : 83730
pre-selection + mini-iso + bjet_ophem + Top>0.9 : 30898
pre-selection + mini-iso + bjet_ophem + inclusive>0.9 : 30557
---------------------------------
HWW
pre-selection : 24041
pre-selection + mini-iso : 23331
pre-selection + QCD>0.99 : 17954
pre-selection + mini-iso + QCD>0.99 : 17505
pre-selection + mini-iso + Top>0.9 : 10005
pre-selection + mini-iso + inclusi

In [17]:
## OTHERS
print("Counting events of the following samples:")
c = [0] * len(cutflows[sample])
for sample in cutflows:
    if sample not in ["HWW", "QCD", "TTbar", "WJetsLNu"]:
        if sample not in signals:
            print(sample)
            for i, cut in enumerate(cutflows[sample]):
                c[i] += cutflows[sample][cut]
print("---------------------------------")     
for i, cut in enumerate(cutflows[sample]):
    print(f"{cut}: {c[i]}")

Counting events of the following samples:
DYJets
SingleTop
WZQQ
Diboson
---------------------------------
pre-selection: 1571915
pre-selection + mini-iso: 1517239
pre-selection + QCD>0.99: 924055
pre-selection + mini-iso + QCD>0.99: 919809
pre-selection + mini-iso + Top>0.9: 153643
pre-selection + mini-iso + inclusive>0.9: 150372
pre-selection + mini-iso + bjet_ophem + Top>0.9: 133584
pre-selection + mini-iso + bjet_ophem + inclusive>0.9: 130754
