In [2]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
from coffea import util
from coffea.processor import accumulate
import os
import re
import pandas as pd
import hist
import matplotlib as mpl
import mplhep
import sys

sys.path.append("../../analysisTools/")
import plotTools as ptools
import utils

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Import coffea files -- signal and background

In [3]:
prefix = './coffea/skimmed/'

suffix = '2018_SR_v27_xclean_nominal'

In [4]:
# Signal
sig_histo = util.load(f'{prefix}/signal_{suffix}_official.coffea')[0]

In [5]:
# Background
bkg_histo = util.load(f'{prefix}/bkg_{suffix}.coffea')[0]

In [39]:
bkg_histo_MVA90 = util.load(f'{prefix}/bkg_2018_SR_v27_xclean_MVA90.coffea')[0]

In [52]:
bkg_histo_MVALoose = util.load(f'{prefix}/bkg_2018_SR_v27_xclean_MVALoose.coffea')[0]

FileNotFoundError: [Errno 2] No such file or directory: './coffea/skimmed//bkg_2018_SR_v27_xclean_MVALoose.coffea'

## Signal

In [7]:
sig_samples = list(sig_histo['cutflow'].keys())
si = pd.DataFrame.from_dict({s:ptools.signalPoint(s) for s in sig_samples}, orient='index')

In [8]:
cutnames = utils.get_signal_list_of_cuts(sig_histo)
cutnames

['Preselections',
 'n(good vertex) > 0',
 'No b-tagged jets',
 'Leading jet pT > 80 GeV',
 'Leading jet |eta| < 2.4',
 'dPhi(MET,leading jet) > 2.0',
 'dPhi(MET,all jets) > 0.75',
 'OSSF',
 'theta < 1.22 rad',
 'BDT']

In [9]:
# Efficiency
pd.options.display.float_format = '{:,.6f}'.format
eff_cutflow = utils.get_signal_cutflow_dict(sig_histo, 'cutflow')

In [10]:
table = eff_cutflow
table.columns = cutnames

In [11]:
table

Unnamed: 0,Preselections,n(good vertex) > 0,No b-tagged jets,Leading jet pT > 80 GeV,Leading jet |eta| < 2.4,"dPhi(MET,leading jet) > 2.0","dPhi(MET,all jets) > 0.75",OSSF,theta < 1.22 rad,BDT
sig_2018_Mchi-99p0_dMchi-18p0_ctau-10,0.097945,0.038154,0.035503,0.035403,0.033901,0.033550,0.030398,0.030048,0.026041,0.014071
sig_2018_Mchi-99p0_dMchi-18p0_ctau-1,0.123759,0.056625,0.051788,0.051688,0.050325,0.049872,0.045642,0.045440,0.038836,0.007048
sig_2018_Mchi-94p5_dMchi-9p0_ctau-100,0.043148,0.009605,0.008913,0.008898,0.008613,0.008553,0.007847,0.007608,0.007233,0.005042
sig_2018_Mchi-99p0_dMchi-18p0_ctau-100,0.050391,0.012350,0.011558,0.011505,0.010959,0.010889,0.010130,0.009848,0.008774,0.005462
sig_2018_Mchi-94p5_dMchi-9p0_ctau-10,0.085268,0.030288,0.027886,0.027757,0.027283,0.027026,0.024324,0.024238,0.023162,0.012719
...,...,...,...,...,...,...,...,...,...,...
sig_2018_Mchi-10p5_dMchi-1p0_ctau-1,0.023676,0.007513,0.007096,0.007069,0.006821,0.006759,0.006146,0.006137,0.005844,0.000986
sig_2018_Mchi-10p5_dMchi-1p0_ctau-100,0.006103,0.000537,0.000506,0.000506,0.000489,0.000482,0.000448,0.000448,0.000442,0.000227
sig_2018_Mchi-105p0_dMchi-10p0_ctau-100,0.049334,0.011447,0.010671,0.010637,0.010257,0.010108,0.009116,0.008818,0.008323,0.005566
sig_2018_Mchi-105p0_dMchi-10p0_ctau-10,0.092926,0.034109,0.031574,0.031370,0.030357,0.030057,0.026965,0.026813,0.024834,0.012982


In [12]:
# Per ctau
for ct in [1, 10, 100]:
    points = (si["ctau"] == ct)
    
    df = table.loc[list(si[points].name)]

    outdir = './csv/Cutflow/Efficiency/'
    outname = f'SR_Signal_official_ctau-{int(ct)}_samples_Efficiency'
    
    df_to_save = df
    
    utils.save_df_to_csv(df_to_save, outdir, outname, isSignal=True)

Saved: ./csv/Cutflow/Efficiency//SR_Signal_official_ctau-1_samples_Efficiency.csv
Saved: ./csv/Cutflow/Efficiency//SR_Signal_official_ctau-10_samples_Efficiency.csv
Saved: ./csv/Cutflow/Efficiency//SR_Signal_official_ctau-100_samples_Efficiency.csv


In [15]:
# Weighted event count
pd.options.display.float_format = '{:,.4f}'.format
cts_cutflow = utils.get_signal_cutflow_dict(sig_histo, 'cutflow_cts')

In [16]:
table = cts_cutflow
table.columns = cutnames

In [17]:
# ctau = 1mm
ct = 1.0

points = (si["ctau"] == ct)

df = table.loc[list(si[points].name)]
df

Unnamed: 0,Preselections,n(good vertex) > 0,No b-tagged jets,Leading jet pT > 80 GeV,Leading jet |eta| < 2.4,"dPhi(MET,leading jet) > 2.0","dPhi(MET,all jets) > 0.75",OSSF,theta < 1.22 rad,BDT
sig_2018_Mchi-99p0_dMchi-18p0_ctau-1,94.9762,43.4559,39.7439,39.6666,38.6211,38.2735,35.0267,34.8723,29.8037,5.4088
sig_2018_Mchi-94p5_dMchi-9p0_ctau-1,2510.1938,1080.9674,1013.1992,1010.8288,970.3439,964.3789,882.2668,881.0791,803.6315,126.1695
sig_2018_Mchi-88p0_dMchi-16p0_ctau-1,119.5415,55.8308,51.0345,50.9353,48.9576,48.4137,44.6567,44.4593,39.4619,7.5133
sig_2018_Mchi-84p0_dMchi-8p0_ctau-1,3336.9639,1382.4887,1280.3008,1275.7256,1234.6448,1222.4167,1109.6213,1106.604,1004.726,173.854
sig_2018_Mchi-77p0_dMchi-14p0_ctau-1,163.6005,71.9353,66.0595,65.9822,63.3121,62.4701,57.434,57.2061,51.4874,10.2283
sig_2018_Mchi-73p5_dMchi-7p0_ctau-1,4496.8465,1828.9826,1686.6866,1682.5959,1635.0546,1608.3143,1474.1526,1465.8889,1379.3339,259.7471
sig_2018_Mchi-66p0_dMchi-12p0_ctau-1,238.0476,104.3508,95.1256,95.1256,91.9699,91.1829,84.3348,83.8846,76.458,14.3906
sig_2018_Mchi-63p0_dMchi-6p0_ctau-1,5745.8923,2261.9255,2139.2927,2136.2399,2056.7478,2038.3951,1901.1246,1898.0977,1809.3408,351.3279
sig_2018_Mchi-5p5_dMchi-1p0_ctau-1,4192.7209,1268.6659,1172.2039,1169.193,1121.0151,1104.46,1021.5165,1019.9782,1004.9012,194.5235
sig_2018_Mchi-5p25_dMchi-0p5_ctau-1,81359.917,19618.3532,18241.8327,18145.7996,17514.5476,17457.1314,16137.3007,16118.1408,15698.8656,1832.0281


In [20]:
# Per ctau
for ct in [1, 10, 100]:
    points = (si["ctau"] == ct)
    
    df = table.loc[list(si[points].name)]

    outdir = './csv/Cutflow/Efficiency/'
    outname = f'VR1_Signal_ctau-{int(ct)}_samples_EventCount_Weighted'
    
    df_to_save = df
    
    utils.save_df_to_csv(df_to_save, outdir, outname, isSignal=True)

Saved: ./csv/Cutflow/Efficiency//VR1_Signal_ctau-1_samples_EventCount_Weighted.csv
Saved: ./csv/Cutflow/Efficiency//VR1_Signal_ctau-10_samples_EventCount_Weighted.csv
Saved: ./csv/Cutflow/Efficiency//VR1_Signal_ctau-100_samples_EventCount_Weighted.csv


In [23]:
# Raw count
pd.options.display.float_format = '{:,.4f}'.format
raw_cutflow = utils.get_signal_cutflow_dict(sig_histo, 'cutflow_nevts')

In [24]:
raw_cutflow

Unnamed: 0,Preselections,n(good vertex) > 0,No b-tagged jets,Leading jet pT > 80 GeV,Leading jet |eta| < 2.4,"dPhi(MET,leading jet) > 2.0","dPhi(MET,all jets) > 0.75",OSSF,theta < 1.22 rad,BDT
sig_2018_Mchi-99p0_dMchi-18p0_ctau-10,1956,762,709,707,677,670,607,600,520,281
sig_2018_Mchi-99p0_dMchi-18p0_ctau-1,2457,1124,1028,1026,999,990,906,902,771,140
sig_2018_Mchi-94p5_dMchi-9p0_ctau-100,2875,640,594,593,574,570,523,507,482,336
sig_2018_Mchi-99p0_dMchi-18p0_ctau-100,2860,701,656,653,622,618,575,559,498,310
sig_2018_Mchi-94p5_dMchi-9p0_ctau-10,1985,705,649,646,635,629,566,564,539,296
...,...,...,...,...,...,...,...,...,...,...
sig_2018_Mchi-10p5_dMchi-1p0_ctau-1,2665,846,799,796,768,761,692,691,658,111
sig_2018_Mchi-10p5_dMchi-1p0_ctau-100,4092,360,339,339,328,323,300,300,296,152
sig_2018_Mchi-105p0_dMchi-10p0_ctau-100,2988,693,646,644,621,612,552,534,504,337
sig_2018_Mchi-105p0_dMchi-10p0_ctau-10,1833,673,623,619,599,593,532,529,490,256


In [25]:
# Per ctau
for ct in [1, 10, 100]:
    points = (si["ctau"] == ct)
    
    df = table.loc[list(si[points].name)]

    outdir = './csv/Cutflow/Efficiency/'
    outname = f'SR_Signal_official_ctau-{int(ct)}_samples_Raw'
    
    df_to_save = df
    
    utils.save_df_to_csv(df_to_save, outdir, outname, isSignal=True)

Saved: ./csv/Cutflow/Efficiency//SR_Signal_official_ctau-1_samples_Raw.csv
Saved: ./csv/Cutflow/Efficiency//SR_Signal_official_ctau-10_samples_Raw.csv
Saved: ./csv/Cutflow/Efficiency//SR_Signal_official_ctau-100_samples_Raw.csv


## Bkg

In [51]:
df_nominal = utils.get_bkg_cutflow_df(bkg_histo, 'cutflow_cts')
df_MVA90 = utils.get_bkg_cutflow_df(bkg_histo_MVA90, 'cutflow_cts')
df_MVALoose = utils.get_bkg_cutflow_df(bkg_histo_MVALoose, 'cutflow_cts')

fig, axes = plt.subplots(figsize=(7,7))


plot_dict_bkg_eff = {

    # Select processes
    'processes': ['Total'], # Otherwise, give as a list; ['WJets', 'ZJets', 'Total']

    # Plot display styling
    'ylim': None, # None for default; otherwise [ymin, ymax]
    'doLog': True,
    
    'ylabel': 'Events', # None for default
    'title': rf"Background Cutflow", 
    'label': None,
    'color': None,

    # Plot saving
    'doSave': False,
    'outDir': './plots/cutflow/',
    'outName': ''
}

plot_dict_bkg_eff['label'] = 'Cut-based ID'
plot_dict_bkg_eff['color'] = 'black'

ptools.plot_bkg_efficiency(bkg_histo, df_nominal, plot_dict_bkg_eff)

plot_dict_bkg_eff['label'] = 'MVA90 ID'
plot_dict_bkg_eff['color'] = 'blue'

ptools.plot_bkg_efficiency(bkg_histo_MVA90, df_MVA90, plot_dict_bkg_eff)

fig, axes = plt.subplots(figsize=(7,7))


plot_dict_bkg_eff = {

    # Select processes
    'processes': ['Total'], # Otherwise, give as a list; ['WJets', 'ZJets', 'Total']

    # Plot display styling
    'ylim': None, # None for default; otherwise [ymin, ymax]
    'doLog': True,
    
    'ylabel': 'Events', # None for default
    'title': rf"Background Cutflow", 
    'label': None,
    'color': None,

    # Plot saving
    'doSave': False,
    'outDir': './plots/cutflow/',
    'outName': ''
}

plot_dict_bkg_eff['label'] = 'Cut-based ID'
plot_dict_bkg_eff['color'] = 'black'

ptools.plot_bkg_efficiency(bkg_histo, df_nominal, plot_dict_bkg_eff)

plot_dict_bkg_eff['label'] = 'MVA90 ID'
plot_dict_bkg_eff['color'] = 'blue'

ptools.plot_bkg_efficiency(bkg_histo_MVA90, df_MVA90, plot_dict_bkg_eff)

plot_dict_bkg_eff['label'] = 'MVALoose ID'
plot_dict_bkg_eff['color'] = 'green'

ptools.plot_bkg_efficiency(bkg_histo_MVALoose, df_MVALoose, plot_dict_bkg_eff)


NameError: name 'bkg_histo_MVALoose' is not defined