In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import uncertainties

In [3]:
from bff_plotting_tools.data_getter import get_data
from plotting_meta.plotting_meta import bins, color_cycle, cms_format_fig, Bins

from bff_plotting_tools.make_hists import make_sys, SysHist

In [4]:
era = '2016'
wdf, lumi = get_data(era)

In [5]:
#This dict organizes backgrounds into categories
bck_dict = {
    "DY": ['ZToEE_M_120_200', 'ZToEE_M_200_400', 'ZToEE_M_400_800',
       'ZToEE_M_50_120', 'ZToEE_M_800_1400', 'ZToMuMu_M_120_200',
       'ZToMuMu_M_200_400', 'ZToMuMu_M_400_800', 'ZToMuMu_M_50_120',
       'ZToMuMu_M_800_1400'],
    "TT": ['mc_ttbar'],
    "ST": ['mc_santitop', 'mc_stop'],
    "WW": [ 'mc_ww'],
    "WZ": ['mc_wz'],
    "ZZ": ['mc_zz'],
    "BFF200": ['BFFZprimeToMuMu_M_200_dbs0p04']
}
for sample in bck_dict:
    bck_dict[sample] += [smp+"_DeepFlavour"  for smp in bck_dict[sample]]

In [6]:
def get_count_unc(_df, reg, name_list):
    '''Get sum of weights meeting some region and name. Returns sum weight and sum weight**2 **.5 (aka uncertainty)'''
    reg_string = "{}_nom".format(reg)
    tdf = _df[(_df[reg_string]==1) 
      & (_df.name.isin(name_list))]
    sys_hist = make_sys(tdf, 'DiLepMass', reg, bin_edges=[0,1e5])
    nominal = sys_hist.nominal[0]
    sys = ((sys_hist.up-sys_hist.down)/2)[0]
    std = sys_hist.std[0]
    return nominal, sys, std

In [7]:
# This dict lists regions and provieds a latex string
region_and_label = {'SR1': "#mu#mu_{b}", 
           'CR10': "#mu#mu_{j}", 
           'CR13': "ee_{b}", 
           'CR14': "ee_{j}", 
           'SR2': "#mu#mu_{1,2 b}", 
           'CR20': "#mu#mu_{2 j}", 
           'CR23': "ee_{1,2 b}", 
           'CR24': "ee_{2 j}", 
          }
# This dict lists regions and provieds a latex string for AN
region_and_label_AN = {'SR1': "\SR", 
           'CR10': "\CRmmj", 
           'CR13': "\CReeb", 
           'CR14': "\CReej", 
           'SR2': "\SRTwo", 
           'CR20': "\CRmmjTwo", 
           'CR23': "\CReebTwo", 
           'CR24': "\CReejTwo", 
          }

In [8]:
def make_count_by_region(_df):
    stat_list = []
    for reg in region_and_label:
        for bcks, name_list in bck_dict.items():
            nom, sys, std = get_count_unc(_df, reg, name_list)
            stat_list.append({"reg": reg, "name": bcks, "nom": nom, "sys":sys, "stat": std})
    count_df = pd.DataFrame(stat_list)
    #reformating it in a way to be easier to read in AN
    usestring = False
    reg_list = []
    for reg in region_and_label:
        bck_string_dict = {}
        bck_string_dict['reg'] = region_and_label_AN[reg]
        for bcks, name_list in bck_dict.items():
            tcdf = count_df[(count_df.reg==reg) & (count_df.name==bcks)]
            row = tcdf.iloc[0]
            
            if usestring:
                bck_string_dict[bcks] = "{:.1f}+/-{:.1f}".format(row.nom, (row.sys**2+row.stat**2)**.5)
            else:
                bck_string_dict[bcks] = uncertainties.ufloat(row.nom, (row.sys**2+row.stat**2)**.5)
        reg_list.append(bck_string_dict)
    return pd.DataFrame(reg_list)

In [9]:
count_df_by_reg = make_count_by_region(wdf)

In [10]:
print(count_df_by_reg.to_latex(escape=False, index=False))

\begin{tabular}{llllllll}
\toprule
      reg &                DY &                TT &         ST &          WW &        WZ &         ZZ &            BFF200 \\
\midrule
      \SR &   (5.0+/-0.8)e+03 &   (3.8+/-0.7)e+03 &   322+/-19 &   1.3+/-0.5 & 4.0+/-1.9 &  6.6+/-1.5 & (1.35+/-0.25)e+03 \\
   \CRmmj &   (5.4+/-0.5)e+04 &   (1.9+/-0.4)e+03 &   192+/-12 &  23.9+/-2.7 &    52+/-7 &     43+/-5 &   (7.3+/-1.5)e+02 \\
   \CReeb & (2.11+/-0.29)e+03 &   (2.1+/-0.4)e+03 &    178+/-9 & 0.49+/-0.35 &   0.0+/-0 &  3.4+/-1.0 &           0.0+/-0 \\
   \CReej & (2.84+/-0.27)e+04 & (1.04+/-0.20)e+03 &    114+/-8 &  12.9+/-1.8 &    36+/-6 & 25.4+/-3.0 &           0.0+/-0 \\
   \SRTwo & (1.35+/-0.18)e+03 &   (5.8+/-1.1)e+03 &    132+/-9 &   0.8+/-0.4 & 3.4+/-1.9 &  5.4+/-1.4 &   (3.9+/-0.7)e+02 \\
\CRmmjTwo & (1.10+/-0.11)e+04 &   (9.5+/-2.0)e+02 & 45.1+/-2.7 &   2.1+/-0.7 &    23+/-5 & 23.2+/-3.2 &          105+/-20 \\
\CReebTwo &   (7.1+/-1.1)e+02 &   (3.2+/-0.6)e+03 & 79.5+/-2.3 &   0.7+/-0.4 & 1.

In [11]:
def signficance(row):
    return row.BFF200/(row.BFF200+row.DY+row.TT)**.5

In [36]:
def make_counts_of_regions():
    deepcsv_df = wdf[wdf.deepflavor==0]
    deepcsv_df_narror = deepcsv_df[abs(deepcsv_df.DiLepMass-200) < 10]
    
    deepflavour_df = wdf[wdf.deepflavor==1]
    deepflavour_df_narror = deepflavour_df[abs(deepflavour_df.DiLepMass-200) < 10]
    
    deepcsv_counts = make_count_by_region(deepcsv_df)
    deepcsv_narrow_counts = make_count_by_region(deepcsv_df_narror)
    
    deepflavour_counts = make_count_by_region(deepflavour_df)
    deepflavour_narrow_counts = make_count_by_region(deepflavour_df_narror)
    
    return deepcsv_counts, deepcsv_narrow_counts, deepflavour_counts, deepflavour_narrow_counts

In [37]:
deepcsv_counts, deepcsv_narrow_counts, deepflavour_counts, deepflavour_narrow_counts = make_counts_of_regions()

In [55]:
def print_counts(_df):
    print(_df[['reg', 'DY', 'TT', 'BFF200']].to_latex())
    print(signficance(_df).to_latex())

In [56]:
print_counts(deepcsv_counts)

\begin{tabular}{lllll}
\toprule
{} &        reg &                 DY &                 TT &           BFF200 \\
\midrule
0 &        \textbackslash SR &  (1.48+/-0.23)e+03 &  (1.06+/-0.20)e+03 &  (4.2+/-0.8)e+02 \\
1 &     \textbackslash CRmmj &  (1.86+/-0.17)e+04 &    (6.5+/-1.3)e+02 &  (2.7+/-0.5)e+02 \\
2 &     \textbackslash CReeb &    (8.1+/-1.1)e+02 &    (5.7+/-1.1)e+02 &          0.0+/-0 \\
3 &     \textbackslash CReej &  (1.19+/-0.11)e+04 &    (3.5+/-0.7)e+02 &          0.0+/-0 \\
4 &     \textbackslash SRTwo &    (4.3+/-0.7)e+02 &  (1.63+/-0.31)e+03 &         123+/-21 \\
5 &  \textbackslash CRmmjTwo &    (3.8+/-0.4)e+03 &    (3.5+/-0.7)e+02 &           40+/-7 \\
6 &  \textbackslash CReebTwo &    (2.9+/-0.6)e+02 &    (9.1+/-1.7)e+02 &          0.0+/-0 \\
7 &  \textbackslash CReejTwo &  (2.25+/-0.30)e+03 &    (1.9+/-0.4)e+02 &          0.0+/-0 \\
\bottomrule
\end{tabular}

\begin{tabular}{ll}
\toprule
{} &            0 \\
\midrule
0 &    7.8+/-1.4 \\
1 &    1.9+/-0.4 \\
2 &      

In [57]:
print_counts(deepflavour_counts)

\begin{tabular}{lllll}
\toprule
{} &        reg &                 DY &                 TT &           BFF200 \\
\midrule
0 &        \textbackslash SR &    (3.5+/-0.6)e+03 &    (2.7+/-0.5)e+03 &  (9.3+/-1.7)e+02 \\
1 &     \textbackslash CRmmj &  (3.55+/-0.33)e+04 &  (1.28+/-0.25)e+03 &  (4.7+/-0.9)e+02 \\
2 &     \textbackslash CReeb &  (1.30+/-0.20)e+03 &  (1.49+/-0.27)e+03 &          0.0+/-0 \\
3 &     \textbackslash CReej &  (1.65+/-0.18)e+04 &    (6.9+/-1.3)e+02 &          0.0+/-0 \\
4 &     \textbackslash SRTwo &    (9.2+/-1.4)e+02 &    (4.1+/-0.8)e+03 &  (2.7+/-0.5)e+02 \\
5 &  \textbackslash CRmmjTwo &    (7.3+/-0.8)e+03 &    (6.0+/-1.3)e+02 &          65+/-12 \\
6 &  \textbackslash CReebTwo &    (4.2+/-0.8)e+02 &    (2.3+/-0.4)e+03 &          0.0+/-0 \\
7 &  \textbackslash CReejTwo &    (2.9+/-0.5)e+03 &    (3.2+/-0.7)e+02 &          0.0+/-0 \\
\bottomrule
\end{tabular}

\begin{tabular}{ll}
\toprule
{} &            0 \\
\midrule
0 &   10.9+/-2.0 \\
1 &    2.4+/-0.5 \\
2 &      

In [58]:
print_counts(deepcsv_narrow_counts)

\begin{tabular}{lllll}
\toprule
{} &        reg &                 DY &        TT &           BFF200 \\
\midrule
0 &        \textbackslash SR &            67+/-14 &  112+/-21 &  (3.9+/-0.7)e+02 \\
1 &     \textbackslash CRmmj &  (1.23+/-0.12)e+03 &   67+/-13 &  (2.5+/-0.5)e+02 \\
2 &     \textbackslash CReeb &            59+/-12 &   62+/-11 &          0.0+/-0 \\
3 &     \textbackslash CReej &    (9.6+/-1.2)e+02 &    40+/-8 &          0.0+/-0 \\
4 &     \textbackslash SRTwo &             15+/-5 &  173+/-33 &         114+/-20 \\
5 &  \textbackslash CRmmjTwo &    (2.9+/-0.4)e+02 &    36+/-8 &           37+/-7 \\
6 &  \textbackslash CReebTwo &             19+/-6 &   98+/-18 &          0.0+/-0 \\
7 &  \textbackslash CReejTwo &           126+/-18 &    21+/-4 &          0.0+/-0 \\
\bottomrule
\end{tabular}

\begin{tabular}{ll}
\toprule
{} &           0 \\
\midrule
0 &  16.4+/-2.1 \\
1 &   6.3+/-1.2 \\
2 &     0.0+/-0 \\
3 &     0.0+/-0 \\
4 &   6.6+/-1.0 \\
5 &   2.0+/-0.4 \\
6 &     0.0+/-0 \

In [59]:
print_counts(deepflavour_narrow_counts)

\begin{tabular}{lllll}
\toprule
{} &        reg &                 DY &               TT &           BFF200 \\
\midrule
0 &        \textbackslash SR &    (2.0+/-0.4)e+02 &  (2.9+/-0.5)e+02 &  (8.6+/-1.6)e+02 \\
1 &     \textbackslash CRmmj &  (2.43+/-0.24)e+03 &         133+/-26 &  (4.3+/-0.9)e+02 \\
2 &     \textbackslash CReeb &            48+/-21 &         161+/-30 &          0.0+/-0 \\
3 &     \textbackslash CReej &    (8.9+/-2.2)e+02 &          77+/-15 &          0.0+/-0 \\
4 &     \textbackslash SRTwo &            50+/-16 &  (4.4+/-0.8)e+02 &  (2.5+/-0.4)e+02 \\
5 &  \textbackslash CRmmjTwo &    (5.7+/-0.7)e+02 &          63+/-13 &          61+/-12 \\
6 &  \textbackslash CReebTwo &            24+/-14 &  (2.5+/-0.5)e+02 &          0.0+/-0 \\
7 &  \textbackslash CReejTwo &            75+/-27 &           34+/-7 &          0.0+/-0 \\
\bottomrule
\end{tabular}

\begin{tabular}{ll}
\toprule
{} &           0 \\
\midrule
0 &  23.4+/-3.0 \\
1 &   7.9+/-1.5 \\
2 &     0.0+/-0 \\
3 &     0.0

In [50]:
deepcsv_narrow_counts[['reg', 'DY', 'TT', 'BFF200']]

Unnamed: 0,reg,DY,TT,BFF200
0,\SR,67+/-14,112+/-21,(3.9+/-0.7)e+02
1,\CRmmj,(1.23+/-0.12)e+03,67+/-13,(2.5+/-0.5)e+02
2,\CReeb,59+/-12,62+/-11,0.0+/-0
3,\CReej,(9.6+/-1.2)e+02,40+/-8,0.0+/-0
4,\SRTwo,15+/-5,173+/-33,114+/-20
5,\CRmmjTwo,(2.9+/-0.4)e+02,36+/-8,37+/-7
6,\CReebTwo,19+/-6,98+/-18,0.0+/-0
7,\CReejTwo,126+/-18,21+/-4,0.0+/-0


In [52]:
deepflavour_narrow_counts[['reg', 'DY', 'TT', 'BFF200']]

Unnamed: 0,reg,DY,TT,BFF200
0,\SR,(2.0+/-0.4)e+02,(2.9+/-0.5)e+02,(8.6+/-1.6)e+02
1,\CRmmj,(2.43+/-0.24)e+03,133+/-26,(4.3+/-0.9)e+02
2,\CReeb,48+/-21,161+/-30,0.0+/-0
3,\CReej,(8.9+/-2.2)e+02,77+/-15,0.0+/-0
4,\SRTwo,50+/-16,(4.4+/-0.8)e+02,(2.5+/-0.4)e+02
5,\CRmmjTwo,(5.7+/-0.7)e+02,63+/-13,61+/-12
6,\CReebTwo,24+/-14,(2.5+/-0.5)e+02,0.0+/-0
7,\CReejTwo,75+/-27,34+/-7,0.0+/-0


In [53]:
signficance(deepcsv_narrow_counts)

0    16.4+/-2.1
1     6.3+/-1.2
2       0.0+/-0
3       0.0+/-0
4     6.6+/-1.0
5     2.0+/-0.4
6       0.0+/-0
7       0.0+/-0
dtype: object

In [54]:
signficance(deepflavour_narrow_counts)

0    23.4+/-3.0
1     7.9+/-1.5
2       0.0+/-0
3       0.0+/-0
4     9.1+/-1.4
5     2.3+/-0.4
6       0.0+/-0
7       0.0+/-0
dtype: object