In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from IPython.display import display, HTML
# Inject a CSS rule that makes the notebook's ".container" element use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from bff_plotting_tools.data_getter import get_data
from plotting_meta.plotting_meta import bins, color_cycle, cms_format_fig, Bins

from bff_plotting_tools.make_hists import make_sys, SysHist

In [None]:
# Era to process; it is passed to get_data and later used to name the output CSV.
era = '2016'

# get_data returns the event table plus a second value stored as `lumi`
# (presumably the integrated luminosity for the era -- confirm in data_getter).
df, lumi = get_data(era)

# Keep only events whose dilepton mass is above 105.
high_mass = df.DiLepMass > 105
df = df[high_mass]

In [None]:
# Groups individual sample names into background categories (plus "data").
# The key order is significant: the tables below are filled by iterating over
# this dict, so reordering keys reorders the resulting rows/columns.
bck_dict = {
    "DY": [
        'ZToEE_M_120_200',
        'ZToEE_M_200_400',
        'ZToEE_M_400_800',
        'ZToEE_M_50_120',
        'ZToEE_M_800_1400',
        'ZToMuMu_M_120_200',
        'ZToMuMu_M_200_400',
        'ZToMuMu_M_400_800',
        'ZToMuMu_M_50_120',
        'ZToMuMu_M_800_1400',
    ],
    "TT": ['mc_ttbar'],
    "ST": ['mc_santitop', 'mc_stop'],
    "WW": ['mc_ww'],
    "WZ": ['mc_wz'],
    "ZZ": ['mc_zz'],
    "data": ['data_mu', 'data_el'],
}

In [None]:
def get_count_unc(reg, name_list):
    '''Return the yield and its uncertainties for one region and one set of samples.

    Selects the rows of the module-level ``df`` whose ``<reg>_nom`` flag equals 1
    and whose ``name`` is in ``name_list``, then hands them to ``make_sys`` with
    the two bin edges [105, 1e5] (presumably a single bin -- confirm in make_hists).

    Args:
        reg: region label, e.g. 'SR1'; the selection column is '<reg>_nom'.
        name_list: sample names to include in the selection.

    Returns:
        Tuple ``(nominal, sys, std)`` read from the first bin of the make_sys
        result: its nominal value, half the difference between its ``up`` and
        ``down`` values, and its ``std`` value.
    '''
    in_region = df["{}_nom".format(reg)] == 1
    in_samples = df.name.isin(name_list)
    selected = df[in_region & in_samples]

    hist = make_sys(selected, 'DiLepMass', reg, bin_edges=[105,1e5])

    central = hist.nominal[0]
    # Symmetrised systematic: half the up/down spread.
    half_spread = (hist.up - hist.down) / 2
    return central, half_spread[0], hist.std[0]

In [None]:
# Maps each region to a display label written with '#'-style markup
# (looks like ROOT TLatex syntax -- confirm against the plotting code).
# The key order defines the order in which regions are processed below.
region_and_label = {'SR1': "#mu#mu_{b}",
           'CR10': "#mu#mu_{j}",
           'CR13': "ee_{b}",
           'CR14': "ee_{j}",
           'SR2': "#mu#mu_{1,2 b}",
           'CR20': "#mu#mu_{2 j}",
           'CR23': "ee_{1,2 b}",
           'CR24': "ee_{2 j}",
          }
# Maps each region to the LaTeX macro used for it in the AN.
# Raw strings are required: "\S" and "\C" are not valid escape sequences, so the
# non-raw form triggers an invalid-escape warning that newer Python versions
# intend to turn into an error. The runtime values are unchanged.
region_and_label_AN = {'SR1': r"\SR",
           'CR10': r"\CRmmj",
           'CR13': r"\CReeb",
           'CR14': r"\CReej",
           'SR2': r"\SRTwo",
           'CR20': r"\CRmmjTwo",
           'CR23': r"\CReebTwo",
           'CR24': r"\CReejTwo",
          }

In [None]:
# Build one record per (region, category) pair holding the nominal yield and
# its systematic and statistical uncertainties, as returned by get_count_unc.
stat_list = []
for region in region_and_label:
    for category, samples in bck_dict.items():
        central, syst, stat = get_count_unc(region, samples)
        stat_list.append(
            dict(reg=region, name=category, nom=central, sys=syst, stat=stat)
        )

In [None]:
# Turn the list of per-(region, category) records into a single table.
count_df = pd.DataFrame(stat_list)

In [None]:
# Blind the data: for 'data' rows in regions whose label contains 'SR',
# zero the nominal count and both uncertainty columns.
is_blinded = count_df.reg.str.contains('SR') & (count_df.name == 'data')
count_df.loc[is_blinded, ['nom', 'sys', 'stat']] = 0

In [None]:
import pandas as pd
# Write the counts for this era to disk. DataFrame.to_csv returns None when it
# is given a path, so its result must not be assigned back to count_df:
# doing so would discard the table and make this cell fail when re-run.
count_df.to_csv('output/count_csv_region_{}.csv'.format(era))

In [None]:
# Reload the counts for this era from the CSV file, replacing the in-memory table.
# NOTE(review): the file is written without index=False, so this read presumably
# picks up an extra unnamed index column -- harmless for the lookups below; confirm.
count_df = pd.read_csv('output/count_csv_region_{}.csv'.format(era))

In [None]:
# Reformat the counts into one row per region so the table is easier to read in the AN.
# usestring=True  -> each cell is a "nominal+/-total uncertainty" string (for printing/LaTeX);
# usestring=False -> each cell is an uncertainties.ufloat (for further arithmetic).
usestring = True
if not usestring:
    # Imported lazily: only the ufloat branch needs this third-party package,
    # and the notebook does not import it anywhere else.
    import uncertainties

reg_list = []
for reg in region_and_label:
    bck_string_dict = {}
    bck_string_dict['reg'] = region_and_label_AN[reg]
    for bcks in bck_dict:
        tcdf = count_df[(count_df.reg==reg) & (count_df.name==bcks)]
        # Exactly one row is expected per (region, category); take it.
        row = tcdf.iloc[0]
        # Total uncertainty: systematic and statistical parts added in quadrature.
        total_unc = (row.sys**2+row.stat**2)**.5

        if usestring:
            bck_string_dict[bcks] = "{:.1f}+/-{:.1f}".format(row.nom, total_unc)
        else:
            bck_string_dict[bcks] = uncertainties.ufloat(row.nom, total_unc)
    reg_list.append(bck_string_dict)

In [None]:
# One row per region; columns are 'reg' plus one column per key of bck_dict.
count_df_by_reg = pd.DataFrame(reg_list)

In [None]:
# Print the per-region yields as a LaTeX table. Only the listed background columns are
# included (the 'data' column is left out); escape=False keeps the backslash macros in
# the 'reg' column from being escaped, and index=False omits the row index.
print(count_df_by_reg[['reg', 'DY', 'TT', 'ST', 'WW', 'WZ', 'ZZ']].to_latex(escape=False, index=False))

In [None]:
# Show the full per-region table, including the 'data' column.
count_df_by_reg

In [None]:
# Same per-region layout as the yield table, but every entry (nominal and total
# uncertainty) is divided by the summed nominal yield of all non-data categories
# in that region, i.e. each cell is a fraction of the region's total background.
reg_list_norm = []
for region in region_and_label:
    in_region = count_df.reg == region
    # Denominator: sum of nominal yields over everything except 'data'.
    mc_total = count_df.loc[in_region & (count_df.name != 'data'), 'nom'].sum()

    fractions = {'reg': region_and_label_AN[region]}
    for category in bck_dict:
        entry = count_df[in_region & (count_df.name == category)].iloc[0]
        # Systematic and statistical uncertainties combined in quadrature.
        total_unc = (entry.sys**2 + entry.stat**2)**.5
        fractions[category] = "{:.2f}+/-{:.2f}".format(entry.nom/mc_total, total_unc/mc_total)
    reg_list_norm.append(fractions)

count_df_by_reg_norm = pd.DataFrame(reg_list_norm)

In [None]:
# Print the normalised per-region table as LaTeX, with the same column selection and
# options as the yield table: background columns only, no escaping, no row index.
print(count_df_by_reg_norm[['reg', 'DY', 'TT', 'ST', 'WW', 'WZ', 'ZZ']].to_latex(escape=False, index=False))

In [None]:
def signficance(row):
    """Return BFF200 / sqrt(BFF200 + DY + TT) for one table row.

    Args:
        row: any object exposing numeric ``BFF200``, ``DY`` and ``TT``
            attributes (e.g. a row passed by ``DataFrame.apply(axis=1)``).

    Returns:
        The ``BFF200`` value divided by the square root of the sum of the
        ``BFF200``, ``DY`` and ``TT`` values.
    """
    signal = row.BFF200
    # Keep the original left-to-right summation order.
    signal_plus_background = signal + row.DY + row.TT
    return signal / signal_plus_background**.5

In [None]:
# Evaluate signficance() once per region row.
# NOTE(review): count_df_by_reg is built from the keys of bck_dict, which has no 'BFF200'
# entry, and with usestring=True its cells are formatted strings -- as written this call
# looks like it cannot succeed; confirm where the signal column is meant to come from.
count_df_by_reg.apply(signficance, axis=1)

In [None]:
# Show the per-region table again (repeats an earlier display cell).
count_df_by_reg

In [None]:
# Repeat of the earlier significance evaluation.
# NOTE(review): same caveat applies -- no 'BFF200' column is created anywhere in this
# notebook, so this call presumably fails; confirm or remove the duplicate cell.
count_df_by_reg.apply(signficance, axis=1)