In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import uncertainties as unc
from uncertainties import unumpy

In [None]:
from src.plotting_tools.cms_format import cms_format_fig, cms_style
cms_style()
from src.data_tools.get_file_list import get_file_df
from src.data_tools.apply_cuts_v2 import  process_sample
from src.plotting_tools.SysHist import make_sys_hist, SysHist
from src.assets.lumi import lumi_dict
from src.assets.regions import region_and_label

In [None]:
# Base directory from which every input and output path in this notebook is built.
output_dir = '/eos/cms/store/group/phys_exotica/bffZprime/assets_june_23'
# NOTE(review): no visible cell reads this global -- do_row takes the era from each row and
# later loops rebind the name `era` -- confirm it is still needed.
era='2018'

In [None]:
# File table found under <output_dir>/data, restricted to rows whose category is 'BFF'.
skimmed_df = get_file_df(path="{}/data".format(output_dir)).loc[lambda files: files.category == 'BFF']

In [None]:
# File table found under <output_dir>/data_gen_b_s, restricted to rows whose category is 'BFF'.
unskimmed_df = get_file_df(path="{}/data_gen_b_s".format(output_dir)).loc[lambda files: files.category == 'BFF']

In [None]:
def isKthBitSet(value, k):
    """Return True when bit ``k`` (0 = least significant) of ``value`` is set."""
    shifted = value >> k
    return bool(shifted & 1)

def multiplicity(value):
    """Return the index of the highest set bit among bits 0..17 of ``value``, or -1 if none is set.

    Several bits can be set at once (e.g. the 0b bit is added although the event
    also has two outgoing b quarks), so only the highest one is reported.
    """
    for bit in reversed(range(18)):
        if isKthBitSet(value, bit):
            return bit
    return -1
# Bit index -> category, as used by the is* helpers:
#   0j: 0, 1 | 1b: 2, 3, 4 | 1s: 5, 6 | 1b+1s: 7, 8, 9, 10 | 2b: 11, 12, 13, 14 | 2s: 15, 16
#   17: underlying b jet, but otherwise the 0b case
def is0b(value):
    """True when the highest set multiplicity bit is 0, 1 or 17."""
    highest = multiplicity(value)
    return highest in (0, 1, 17)

def is1b(value):
    """True when the highest set multiplicity bit is 2, 3 or 4."""
    highest = multiplicity(value)
    return highest in (2, 3, 4)

def is1s(value):
    """True when the highest set multiplicity bit is 5 or 6."""
    highest = multiplicity(value)
    return highest in (5, 6)

def is1b1s(value):
    """True when the highest set multiplicity bit is 7, 8, 9 or 10."""
    highest = multiplicity(value)
    return highest in (7, 8, 9, 10)

def is2b(value):
    """True when the highest set multiplicity bit is 11, 12, 13 or 14."""
    highest = multiplicity(value)
    return highest in (11, 12, 13, 14)


def which_bits(value):
    """Return the indices of all set bits among bits 0..17 as a comma-separated string ('' if none)."""
    return ",".join(str(bit) for bit in range(18) if isKthBitSet(value, bit))

In [None]:
def make_hist(row):
    """Count events per multiplicity value for the CSV file referenced by ``row.file``.

    Returns a dict whose integer keys 0..21 hold the number of events with that
    multiplicity, followed by every field of ``row`` (row fields win on key clashes).
    """
    per_event = pd.read_csv(row.file).GenMultiplicity.apply(multiplicity)
    counts = {}
    for level in range(22):
        counts[level] = (per_event == level).sum()
    counts.update(row.to_dict())
    return counts

In [None]:
def make_sum(_df, weight, postfix=''):
    """Histogram the per-event multiplicity of ``_df`` and scale it by a constant weight.

    Parameters
    ----------
    _df : DataFrame with a ``GenMultiplicity`` column (bit mask decoded by ``multiplicity``).
    weight : array-like of per-event weights; only its mean is used, so it must be constant.
    postfix : unused, kept so existing callers keep working.

    Returns
    -------
    (sums, errors) : two length-23 arrays. ``sums[k]`` is the weighted number of events
    with multiplicity ``k`` (k = 0..22) and ``errors[k]`` is sqrt(N_k) times the weight.
    Events with multiplicity -1 (no bit set) lie below the first edge and are not counted.
    """
    # Unit-width bins centred on the integers 0..22. The previous
    # np.linspace(-0.5, 23.5, 24) produced 23 bins of width 24/23, which shifted every
    # multiplicity >= 12 one bin down (11 and 12 ended up in the same bin).
    edges = np.arange(24) - 0.5
    tsum = np.histogram(_df.GenMultiplicity.apply(multiplicity), bins=edges)[0]
    mean_weight = np.mean(weight)
    return tsum*mean_weight, tsum**.5*mean_weight

In [None]:
def setup_df(_df, recompute_weights=False, xsec=0, lumi=0):
    """Add category flags, multiplicity and systematic weight columns to ``_df`` (in place).

    Parameters
    ----------
    _df : DataFrame with a ``GenMultiplicity`` column; modified and returned.
    recompute_weights : if True, the relative ``fsr_isr_weight*`` / ``pdf_weight*``
        columns are rebuilt as ``Weight_*`` / ``Weight`` before being rescaled.
    xsec, lumi : if ``xsec`` is truthy, ``sample_weight`` is set to xsec*lumi/len(_df).

    Notes
    -----
    The ``Weight_*`` columns are always overwritten with relative weight * sample_weight,
    so calling this twice with ``recompute_weights=True`` does not give the same result.
    When a branch is skipped, the columns it would have created must already exist.
    """
    gen_bits = _df.GenMultiplicity

    # final-state category flags
    for column, selector in (('0b', is0b), ('1b', is1b), ('1s', is1s),
                             ('1b1s', is1b1s), ('2b', is2b)):
        _df[column] = gen_bits.apply(selector)
    categorized = _df['0b'] | _df['1b'] | _df['1s'] | _df['1b1s'] | _df['2b']
    _df['uncategorized'] = 1 - categorized
    # highest set multiplicity bit
    _df['multiplicity'] = gen_bits.apply(multiplicity)

    # per-event normalisation
    if xsec:
        _df['sample_weight'] = float(xsec)*lumi/float(_df.shape[0])

    systematics = (('fsr_isr_weight', 'Weight_ISRFSR_'), ('pdf_weight', 'Weight_PDF_'))
    if recompute_weights:
        for relative, absolute in systematics:
            for direction in ('Down', 'Up'):
                _df[relative + direction] = _df[absolute + direction]/_df['Weight']
    for relative, absolute in systematics:
        for direction in ('Down', 'Up'):
            _df[absolute + direction] = _df[relative + direction]*_df['sample_weight']

    return _df

In [None]:
def make_ratio(nom_dict, denom_dict):
    """Divide the values of two dicts pairwise, in iteration order.

    Entries are paired by position, not by key; the result key is the numerator's key
    plus ``"_ratio"``. A zero denominator yields ``np.inf``.
    """
    ratios = {}
    for (name, numerator), (_, denominator) in zip(nom_dict.items(), denom_dict.items()):
        ratios[name + "_ratio"] = np.inf if denominator == 0 else numerator/denominator
    return ratios

In [None]:
def do_row(row):
    """Compute per-multiplicity acceptances for one skimmed sample and append them to ``reg_hist``.

    ``row`` is one row of ``skimmed_df`` (fields used here: era, mass, dbs). The matching
    unskimmed sample supplies the denominator. For every weight variation and both signal
    regions one dict is appended to the global ``reg_hist`` holding the weighted selected
    sums (``r_sum``), their errors (``r_err``), the weighted unskimmed sums (``u_sum``) and
    the ratio as a list of ufloats (``r_ratio_ufloat``).

    Raises
    ------
    IndexError : if ``unskimmed_df`` has no sample with the same mass, dbs and era.

    NOTE(review): assumes process_sample returns a DataFrame carrying the region flag
    columns, 'sample_weight', 'Weight' and the Weight_* systematics -- confirm.
    """
    era = row.era
    lumi = lumi_dict[str(row.era)]

    # matching unskimmed sample -> denominator
    tudf = unskimmed_df[(unskimmed_df.mass==row.mass) &
                        (unskimmed_df.dbs==row.dbs) &
                        (unskimmed_df.era==row.era)]
    if tudf.empty:
        raise IndexError("no unskimmed sample for mass={}, dbs={}, era={}".format(
            row.mass, row.dbs, row.era))
    urow = tudf.iloc[0]
    print(urow.file)
    udf = pd.read_csv(urow.file)
    udf = setup_df(udf, xsec=urow.xsec, lumi=lumi)

    # selected events -> numerator
    df = process_sample(row, era, verbose=True, trigger_fix=True)
    df = setup_df(df, recompute_weights=True)

    regions = {'SR1': 'SR1_jet_nom_muon_corrected_pt_ele_pt',
               'SR2': 'SR2_jet_nom_muon_corrected_pt_ele_pt'}
    # nominal weight first, then the systematic variations
    for weight in ["sample_weight",  "Weight_ISRFSR_Up", "Weight_ISRFSR_Down", "Weight_PDF_Up", "Weight_PDF_Down"]:
        u_sum, _ = make_sum(udf, udf[weight], postfix='_u')
        for regstr, reg in regions.items():
            tdf = df[df[reg]==1]
            r_sum, r_err = make_sum(tdf, tdf[weight])

            # empty denominator bins give inf/nan on purpose; only the warning is silenced
            with np.errstate(divide='ignore', invalid='ignore'):
                r_ratio = r_sum/u_sum
                r_err_ratio = r_err/u_sum
            r_ratio_ufloat = [unc.ufloat(r, re) for r, re in zip(r_ratio, r_err_ratio)]
            # .tolist() stores plain Python floats, so the string written to CSV stays
            # parseable ("1.0") whatever repr numpy scalars use
            reg_hist.append({"era": row.era, "mass": int(row.mass), "dbs": row.dbs,
                             "reg": regstr, "weight": weight,
                             "r_sum": r_sum.tolist(), "r_err": r_err.tolist(),
                             "u_sum": u_sum.tolist(), "r_ratio_ufloat": r_ratio_ufloat})
 

In [None]:
from time import perf_counter 
import datetime

# Run do_row over every skimmed sample, printing a running time estimate.
reg_hist = []          # do_row appends its result dicts here
sdf = skimmed_df
j = 0
start = perf_counter()
for j, (i, row) in enumerate(sdf.iterrows(), start=1):
    print("-----")
    print(row)
    do_row(row)

    # average time per sample so far -> estimated time remaining
    delta = perf_counter() - start
    timeper = delta/j
    time_remaining = timeper*(sdf.shape[0] - j)
    print(j, sdf.shape[0], "{:.1f}".format(j/sdf.shape[0]), timeper, str(datetime.timedelta(seconds=time_remaining)))


In [None]:
# Write the dicts collected in reg_hist to CSV (one row per dict).
# NOTE(review): the list-valued cells are written as their string form; presumably
# list_from_str is what parses them back when the file is re-read -- confirm.
tempdf = pd.DataFrame(reg_hist)
tempdf.to_csv(output_dir+"/data_gen_b_s/multiplicity_eff.csv")

In [None]:
# Stop marker for "Run All". NOTE(review): this looks like a deliberate halt between the
# slow cells above (which write multiplicity_eff.csv) and the cells below (which read it).
# A bare `break` outside a loop is a SyntaxError, so the stop is made explicit instead.
raise RuntimeError("Stop here: the cells above rebuild multiplicity_eff.csv; run the cells below manually.")

In [None]:
def list_from_str(string):
    """Parse the string form of a list (as stored in the CSV) back into a numpy array.

    ``"[1.0, 2.5]"`` becomes a float array. If the first item contains ``"+/-"`` every
    item is parsed with ``unc.ufloat_fromstr`` instead. ``"[]"`` gives an empty array.
    Whitespace around the brackets and the items is ignored.
    """
    body = string.strip().strip('][').strip()
    if not body:
        return np.array([])
    items = [item.strip() for item in body.split(',')]
    if '+/-' in items[0]:
        return np.array([unc.ufloat_fromstr(item) for item in items])
    return np.array([float(item) for item in items])

In [None]:
# Read the per-sample results back and turn the stringified list columns into arrays.
rdf = pd.read_csv(output_dir+"/data_gen_b_s/multiplicity_eff.csv")
for _list_column in ('r_sum', 'r_err', 'u_sum', 'r_ratio_ufloat'):
    rdf[_list_column] = rdf[_list_column].apply(list_from_str)

# Drop the mass=125, dbs=1.0 samples of every era after 2016.
_excluded = (rdf.mass==125) & (rdf.dbs==1.0) & (rdf.era>2016)
rdf = rdf[~_excluded]

In [None]:
# this is necessary because I set zero denoms to 1 previously
# NOTE(review): the do_row visible in this notebook never replaces zero denominators by 1,
# so this may be stale; it also zeroes any bin whose sum is exactly 1 -- confirm.
# Each u_sum array is modified in place; the function has no return statement, so the
# value of the apply() expression below carries no information.
def set_one_zero(arr): arr[arr==1] = 0
rdf.u_sum.apply(set_one_zero)

In [None]:
def select(df, **kwargs):
    """Return the rows of ``df`` where every ``column=value`` pair in ``kwargs`` matches.

    Filters are applied one after another; with no kwargs ``df`` itself is returned.
    """
    subset = df
    for column, wanted in kwargs.items():
        keep = subset[column].eq(wanted)
        subset = subset.loc[keep]
    return subset

In [None]:
# One figure per (multiplicity bin, signal region): r_sum/u_sum versus Z' mass,
# colour = era, line style = dbs value (only the first three dbs values get a style).
# NOTE(review): rdf holds one row per weight variation, and no weight is selected here,
# so every mass point appears once per weight -- confirm that is intended.
for mul in range(23):
    for reg in ['SR1', 'SR2']:
        fig, ax = plt.subplots()
        cms_format_fig('Run 2', ax)
        ax.set_ylabel('Acceptance')
        # raw string: '\p' is not a valid escape sequence in a normal string literal
        ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')

        # `plot_era` rather than `era`, so the notebook-level `era` setting is not overwritten
        for c, plot_era in zip(['r', 'g', 'b'], [2016, 2017, 2018]):
            trdf = rdf[(rdf.era==plot_era) & (rdf.reg==reg) & (rdf.mass<400) ]
            for ls, dbs in zip(['solid', 'dashed', 'dotted'], trdf.dbs.unique()):
                tdrdf = trdf[trdf.dbs==dbs].sort_values('mass')
                r_sum = tdrdf.r_sum.apply(lambda x: x[mul])
                r_err = tdrdf.r_err.apply(lambda x: x[mul])
                u_sum = tdrdf.u_sum.apply(lambda x: x[mul])
                ax.errorbar(tdrdf.mass, r_sum/u_sum, yerr=r_err/u_sum , color=c, ls=ls, label='{} {}'.format(dbs, plot_era))
        ax.legend(title='{} {}'.format(mul, reg), ncol=2)
        plt.show()
        # close this specific figure; 46 figures are created by the loop
        plt.close(fig)

In [None]:
# combine multiplicities
def sum_via_index(arr, *args):
    """Return ``arr[i]`` summed over every index ``i`` in ``args`` (0 when no index is given)."""
    return sum((arr[index] for index in args), 0)
def ratio_via_index(_df, *args):
    """Sum ``_df.r_sum_unp`` and ``_df.u_sum`` over the indices in ``args`` and return their ratio."""
    return sum_via_index(_df.r_sum_unp, *args)/sum_via_index(_df.u_sum, *args)
def make_unumpy(row):
    """Combine ``row.r_sum`` (values) and ``row.r_err`` (errors) via ``unumpy.uarray``."""
    values, errors = row.r_sum, row.r_err
    return unumpy.uarray(values, errors)

In [None]:
# Acceptance per final-state category for every (region, mass) that has all three eras.
# Numerators and denominators are summed over every selected row before dividing.
# NOTE(review): the selection does not fix `weight` or `dbs`, so systematic-weight rows
# enter the sums as well -- confirm that is intended.
# NOTE(review): `multiplicity` only returns -1..17, so bins 19 and 21 used below are
# always empty; bin 10 is counted as '1b' here but is1b1s treats it as 1b+1s -- confirm.
accept_list = []
for reg in ['SR1', 'SR2']:
    for mass in rdf.mass.unique():
        print(mass, reg)
        tdf = select(rdf, reg=reg, mass=mass)
        nera = len(tdf.era.unique())
        if nera!=3: continue
        # .assign builds a new frame instead of writing into a filtered slice of rdf
        tdf = tdf.assign(r_sum_unp=tdf.apply(make_unumpy, axis=1))
        stdf = tdf.sum()
        reg_mass_dict = {}
        reg_mass_dict['0b(1b)'] = ratio_via_index(stdf, 1, 21)
        reg_mass_dict['0b(2b)'] = ratio_via_index(stdf, 0, 17)
        reg_mass_dict['1b'] = ratio_via_index(stdf, 2, 3, 4, 10)
        reg_mass_dict['1s'] = ratio_via_index(stdf, 5,6)
        reg_mass_dict['1b+1s'] = ratio_via_index(stdf, 7, 8,9, 19)
        reg_mass_dict['2b'] = ratio_via_index(stdf, 11,12,13,14)
        reg_mass_dict['2s'] = ratio_via_index(stdf,  15, 16)
        reg_mass_dict['mass'] = mass
        reg_mass_dict['reg'] = reg
        accept_list.append(reg_mass_dict)
        
        

In [None]:
# One row per dict collected in accept_list.
acc_df = pd.DataFrame(accept_list)

In [None]:
def plot_reg(ax, _df, key, **kwargs):
    """Draw column ``key`` of ``_df`` against mass on ``ax``, in percent, with error bars.

    ``_df`` needs a ``mass`` column and a column ``key`` of ufloat values. Rows are sorted
    by mass; nominal values and standard deviations are multiplied by 100. Extra keyword
    arguments go to ``ax.errorbar``.
    """
    # use the frame that was passed in (this used to read the global `tacc_df` instead)
    ordered = _df.sort_values('mass')
    nom, std = unumpy.nominal_values(ordered[key]), unumpy.std_devs(ordered[key])
    ax.errorbar(ordered.mass, nom*100, yerr=std*100, **kwargs)

In [None]:
# Acceptance [%] versus Z' mass (below 400) for every category, one saved figure per region.
for reg in ['SR1', 'SR2']:
    tacc_df = acc_df[acc_df.reg==reg]
    tacc_df = tacc_df[tacc_df.mass <400]

    fig, ax = plt.subplots()
    cms_format_fig('Run 2', ax)
    ax.set_ylim(0,25)
    # raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
    ax.set_ylabel(r'Acceptance [\%]')
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')

    # the '2s' category is left out of this plot
    for key in ['0b(1b)', '0b(2b)', '1b', '1s', '1b+1s', '2b']:
        plot_reg(ax, tacc_df, key, label=key)
    ax.legend()
    fig.savefig('{}/gen_b_s/{}_acceptance.pdf'.format(output_dir, reg))
    plt.show()
    # release the figure instead of only clearing it
    plt.close(fig)

In [None]:
# Display the acceptance table.
acc_df

In [None]:
# '1b' acceptance [%] versus Z' mass for both signal regions on a single set of axes.
fig, ax = plt.subplots()
cms_format_fig('Run 2', ax)
# raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
ax.set_ylabel(r'Acceptance [\%]')
ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
for reg in ['SR1', 'SR2']:
    tacc_df = acc_df[acc_df.reg==reg]
    tacc_df = tacc_df[tacc_df.mass <400]

    plot_reg(ax, tacc_df, '1b', label='1b {}'.format(reg))
ax.legend()
# plot_reg draws percent values, so the range is 0-25 (a later plt.ylim(0, .25) used to override this)
ax.set_ylim(0, 25)

In [None]:
# Acceptance [%] versus Z' mass (below 400) for every category, one figure per region (not saved).
for reg in ['SR1', 'SR2']:
    tacc_df = acc_df[acc_df.reg==reg]
    tacc_df = tacc_df[tacc_df.mass <400]

    fig, ax = plt.subplots()
    cms_format_fig('Run 2', ax)
    # raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
    ax.set_ylabel(r'Acceptance [\%]')
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')

    # acc_df has no plain '0b' column; the 0b category is stored as '0b(1b)' and '0b(2b)'
    for key in ['0b(1b)', '0b(2b)', '1b', '1s', '1b+1s', '2b']:
        plot_reg(ax, tacc_df, key, label=key)
    ax.legend()
    # plot_reg draws percent values, so the range is 0-25 rather than 0-0.25
    ax.set_ylim(0, 25)

    plt.show()
    plt.close(fig)

In [None]:
# NOTE(review): this cell repeats the per-region acceptance plot of the preceding cell;
# consider keeping only one of them.
for reg in ['SR1', 'SR2']:
    tacc_df = acc_df[acc_df.reg==reg]
    tacc_df = tacc_df[tacc_df.mass <400]

    fig, ax = plt.subplots()
    cms_format_fig('Run 2', ax)
    # raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
    ax.set_ylabel(r'Acceptance [\%]')
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')

    # acc_df has no plain '0b' column; the 0b category is stored as '0b(1b)' and '0b(2b)'
    for key in ['0b(1b)', '0b(2b)', '1b', '1s', '1b+1s', '2b']:
        plot_reg(ax, tacc_df, key, label=key)
    ax.legend()
    # plot_reg draws percent values, so the range is 0-25 rather than 0-0.25
    ax.set_ylim(0, 25)

    plt.show()
    plt.close(fig)

In [None]:
# SR1 rows at mass 350 with dbs = 0.04 (all eras and weights present in rdf).
p04s = select(rdf, reg='SR1', mass=350, dbs=0.04)

In [None]:
# SR1 rows at mass 350 with dbs = 0.5 (all eras and weights present in rdf).
p5s = select(rdf, reg='SR1', mass=350, dbs=0.5)

In [None]:
# Compare the summed ratios of the two dbs selections bin by bin.
# A bin is skipped when either ratio is zero or when the difference is smaller than
# its own uncertainty; otherwise the difference and both values are printed.
# NOTE(review): "*100/3" presumably converts to percent and averages three eras, but the
# selections are not restricted to a single weight -- confirm the normalisation.
for i, (p5, p04) in enumerate(zip(p5s.r_ratio_ufloat.sum(), p04s.r_ratio_ufloat.sum())):
    if p04*p5 == 0: continue
    if abs(unc.nominal_value(p5-p04)) < abs(unc.std_dev(p5-p04)): continue
    print("{} {:.2f} {:.2f} {:.2f}".format( i, (p5-p04)*100/3, p5*100/3, p04*100/3))

In [None]:
# Scratch note: only displays the tuple (0, 2, 3).
# NOTE(review): meaning not recorded -- possibly bin indices of interest; confirm or remove.
0, 2, 3

In [None]:
# Spot check: reset reg_hist and run do_row for the samples with index labels 2 and 0 only.
reg_hist = []
do_row(skimmed_df.loc[2])
do_row(skimmed_df.loc[0])

In [None]:
# Keep only the rows computed with 'sample_weight'.
# NOTE: rebinds the generic name `df`; later cells that read `df` see this frame.
df = pd.DataFrame(reg_hist)
df = df[(df.weight=='sample_weight')]

In [None]:
# Display the spot-check rows.
df

In [None]:
# Print the per-bin ratio of the first SR1 row, then display that row.
for i, uflt in enumerate((df[df.reg=='SR1'].iloc[0].r_ratio_ufloat)):
    print(i, uflt)
    
df[df.reg=='SR1'].iloc[0]

In [None]:
# Print the per-bin ratio of the second SR1 row, then display that row.
for i, uflt in enumerate((df[df.reg=='SR1'].iloc[1].r_ratio_ufloat)):
    print(i, uflt)
df[df.reg=='SR1'].iloc[1]

In [None]:
# Ratio summed over bins 2, 3 and 4 (the bins is1b accepts) for the first and second SR1 row.
sum(df[df.reg=='SR1'].iloc[0].r_ratio_ufloat[2:5]), sum(df[df.reg=='SR1'].iloc[1].r_ratio_ufloat[2:5])

In [None]:
# Scratch note: only displays the tuple (0, 3, 17).
# NOTE(review): meaning not recorded -- possibly bin indices of interest; confirm or remove.
0,3, 17

In [None]:
# Fraction of the SR2-selected events of the sample with index label 5 that are 1b+1s.
df150 = pd.read_csv(skimmed_df.file[5])
df150 = df150[df150.SR2_jet_nom_muon_corrected_pt_ele_pt==1]
# The denominator used to be `df125.shape[0]`, a name that is defined nowhere in this notebook.
# NOTE(review): assumed to mean the frame loaded above -- confirm the intended denominator.
df150.GenMultiplicity.apply(is1b1s).sum()/df150.shape[0]

In [None]:
from time import perf_counter 
import datetime

# Run do_row over every skimmed sample, printing a running time estimate.
reg_hist = []          # do_row appends its result dicts here
sdf = skimmed_df
j = 0
start = perf_counter()
for j, (i, row) in enumerate(sdf.iterrows(), start=1):
    print("-----")
    print(row)
    do_row(row)

    # average time per sample so far -> estimated time remaining
    delta = perf_counter() - start
    timeper = delta/j
    time_remaining = timeper*(sdf.shape[0] - j)
    print(j, sdf.shape[0], "{:.1f}".format(j/sdf.shape[0]), timeper, str(datetime.timedelta(seconds=time_remaining)))


In [None]:
# Write the dicts collected in reg_hist to efficency_df.csv (one row per dict).
tempdf = pd.DataFrame(reg_hist)
tempdf.to_csv(output_dir+"/data_gen_b_s/efficency_df.csv")

In [None]:
# NOTE(review): writes the same frame to the same path as the previous cell -- redundant?
tempdf.to_csv(output_dir+"/data_gen_b_s/efficency_df.csv")

In [None]:
# Rebinds `rdf` to the contents of efficency_df.csv (replacing the multiplicity_eff.csv frame).
rdf = pd.read_csv(output_dir+"/data_gen_b_s/efficency_df.csv")


In [None]:
# SR2 rows computed with 'sample_weight' for masses below 150.
trdf = rdf[(rdf.reg=='SR2') & (rdf.weight=='sample_weight') & (rdf.mass<150)]

In [None]:
# NOTE(review): the do_row visible in this notebook writes the keys era, mass, dbs, reg,
# weight, r_sum, r_err, u_sum and r_ratio_ufloat; '1b1s', '1b1s_u' and '1b1s_ratio_ufloat'
# are not among them, so this cell relies on a CSV produced by different code -- confirm.
trdf[['era', 'mass', 'dbs', 'reg', 'weight', '1b1s', '1b1s_u', '1b1s_ratio_ufloat']]

In [None]:
# Reload efficency_df.csv and parse the per-category ratio columns from strings into ufloats.
rdf = pd.read_csv(output_dir+"/data_gen_b_s/efficency_df.csv")
for _ratio_column in ('0b_ratio_ufloat', '1b_ratio_ufloat', '1s_ratio_ufloat',
                      '1b1s_ratio_ufloat', '2b_ratio_ufloat'):
    rdf[_ratio_column] = rdf[_ratio_column].apply(unc.ufloat_fromstr)

In [None]:
def weighted_mean(ufloats):
    """Inverse-variance weighted mean down each column of ``ufloats``.

    Every entry is weighted by 1/std_dev**2. Entries with zero standard deviation get
    weight 0 (their std is replaced by inf) rather than an infinite weight. Returns
    sum(value*weight)/sum(weight) per column; a column whose weights are all zero
    gives 0/0.
    """
    stds = unumpy.std_devs(ufloats)
    stds[stds==0]=np.inf
    weights = 1/stds**2
    return np.multiply(ufloats,weights).sum(axis=0)/weights.sum(axis=0)
def average_df(**kwargs):
    """Weighted mean of the five ``*_ratio_ufloat`` columns over the matching rows of ``rdf``.

    Each ``column=value`` keyword keeps the rows of the global ``rdf`` where that column
    equals the value; with no keywords every row is used. Returns the keywords merged
    with one averaged value per ratio column (computed by ``weighted_mean``).
    """
    # start from an all-True mask so that "no filter" selects every row; the previous
    # empty-list sentinel was used as an indexer in that case and dropped all columns
    df_filter = pd.Series(True, index=rdf.index)
    for k, v in kwargs.items():
        df_filter = df_filter & (rdf[k] == v)

    tdf = rdf[df_filter]

    multiplicities = tdf[['0b_ratio_ufloat', '1b_ratio_ufloat', '1s_ratio_ufloat', '1b1s_ratio_ufloat', '2b_ratio_ufloat']]
    average =  weighted_mean(multiplicities)
    return {**kwargs, **average.to_dict()}

In [None]:
# Call the average_df function once for every (region, mass, weight) combination in rdf.
# NOTE: this must run while `average_df` is still the function; the next cell rebinds that name.
average_list = []
regions, masses, weights = rdf.reg.unique(), rdf.mass.unique(), rdf.weight.unique()
for reg in regions:
    print(reg)
    for mass in masses:
        print("\t ", mass)
        for weight in weights:
            average_list.append(average_df(reg=reg, mass=mass, weight=weight))
            
            

In [None]:
# NOTE: this rebinds `average_df` from the function defined earlier to a DataFrame.
# print_mass_reg reads it as a frame, but any later cell that calls average_df(...) fails
# with "'DataFrame' object is not callable" unless the function cell is run again.
average_df = pd.DataFrame(average_list)

In [None]:
# Display the averaged rows for mass 125 computed with 'sample_weight'.
average_df[(average_df.mass==125) & (average_df.weight=="sample_weight")]

In [None]:
def print_mass_reg(mass, reg, formatted=True):
    """Build one summary row per final-state category for a given mass and region.

    Reads the global ``average_df`` (used here as a DataFrame with ``mass``, ``reg``,
    ``weight`` and ``*ratio*`` columns holding ufloats). For every ratio column the row
    holds the nominal acceptance and its standard deviation (both taken from the
    'sample_weight' row) and, for each systematic weight, the shift of its nominal
    value with respect to the 'sample_weight' value.

    Parameters
    ----------
    mass, reg : values used to select rows of ``average_df``.
    formatted : if True (default, the previous behaviour) numbers are returned as
        percent strings such as ``'12.34%'``; if False the raw floats are returned so
        the result can be plotted or averaged.

    Raises
    ------
    IndexError : if a required weight row is missing for this mass/region.
    """
    def format_value(value):
        return "{:.2f}%".format(value*100) if formatted else value

    tdf = average_df[(average_df.mass==mass) & (average_df.reg==reg)]
    keys = tdf.filter(regex='ratio').keys()
    weights = ['Weight_ISRFSR_Up', 'Weight_ISRFSR_Down', 'Weight_PDF_Up', 'Weight_PDF_Down']
    nominal_rows = tdf[tdf.weight=='sample_weight']

    row_list = []
    for key in keys:
        unom = nominal_rows.iloc[0][key]
        nom, std = unc.nominal_value(unom), unc.std_dev(unom)

        row_dict = {
            'mass': mass,
            'reg': reg,
            'type': key.replace('_ratio_ufloat', ''),
            'acceptance': format_value(nom),
            'statistical': format_value(std),
        }
        for w in weights:
            # shift of this systematic variation relative to the nominal value
            snom = unc.nominal_value(tdf[tdf.weight==w].iloc[0][key])
            row_dict[w] = format_value(snom-nom)
        row_list.append(row_dict)
    return row_list

In [None]:
# Collect the summary rows for the listed masses in both signal regions.
row_lists = []
for mass in [125, 150, 175, 200, 350]:
    for reg in ['SR1', "SR2"]:
        row_lists += print_mass_reg(mass, reg)

In [None]:
# One row per (mass, region, category) summary dict.
print_df = pd.DataFrame(row_lists)

In [None]:
# Save the summary table.
print_df.to_csv(output_dir+"/data_gen_b_s/summary_df.csv")

In [None]:
###
## seeing range of systematic 
###
# NOTE(review): print_mass_reg as written returns percent strings such as '12.34%', so
# abs() and errorbar on these columns only work if print_df holds numbers -- confirm.
# NOTE(review): x, y, yerr and curve_fit are not used anywhere in this cell.
from scipy.optimize import curve_fit

x = pd.concat([print_df.acceptance,print_df.acceptance])
y = pd.concat([abs(print_df.Weight_ISRFSR_Up),abs(print_df.Weight_ISRFSR_Down)])
yerr = pd.concat([abs(print_df.statistical),abs(print_df.statistical)])

plt.errorbar(print_df.acceptance, abs(print_df.Weight_ISRFSR_Up), linestyle='', yerr=print_df.statistical)
plt.errorbar(print_df.acceptance, abs(print_df.Weight_ISRFSR_Down), linestyle='', yerr=print_df.statistical)

In [None]:
def _as_fraction(column):
    """Return a summary column as floats: '12.34%' strings become 0.1234, numbers pass through."""
    if column.dtype == object:
        return column.astype(str).str.rstrip('%').astype(float)/100
    return column.astype(float)

# Acceptance [%] versus mass from the summary table, one saved figure per region.
for reg in ['SR1', 'SR2']:
    fig, ax = plt.subplots()
    cms_format_fig('Run 2', ax)
    ax.set_ylim(0,25)
    # raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
    ax.set_ylabel(r'Acceptance [\%]')
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
    for fs_type in print_df.type.unique():
        tdf = print_df[(print_df.reg==reg) & (print_df.type==fs_type)]
        label = '1b+1s' if fs_type == '1b1s' else fs_type
        # print_df stores percent strings; multiplying a string column by 100 would
        # repeat the text instead of scaling the number, so convert first
        ax.errorbar(tdf.mass, _as_fraction(tdf.acceptance)*100,
                    yerr=_as_fraction(tdf.statistical)*100, label=label)
    ax.legend()
    fig.savefig('{}/gen_b_s/{}_acceptance.pdf'.format(output_dir, reg))
    

In [None]:
# Display the path built from output_dir and the current value of `reg`.
'{}/gen_b_s/{}_acceptance.pdf'.format(output_dir, reg)

In [None]:
# Print the mass-125 rows of the summary table as LaTeX.
print(print_df[print_df.mass==125].to_latex(index=False))

In [None]:
# Column means of the summary table per (region, category).
# NOTE(review): print_mass_reg as written stores acceptance and shifts as '12.34%' strings,
# so .mean() cannot average those columns unless print_df holds numbers -- confirm.
a_list = []
for reg in ['SR1', 'SR2']:
    for ctype in ['0b', '1b', '1s', '1b1s', '2b']:
    
        a_list.append({"reg": reg, "type":ctype, **print_df[(print_df.type==ctype) & (print_df.reg==reg)].mean().to_dict()})

In [None]:
# One row per (region, category) dict collected in a_list.
adf = pd.DataFrame(a_list)

In [None]:
# Display the averaged summary columns (requires that .mean() kept them in a_list).
adf[['reg', 'type', 'acceptance', 'statistical', 'Weight_ISRFSR_Up','Weight_ISRFSR_Down', 'Weight_PDF_Up', 'Weight_PDF_Down']]

In [None]:
# Distinct values of the averaged `mass` column.
adf.mass.unique()

In [None]:
def get_nom_values(series):
    """Split a Series of ufloats into two numpy arrays: (nominal values, standard deviations)."""
    nominal = series.apply(unumpy.nominal_values).to_numpy()
    errors = series.apply(unumpy.std_devs).to_numpy()
    return nominal, errors

In [None]:
def plot_series(ax, tdf, key):
    """Draw column ``key`` of ``tdf`` (ufloats) against ``tdf['mass']`` on ``ax`` with error bars.

    The curve is labelled with ``key``.
    """
    mass = tdf['mass']
    nom, err = get_nom_values(tdf[key])
    # draw on the axes that were passed in rather than on whatever pyplot considers current
    ax.errorbar(mass, nom, yerr=err, label=key)

In [None]:
# SR1 averages at mass 350 for every weight in rdf.
# NOTE: needs `average_df` to be the function; an earlier cell rebinds that name to a
# DataFrame, in which case this call fails until the function cell is run again.
values = []
for w in rdf.weight.unique():
    for mass in [350]:
        values.append(average_df(mass=mass, reg='SR1', weight=w))
tdf = pd.DataFrame(values)
tdf

In [None]:
def draw_by_reg_weight(ax, reg,  weight, ratio, **kwargs):
    """Average every mass in ``rdf`` for one region and weight, then plot column ``ratio``.

    ``average_df`` is called once per distinct mass and the results are passed to
    ``plot_series``. Extra keyword arguments are accepted but not used.
    """
    records = [average_df(mass=mass, reg=reg, weight=weight) for mass in rdf.mass.unique()]
    plot_series(ax, pd.DataFrame(records), ratio)

In [None]:
# SR1: one curve per (ratio, weight) combination on a single set of axes.
# NOTE(review): the average_df function returns columns named '0b_ratio_ufloat',
# '1b_ratio_ufloat', ...; the 'rat_*' keys used here are not among them -- confirm.
fig, ax = plt.subplots()
for rat in ['rat_0b', 'rat_1b', 'rat_1s', 'rat_1b1s', 'rat_2b']:
    for w in rdf.weight.unique():
        draw_by_reg_weight(ax, 'SR1',  w, rat)


In [None]:
# SR2: one curve per (ratio, weight) combination on a single set of axes.
# NOTE(review): the average_df function returns columns named '0b_ratio_ufloat',
# '1b_ratio_ufloat', ...; the 'rat_*' keys used here are not among them -- confirm.
fig, ax = plt.subplots()
for rat in ['rat_0b', 'rat_1b', 'rat_1s', 'rat_1b1s', 'rat_2b']:
    for w in rdf.weight.unique():
        draw_by_reg_weight(ax, 'SR2',  w, rat)


In [None]:
# SR2 acceptance versus mass from the averaged ratios.
# This cell did not parse (`weight=)`) and called plot_series without the frame.
# NOTE(review): reg='SR2' is taken from the legend title and weight='sample_weight' is
# assumed to be the intended nominal weight -- confirm both.
# NOTE: needs `average_df` to be the function, not the DataFrame an earlier cell binds
# to that name.
dbs_test = []
for mass in df.mass.unique():
    dbs_test.append(average_df(mass=mass, reg='SR2', weight='sample_weight'))

tdf = pd.DataFrame(dbs_test)

fig, ax = plt.subplots(figsize=[10,10])
# column names as returned by the average_df function
for key in ['0b_ratio_ufloat', '1b_ratio_ufloat', '1s_ratio_ufloat',
            '1b1s_ratio_ufloat', '2b_ratio_ufloat']:
    plot_series(ax, tdf, key)
ax.legend(title='${}$'.format(region_and_label['SR2']))
# NOTE(review): plot_series draws the values unscaled while the axis says percent -- confirm
ax.set_ylim(0,25)
cms_format_fig('Run 2', ax)
# raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
ax.set_ylabel(r'Acceptance [\%]')
ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')


#fig.savefig('{}/gen_b_s/SR2_acceptance.pdf'.format(output_dir))
print(tdf.to_latex(index=False))

tdf

In [None]:
# SR1 acceptance versus mass from the averaged ratios; the figure is saved.
# plot_series was called without the frame and with keys that are not columns of it.
# NOTE(review): no weight is selected, so every weight variation enters the average --
# confirm whether weight='sample_weight' was intended.
# NOTE: needs `average_df` to be the function, not the DataFrame an earlier cell binds
# to that name.
dbs_test = []
for mass in df.mass.unique():
    dbs_test.append(average_df(mass=mass, reg='SR1'))

tdf = pd.DataFrame(dbs_test)

fig, ax = plt.subplots(figsize=[10,10])
# column names as returned by the average_df function
for key in ['0b_ratio_ufloat', '1b_ratio_ufloat', '1s_ratio_ufloat',
            '1b1s_ratio_ufloat', '2b_ratio_ufloat']:
    plot_series(ax, tdf, key)
ax.legend(title='${}$'.format(region_and_label['SR1']))
cms_format_fig('Run 2', ax)
# NOTE(review): plot_series draws the values unscaled while the axis says percent -- confirm
ax.set_ylim(0,25)
# raw strings: '\%' and '\p' are not valid escape sequences in normal string literals
ax.set_ylabel(r'Acceptance [\%]')
ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
fig.savefig('{}/gen_b_s/SR1_acceptance.pdf'.format(output_dir))

print(tdf.to_latex(index=False))

tdf

In [None]:
def draw_bar(values, log = False, **kwargs):
    """Bar chart of ``values`` for the five final-state categories on a new full-size axes.

    The y range is set to [0, 0.5]; with ``log=True`` the y axis is switched to a log
    scale. Extra keyword arguments go to ``ax.bar``. Returns ``(fig, ax)``.
    """
    categories = ['0b', '1b', '1s', '1b+1s', '2b']
    fig = plt.figure()
    ax = fig.add_axes([0,0,1,1])
    ax.bar(categories, values, **kwargs)
    ax.set_ylim([0,.5])
    if log:
        plt.yscale('log')
    return fig, ax

In [None]:
def comparison_bar(uncut, region, reg):
    """Side-by-side bars of the pre-cut and in-region distributions, both as fractions of sum(uncut).

    Parameters
    ----------
    uncut : sequence of counts before any cut (blue bars, labelled 'pre-cuts').
    region : sequence of counts after the region selection (red bars, labelled ``reg``).
    reg : label of the region.

    Returns ``(fig, ax)``. The ticks carry the five category names when ``uncut`` has
    exactly five entries; otherwise the numeric bin positions are kept.
    """
    uncut = np.asarray(uncut, dtype=float)
    region = np.asarray(region, dtype=float)
    # positions come from the input itself (this used to read the global `nom`, and a
    # plain range cannot be shifted by a float)
    indices = np.arange(len(uncut))
    names = ['0b', '1b', '1s', '1b+1s', '2b']
    # a third of the bin spacing, so that both bars fit next to each other
    width = np.min(np.diff(indices))/3. if len(indices) > 1 else 1/3.
    total = np.sum(uncut)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.bar(indices-width/2.,uncut/total,width,color='b',label='pre-cuts')
    ax.bar(indices+width/2.,region/total,width,color='r',label='{}'.format(reg))
    # fix the tick positions before naming them, so each label sits under its bin
    ax.set_xticks(indices)
    if len(indices) == len(names):
        ax.set_xticklabels(names)
    return fig, ax


In [None]:
# For every entry of reg_hist: a bar chart of the per-category acceptance, a pre-cut vs.
# in-region comparison chart, and one LaTeX table line per category.
# NOTE(review): this expects the keys 'region_hist' and 'uncut_hist'; the do_row visible
# in this notebook stores 'r_sum' and 'u_sum' instead -- confirm which reg_hist this needs.
for row_dict in reg_hist:
    era = row_dict['era']
    region_hist = np.asarray(row_dict['region_hist'], dtype=float)
    uncut_hist = np.asarray(row_dict['uncut_hist'], dtype=float)

    # acceptance with sqrt(N) errors on numerator and denominator
    ratio = unumpy.uarray(region_hist, region_hist**.5)/unumpy.uarray(uncut_hist, uncut_hist**.5)
    nom, std = unumpy.nominal_values(ratio), unumpy.std_devs(ratio)
    fig, ax = draw_bar(nom, yerr=std)
    cms_format_fig(str(era), ax)
    ax.set_xlabel("Final State b and s Multiplicity")
    ax.set_ylabel("Acceptance")
    # raw string: '\d' is not a valid escape sequence in a normal string literal
    title=r'{mass} GeV $\delta_{{bs}}={dbs}$'.format(**row_dict)
    ax.legend(title=title)

    reg = 'SR1' if 'SR1' in row_dict['reg'] else 'SR2'
    fig, ax = comparison_bar(uncut_hist, region_hist, reg)

    ax.legend(title=title)

    propreg  = region_hist/np.sum(uncut_hist)
    propunc = uncut_hist/np.sum(uncut_hist)
    print("--------")
    print(title + " " + row_dict['reg'] + " " + str(era))
    # dedicated loop names: the previous ones rebound `unc` (the uncertainties module
    # alias used elsewhere in the notebook) as well as `reg`, `nom` and `std`
    for bin_name, reg_frac, uncut_frac, acc, acc_err in zip(['0b', '1b', '1s', '1b+1s', '2b'], propreg, propunc, nom, std):
        print("{} & {:.2f} & {:.2f} & {:.2f} & {:.2f} \\\\".format(bin_name, reg_frac*100, uncut_frac*100, acc*100, acc_err*100))
        

In [None]:
# List the column names of `df`.
df.keys()