In [None]:
import pandas as pd
import numpy as np
import uncertainties as unc
from uncertainties import unumpy
import matplotlib.pyplot as plt

In [None]:
from src.plotting_tools.cms_format import cms_format_fig, cms_style
cms_style()
from src.data_tools.get_file_list import get_file_df
from src.data_tools.apply_cuts_v2 import  process_sample
from src.plotting_tools.SysHist import make_sys_hist, SysHist
from src.assets.lumi import lumi_dict
from src.assets.regions import region_and_label

In [None]:
# Base directory under which this notebook reads its input CSV and writes its plots and tables.
output_dir = '/eos/cms/store/group/phys_exotica/bffZprime/assets_june_23'


In [None]:
def list_from_str(string):
    """Parse the string form of a list (e.g. ``"[1.0, 2.0]"``) into a numpy array.

    Entries are expected to be separated by ``", "``. When the first entry
    contains ``"+/-"`` every entry is parsed with ``unc.ufloat_fromstr``;
    otherwise every entry is converted with ``float``.

    Surrounding whitespace is ignored, and an empty list (``"[]"``) yields an
    empty array instead of raising ``ValueError`` on ``float('')``.
    """
    body = string.strip().strip('][')
    if not body.strip():
        # Nothing between the brackets: there are no entries to convert.
        return np.array([])
    arr = body.split(', ')
    if '+/-' in arr[0]:
        arr = [unc.ufloat_fromstr(x) for x in arr]
    else:
        arr = [float(x) for x in arr]
    return np.array(arr)

In [None]:
rdf = pd.read_csv(output_dir + "/data_gen_b_s/multiplicity_eff.csv")
# The array-valued columns come back from the CSV as strings; parse them into numpy arrays.
for column in ['r_sum', 'r_err', 'u_sum', 'r_ratio_ufloat']:
    rdf[column] = rdf[column].apply(list_from_str)

# Drop the rows with mass 125 and dbs 1.0 for every era after 2016.
is_excluded = (rdf.mass == 125) & (rdf.dbs == 1.0) & (rdf.era > 2016)
rdf = rdf[~is_excluded]


def set_one_zero(arr):
    """Overwrite, in place, every entry of ``arr`` that equals 1 with 0."""
    arr[arr == 1] = 0


# This is necessary because zero denominators were set to 1 previously.
rdf.u_sum.apply(set_one_zero)

In [None]:
def select(df, **kwargs):
    """Return the rows of ``df`` whose columns equal the given keyword values.

    Each keyword names a column and its value is the entry that column must
    hold. The conditions are applied one after another, so a returned row
    satisfies all of them. With no keywords ``df`` itself is returned.
    """
    remaining = df
    for column in kwargs:
        keep = remaining[column] == kwargs[column]
        remaining = remaining[keep]
    return remaining

In [None]:
# One figure per (array index ``mul``, region): r_sum/u_sum versus mass,
# one colour per era and one line style per dbs value.
for mul in range(23):
    for reg in ['SR1', 'SR2']:
        fig, ax = plt.subplots()
        cms_format_fig('Run 2', ax)
        ax.set_ylabel('Acceptance')
        # Raw string: '\p' is not a valid Python escape sequence, so a plain
        # literal triggers an invalid-escape warning on recent Python versions.
        ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')

        for c, era in zip(['r', 'g', 'b'], [2016, 2017, 2018]):
            trdf = rdf[(rdf.era == era) & (rdf.reg == reg) & (rdf.mass < 400)]
            trdf = trdf[trdf.weight == 'sample_weight']
            # zip stops at the shorter input, so at most three dbs values are drawn.
            for ls, dbs in zip(['solid', 'dashed', 'dotted'], trdf.dbs.unique()):
                tdrdf = trdf[trdf.dbs == dbs]
                tdrdf = tdrdf.sort_values('mass')
                # Pick entry ``mul`` out of each row's stored array.
                r_sum = tdrdf.r_sum.apply(lambda x: x[mul])
                r_err = tdrdf.r_err.apply(lambda x: x[mul])
                u_sum = tdrdf.u_sum.apply(lambda x: x[mul])
                ax.errorbar(tdrdf.mass, r_sum/u_sum, yerr=r_err/u_sum, color=c, ls=ls, label='{} {}'.format(dbs, era))
        plt.legend(title='{} {}'.format(mul, reg), ncol=2)
        plt.show()
        # Close this figure explicitly so figures do not pile up across the loop.
        plt.close(fig)

In [None]:
# combine multiplicities
def sum_via_index(arr, *args):
    """Add up the entries of ``arr`` found at each index given in ``args``.

    Returns 0 when no index is given.
    """
    total = 0
    for index in args:
        total += arr[index]
    return total
def ratio_via_index(_df, *args):
    """Return the indexed sum of ``_df.r_sum_unp`` divided by that of ``_df.u_sum``.

    Both sums are taken over the same indices ``args`` via ``sum_via_index``.
    """
    return sum_via_index(_df.r_sum_unp, *args) / sum_via_index(_df.u_sum, *args)
def make_unumpy(row):
    """Build a ``unumpy.uarray`` from a row's ``r_sum`` (values) and ``r_err`` (errors)."""
    nominal, errors = row.r_sum, row.r_err
    return unumpy.uarray(nominal, errors)

In [None]:
# Array indices that are summed together to form each acceptance category.
# The insertion order fixes the column order of the resulting records.
category_indices = {
    '0b(1b)': (1, 21),
    '0b(2b)': (0, 17),
    '1b': (2, 3, 4, 10),
    '1s': (5, 6),
    '1b+1s': (7, 8, 9, 19),
    '2b': (11, 12, 13, 14),
    '2s': (15, 16),
}

accept_list = []
for reg in ['SR1', 'SR2']:
    for mass in rdf.mass.unique():
        print(mass, reg)
        tdf = select(rdf, reg=reg, mass=mass)
        # Skip (region, mass) points that do not have exactly three distinct eras.
        nera = len(tdf.era.unique())
        if nera != 3:
            continue
        # assign() returns a new frame, so the column is not written onto a
        # filtered slice of rdf (which triggers SettingWithCopyWarning).
        tdf = tdf.assign(r_sum_unp=tdf.apply(make_unumpy, axis=1))
        for weight in rdf.weight.unique():
            wtdf = tdf[tdf.weight == weight]
            # Column-wise sum over the selected rows (i.e. over the eras present).
            swtdf = wtdf.sum()
            reg_mass_dict = {
                name: ratio_via_index(swtdf, *indices)
                for name, indices in category_indices.items()
            }
            reg_mass_dict['mass'] = mass
            reg_mass_dict['reg'] = reg
            reg_mass_dict['weight'] = weight
            accept_list.append(reg_mass_dict)
        
        

In [None]:
# Collect the per-(region, mass, weight) acceptance records into a single table.
acc_df = pd.DataFrame(accept_list)

In [None]:
# Display the acceptance table for inspection.
acc_df

In [None]:
def plot_reg(ax, _df, key, **kwargs):
    """Draw column ``key`` of ``_df`` against mass, in percent, with error bars.

    Parameters
    ----------
    ax : object with an ``errorbar`` method (e.g. a matplotlib Axes)
    _df : DataFrame with a ``mass`` column and a column ``key`` whose entries
        carry a nominal value and a standard deviation readable by ``unumpy``.
    key : name of the column to plot.
    **kwargs : forwarded unchanged to ``ax.errorbar``.
    """
    # Use the frame that was passed in; the previous body read the global
    # ``tacc_df`` and silently ignored ``_df``.
    ordered = _df.sort_values('mass')
    nom, std = unumpy.nominal_values(ordered[key]), unumpy.std_devs(ordered[key])
    # The stored values are fractions; scale to percent for the plot.
    ax.errorbar(ordered.mass, nom*100, yerr=std*100, **kwargs)

In [None]:
# One acceptance-versus-mass figure per region, saved as a PDF.
for reg in ['SR1', 'SR2']:
    tacc_df = acc_df[acc_df.reg == reg]
    tacc_df = tacc_df[tacc_df.mass < 400]
    tacc_df = tacc_df[tacc_df.weight == 'sample_weight']
    fig, ax = plt.subplots()
    cms_format_fig('Run 2', ax)
    ax.set_ylim(0, 25)
    # Raw strings: '\%' and '\p' are not valid Python escape sequences, so
    # plain literals trigger invalid-escape warnings on recent Python versions.
    ax.set_ylabel(r'Acceptance [\%]')
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')

    # The '2s' category is left out of this figure.
    for key in ['0b(1b)', '0b(2b)', '1b', '1s', '1b+1s', '2b']:
        plot_reg(ax, tacc_df, key, label=key)
    plt.legend()
    plt.ylim(0, 25)
    fig.savefig('{}/gen_b_s/{}_acceptance.pdf'.format(output_dir, reg))
    plt.show()
    # Close the figure (not just clear it) so figures do not pile up across iterations.
    plt.close(fig)

In [None]:
def print_mass_reg(mass, reg, for_print=False):
    """Build the acceptance summary rows for one mass point and region.

    Reads the global ``acc_df``. One dict is produced per acceptance category,
    holding the nominal value and standard deviation taken from the
    ``'sample_weight'`` row and, for each systematic weight, the difference
    between that weight's nominal value and the ``'sample_weight'`` one.

    Parameters
    ----------
    mass, reg : values matched against ``acc_df.mass`` and ``acc_df.reg``.
    for_print : bool, default False
        When true, numbers are rendered as percentage strings with two
        decimals (``value*100`` followed by ``%``); otherwise they are
        returned unchanged.

    Returns
    -------
    list of dict
        Keys: ``mass``, ``reg``, ``type``, ``acceptance``, ``statistical`` and
        one entry per systematic weight name.

    Raises
    ------
    IndexError
        If ``acc_df`` has no row for this mass/region with weight
        ``'sample_weight'`` or with one of the systematic weights.
    """
    if for_print:
        def format_value(value): return "{:.2f}%".format(value*100)
    else:
        def format_value(value): return value

    tdf = acc_df[(acc_df.mass == mass) & (acc_df.reg == reg)]
    weights = ['Weight_ISRFSR_Up', 'Weight_ISRFSR_Down', 'Weight_PDF_Up', 'Weight_PDF_Down']

    # The row belonging to each weight is the same for every category, so
    # look each one up once instead of once per category.
    nominal_row = tdf[tdf.weight == 'sample_weight'].iloc[0]
    varied_rows = {w: tdf[tdf.weight == w].iloc[0] for w in weights}

    row_list = []
    for key in ['0b(1b)', '0b(2b)', '1b', '1s', '1b+1s', '2b', '2s']:
        unom = nominal_row[key]
        nom, std = unc.nominal_value(unom), unc.std_dev(unom)

        row_dict = {
            'mass': mass,
            'reg': reg,
            'type': key,
            'acceptance': format_value(nom),
            'statistical': format_value(std),
        }
        for w in weights:
            snom = unc.nominal_value(varied_rows[w][key])
            # Shift of the varied nominal value relative to the nominal one.
            row_dict[w] = format_value(snom - nom)
        row_list.append(row_dict)
    return row_list

In [None]:
# Gather the raw (numeric) summary rows for every mass point and region.
row_lists = []
for mass in (125, 150, 175, 200, 250, 300, 350):
    for reg in ('SR1', 'SR2'):
        row_lists.extend(print_mass_reg(mass, reg))
print_df = pd.DataFrame(row_lists)

In [None]:
# Display the summary table for inspection.
print_df

In [None]:
# Save the summary table as CSV under the data_gen_b_s folder of output_dir.
print_df.to_csv(output_dir+"/data_gen_b_s/summary_df.csv")

In [None]:
# Rebuild the summary with values formatted as percentage strings; this
# replaces the previous contents of row_lists and print_df.
row_lists = []
for mass in (125, 150, 175, 200, 250, 300, 350):
    for reg in ('SR1', 'SR2'):
        row_lists.extend(print_mass_reg(mass, reg, for_print=True))
print_df = pd.DataFrame(row_lists)

In [None]:
# Print the whole summary table as LaTeX, without the index column.
print(print_df.to_latex(index=False))

In [None]:
# Print one LaTeX table per acceptance type, in order of first appearance.
for t in print_df['type'].unique():
    rows_of_type = print_df[print_df['type'] == t]
    print(rows_of_type.to_latex(index=False))