In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import zfit
from zfit import z

In [None]:
import mplhep as hep

In [None]:
from plotting_meta.plotting_meta import bins, color_cycle, cms_format_fig, Bins

from bff_plotting_tools.make_hists import make_sys, SysHist
from bff_plotting_tools.utils import hist_chi2
from bff_plotting_tools.data_getter import get_data

from bff_signal_model.bff_bck_model import bff_bck_model, sigma, theta, mean

from bff_processor.utils import ratio_plot_template

In [None]:
# Run configuration.
era = '2016'
# NOTE(review): compute_hesse is not read by any cell visible in this notebook.
compute_hesse = True
# binning_type only appears in the file names of the saved figures.
binning_type = 'split'

# Load the events for this era and keep only those whose DiLepMass lies
# strictly inside the range reported by the binning.
df, lumi = get_data(era)
mass_range = bins.calc_bin_range()
in_mass_range = (df.DiLepMass > mass_range[0]) & (df.DiLepMass < mass_range[1])
df = df[in_mass_range]

In [None]:
# Observable space handed to the fits: a single axis named "x" whose limits are
# the range returned by bins.calc_bin_range() (the same range used to filter df).
obs = zfit.Space("x", limits=bins.calc_bin_range())

In [None]:
def fit_lognorm(y, weights, obs, verbose=False):
    """Fit the background model to weighted, unbinned values.

    The model is built by ``bff_bck_model`` from ``obs`` and the module-level
    ``sigma``, ``theta`` and ``mean`` objects, then fitted by minimising an
    unbinned NLL with Minuit. The fit result is attached to the model as
    ``model.result`` and ``hesse()`` is always run on it.

    NOTE(review): ``sigma``, ``theta`` and ``mean`` are the same imported
    objects on every call; if they are shared fit parameters, successive fits
    act on the same parameter objects -- confirm this is intended.

    Parameters
    ----------
    y : array-like
        Values to fit, passed to ``zfit.Data.from_numpy`` as ``array``.
    weights : array-like
        Per-entry weights, passed to ``zfit.Data.from_numpy`` as ``weights``.
    obs : zfit.Space
        Observable space used for both the model and the dataset.
    verbose : bool, optional
        When true, print the fit result and the value returned by ``hesse()``.

    Returns
    -------
    tuple
        ``(model, dataset)``: the fitted model (carrying ``.result``) and the
        zfit dataset it was fitted to.
    """
    model = bff_bck_model(obs, sigma=sigma, theta=theta, mean=mean)
    dataset = zfit.Data.from_numpy(obs=obs, array=y, weights=weights)

    # Unbinned negative log-likelihood, minimised with Minuit.
    loss = zfit.loss.UnbinnedNLL(model=model, data=dataset)
    fit_result = zfit.minimize.Minuit().minimize(loss)
    model.result = fit_result

    # Run hesse() whether or not the result is printed.
    hesse_errors = fit_result.hesse()

    if verbose:
        print(fit_result, "\n", hesse_errors)

    return model, dataset

In [None]:
def fit_region(reg, type_data):
    """Histogram and fit the DiLepMass distribution of one region.

    Rows of the notebook-level ``df`` are selected where ``type`` equals
    ``type_data`` and the ``'<reg>_nom'`` column equals 1.

    Parameters
    ----------
    reg : str
        Region name; selects the ``'<reg>_nom'`` column and is forwarded to
        ``make_sys``.
    type_data : str
        Value matched against ``df.type`` (the notebook uses "bck" and "data").

    Returns
    -------
    dict
        ``{"hist": ..., "fit": ...}`` where ``hist`` comes from ``make_sys``
        and ``fit`` from the fitted model's ``fill_bins``, normalised to the
        sum of ``hist.nominal``.
    """
    of_type = df.type == type_data
    in_region = df['{}_nom'.format(reg)] == 1
    selected = df[of_type & in_region]

    masses = selected.DiLepMass.to_numpy()
    event_weights = selected.Weight.to_numpy()

    # Histogram of the selected events.
    hist = make_sys(selected, 'DiLepMass', reg)

    # Unbinned fit of the same events; only the fitted model is needed here.
    model, _ = fit_lognorm(masses, event_weights, obs, verbose=True)

    # Bin the fitted shape, scaled to the total nominal yield of the histogram.
    fit = model.fill_bins(bins, np.sum(hist.nominal), supersample=1)
    return {"hist": hist, "fit": fit}

In [None]:
def make_plot(reg, reg_dict, label):
    """Draw a region's histogram against its fit, save the figure, return stats.

    The upper panel shows both as density histograms on a log scale; the lower
    panel shows each divided by the fit's nominal values. The figure is saved
    under ``fits/abcd/``; the file name is built from the notebook-level
    ``binning_type`` and ``era`` plus ``reg`` and ``label``.

    Parameters
    ----------
    reg : str
        Region name, used for the legend title and the output file name.
    reg_dict : dict
        Must provide ``'fit'`` and ``'hist'`` entries (as returned by
        ``fit_region``).
    label : str
        Legend label for the histogram; also part of the output file name.

    Returns
    -------
    dict
        Summary row with keys ``era``, ``reg``, ``bck``, ``bck_unc``,
        ``ABCD mc``, ``fit chi2`` and ``label``.
        NOTE(review): ``'ABCD mc'`` holds the summed nominal yield of the fit;
        the key name is kept unchanged for the downstream table.
    """
    fit = reg_dict['fit']
    hist = reg_dict['hist']

    fig, ax, rax = ratio_plot_template(figsize=(10, 10))
    hist_color = color_cycle[3]
    fit_color = color_cycle[0]

    # Upper panel: densities.
    hist.make_density_hist().draw(ax, color=hist_color, label=label)
    fit.make_density_hist().draw(ax, color=fit_color, label='Fit', error_scale=0)

    # Lower panel: everything relative to the fit's nominal values.
    fit_nominal = fit.nominal
    hist.calc_ratio(fit_nominal).draw(rax, color=hist_color)
    fit.calc_ratio(fit_nominal).draw(rax, color=fit_color, error_scale=0)
    rax.set_ylim(bottom=0, top=2)

    # Axis cosmetics for the upper panel.
    ax.set_yscale('log')
    ax.set_ylabel('Events per GeV')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=1e-2, top=1e4)
    ax.legend(title='{}'.format(reg))
    cms_format_fig(era, ax)

    out_path = 'fits/abcd/{}_binning_density_{}_{}_{}.png'.format(
        binning_type, era, reg, label)
    fig.savefig(out_path)

    # NOTE(review): nDOF=4 is passed straight to hist_chi2; its meaning is
    # defined by that helper -- confirm it matches this fit.
    fit_chi2 = hist_chi2(hist, fit, nDOF=4)

    return {
        'era': era,
        'reg': reg,
        'bck': np.sum(hist.nominal),
        # Per-bin std values summed in quadrature.
        'bck_unc': np.sum(hist.std**2)**.5,
        'ABCD mc': np.sum(fit.nominal),
        'fit chi2': fit_chi2,
        'label': label,
    }

In [None]:
# Fit every control region twice: once on the "bck" sample (kept in
# plot_dict_mc) and once on the "data" sample (kept in plot_dict).
control_regions = ['CR10', 'CR13', 'CR14', 'CR20', 'CR23', 'CR24']
plot_dict_mc, plot_dict = {}, {}
for reg in control_regions:
    print(reg)
    plot_dict_mc[reg] = fit_region(reg, "bck")
    plot_dict[reg] = fit_region(reg, "data")
    

In [None]:
# Plot each control-region fit (the "bck" one first, then the "data" one) and
# collect the summary row that make_plot returns for it.
fit_stats = []
for reg in ('CR10', 'CR13', 'CR14', 'CR20', 'CR23', 'CR24'):
    print(reg)
    for label, fits in (("MC", plot_dict_mc), ("data", plot_dict)):
        fit_stats.append(make_plot(reg, fits[reg], label))

In [None]:
def make_abcd(reg, plot_dict):
    """Build the ABCD estimate ``D = A * B / C`` for one signal region.

    The inputs are the fitted control-region histograms that share the
    signal region's index ``<n>``: ``A = CR<n>0``, ``B = CR<n>3`` and
    ``C = CR<n>4``.

    Parameters
    ----------
    reg : str
        Signal-region name of the form ``'SR<n>'`` (e.g. ``'SR1'``). The part
        after ``'SR'`` selects the control regions.
    plot_dict : dict
        Maps control-region names to dicts whose ``'fit'`` entry provides
        ``uncertainty_std_dev()`` (as produced by ``fit_region``).

    Returns
    -------
    SysHist
        Result of ``SysHist.from_ufloats(bins, D)`` with the notebook-level
        ``bins``.

    Raises
    ------
    ValueError
        If ``reg`` is not of the form ``'SR<n>'``.
    KeyError
        If a required control region is missing from ``plot_dict``.
    """
    prefix = 'SR'
    region_index = reg[len(prefix):] if reg.startswith(prefix) else ''
    if not region_index:
        raise ValueError(
            "expected a signal-region name like 'SR1', got {!r}".format(reg))

    # The index comes from `reg`, not from a notebook-level loop variable, so
    # the result does not depend on which cell ran last.
    def ufd(control_region):
        # presumably an array of ufloats, given SysHist.from_ufloats -- TODO confirm
        return plot_dict[control_region]['fit'].uncertainty_std_dev()

    A, B, C = (ufd('CR{}{}'.format(region_index, suffix)) for suffix in ('0', '3', '4'))
    D = A*B/C
    return SysHist.from_ufloats(bins, D)

In [None]:
# Per signal region: nominal values, std and bin edges of the data-driven ABCD
# prediction (this dict is pickled in a later cell).
abcd_dict = {}
# One summary row (yields and chi2 values) per signal region.
abcd_stats = []
# ABCD estimate: D = A*B/C, computed by make_abcd from the control-region fits.
# For each signal region, compare it with the MC background and save the figure.
for nJets in [1,2]:
    reg = 'SR{}'.format(nJets)
    # ABCD prediction built from the fits to the "data" sample (plot_dict).
    ABCD = make_abcd(reg, plot_dict)
    abcd_dict[reg] = {"nom": ABCD.nominal, "std": ABCD.std, "bins": ABCD.bins.bin_edges}
    fig, ax, rax = ratio_plot_template(figsize=(10,10))
    ABCD.make_density_hist().draw(ax, color=color_cycle[0], label='data ABCD', zorder=4, error_scale=0)
    
    # MC background histogram built from all "bck" events; `reg` is forwarded to
    # make_sys (presumably it applies the region selection -- TODO confirm).
    bck = make_sys(df[(df.type=='bck')], 'DiLepMass', reg)
    bck.make_density_hist().draw(ax, color=color_cycle[3], label='MC', zorder=2)
    # ABCD prediction built from the fits to the "bck" sample (plot_dict_mc).
    ABCD_mc = make_abcd(reg, plot_dict_mc)
    ABCD_mc.make_density_hist().draw(ax, color=color_cycle[6], label='fit ABCD', zorder=3, error_scale=0)
    # Ratio panel: every curve is divided by the nominal values of ABCD_mc.
    bck.calc_ratio(ABCD_mc.nominal).draw(rax, color=color_cycle[3])
    ABCD.calc_ratio(ABCD_mc.nominal).draw(rax, color=color_cycle[0], error_scale=0)
    ABCD_mc.calc_ratio(ABCD_mc.nominal).draw(rax, color=color_cycle[6], error_scale=0)
    rax.set_ylim(bottom=0, top=2)
    ax.set_yscale('log')
    ax.set_ylabel('Events per GeV')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=1e-2, top=1e4)
    cms_format_fig(era, ax)
    ax.legend(title = '{}'.format(reg))
    fig.savefig('fits/abcd/{}_binning_density_{}_{}.png'.format(binning_type, era, reg))
    # chi2 of each ABCD prediction against the MC background.
    # NOTE(review): nDOF=4 is passed straight to hist_chi2 -- confirm its meaning there.
    abcd_mc_chi2 = hist_chi2(bck, ABCD_mc, nDOF=4)
    abcd_data_chi2 = hist_chi2(bck, ABCD, nDOF=4)
    # 'bck_unc' is the per-bin std of the MC background summed in quadrature.
    abcd_stats.append({'era': era, 'reg': reg, 'bck':np.sum(bck.nominal), 'bck_unc': np.sum(bck.std**2)**.5, 'ABCD mc': np.sum(ABCD_mc.nominal), 'ABCD data': np.sum(ABCD.nominal), 'MC chi2': abcd_mc_chi2, 'Data chi2': abcd_data_chi2})

In [None]:
# Tabulate the per-signal-region ABCD summary rows collected in abcd_stats.
# pandas is imported as pd in the notebook's import cell, so this cell no longer
# carries its own import.
df_stats = pd.DataFrame(abcd_stats)
df_stats.round(2)

In [None]:
# Print the ABCD summary table, rounded to two decimals, as LaTeX source.
print(df_stats.round(2).to_latex())

In [None]:
# Tabulate the per-control-region fit summary rows returned by make_plot.
fit_stats_df = pd.DataFrame(fit_stats)
fit_stats_df.round(2)

In [None]:
# Print the fit summary table, rounded to two decimals, as LaTeX source.
print(fit_stats_df.round(2).to_latex())

In [None]:
# Save the ABCD predictions (abcd_dict) for this era under fits/limit_setting/.
# NOTE(review): protocol=2 is presumably chosen so the file can be read by a
# Python 2 consumer -- confirm before changing it.
with open('fits/limit_setting/abcd_dict_{}.pkl'.format(era), 'wb') as f:
    pickle.dump(abcd_dict, f, protocol=2)

In [None]:
# Display the dictionary that was just pickled.
abcd_dict