In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import zfit
from zfit import z

In [None]:
import mplhep as hep

In [None]:
from plotting_meta.plotting_meta import bins, color_cycle, cms_format_fig

from src.plotting_tools.Bins import Bins, make_bins

#from bff_plotting_tools.make_hists import make_sys
from src.plotting_tools.SysHist import SysHist, make_hist, make_sys_hist
from bff_plotting_tools.utils import hist_chi2
from bff_plotting_tools.data_getter import get_data

from bff_signal_model.bff_bck_model import bff_bck_model, sigma, theta, mean

from bff_processor.utils import ratio_plot_template

In [None]:
# Build the analysis binning. This rebinds `bins`, shadowing the object of the
# same name imported from plotting_meta.plotting_meta in the import cell.
bins = make_bins()

In [None]:
# Show the binning object as the cell output for a quick visual check.
bins

In [None]:
# Run configuration. `era` selects the dataset and is embedded in output file
# names; `binning_type` is only used as a file-name tag by the plotting cells.
# NOTE(review): `compute_hesse` is not read by any of the visible cells
# (fit_lognorm always calls result.hesse()) -- confirm whether it is still needed.
era = '2016'
compute_hesse=True
binning_type = 'split'

# Load the events, then keep only those whose dilepton mass lies strictly
# inside the range covered by the binning.
df, lumi = get_data(era)
mass_range = bins.calc_bin_range()
in_mass_window = (df.DiLepMass > mass_range[0]) & (df.DiLepMass <  mass_range[1])
df = df[in_mass_window]

In [None]:
# Replace +/-inf entries with 0 in every column.
# `df` is a boolean-mask selection of the loaded frame, so the result is
# rebound instead of using inplace=True: in-place edits on such selections can
# raise SettingWithCopyWarning, and rebinding keeps the cell safely re-runnable.
df = df.replace([np.inf, -np.inf], 0)

In [None]:
# zfit observable space named "x", limited to the range returned by the binning.
# It is passed to both the model and the data in fit_lognorm.
obs = zfit.Space("x", limits=bins.calc_bin_range())

In [None]:
def fit_lognorm(y, weights, obs, reg, verbose=False):
    """Fit the background model to weighted events with an unbinned likelihood.

    Parameters
    ----------
    y : array handed to ``zfit.Data.from_numpy`` as the observable values.
    weights : per-event weights handed to ``zfit.Data.from_numpy``.
    obs : zfit space on which both the model and the data are defined.
    reg : region name. For 'CR20' and 'CR24' the literal ``0.05`` is passed as
        ``theta``; every other region gets the module-level ``theta`` object.
    verbose : when true, print the minimisation result and the hesse output.

    Returns
    -------
    tuple
        ``(model, data)``. The minimisation result is also attached to the
        model as ``model.result``.

    Notes
    -----
    ``sigma``, ``theta`` and ``mean`` are the objects imported from
    ``bff_signal_model.bff_bck_model`` and are reused by every call.
    NOTE(review): if those are shared zfit parameters, each fit starts from the
    previous fit's values -- confirm this is intended.
    """
    # Regions that receive the literal theta value rather than the shared object.
    literal_theta_regions = ('CR20', 'CR24')
    theta_arg = 0.05 if reg in literal_theta_regions else theta
    lognorm = bff_bck_model(obs, sigma=sigma, theta=theta_arg, mean=mean)

    # Wrap the events and build the unbinned negative log-likelihood.
    data = zfit.Data.from_numpy(obs=obs, array=y, weights=weights)
    nll = zfit.loss.UnbinnedNLL(model=lognorm, data=data)

    # Minimise with Minuit and keep the result on the model.
    result = zfit.minimize.Minuit().minimize(nll)
    lognorm.result = result

    # Parameter errors; hesse() is always evaluated, even when nothing is printed.
    param_errors = result.hesse()
    if verbose:
        print(result,"\n", param_errors)

    return lognorm, data

In [None]:
def fit_region(reg, type_data):
    """Histogram one region of the notebook-level ``df`` and fit it.

    Parameters
    ----------
    reg : region name; rows are kept when the column ``'<reg>_nom'`` equals 1.
    type_data : value matched against ``df.type`` (the notebook uses "bck" and
        "data").

    Returns
    -------
    dict
        ``{"hist": <histogram of DiLepMass>, "fit": <binned fit>}`` where the
        fit is filled on the notebook-level ``bins`` and normalised to the sum
        of the histogram's nominal values.

    Reads the notebook globals ``df``, ``obs`` and ``bins``.
    """
    # Select the events of the requested type that fall in the region.
    in_region = (df.type==type_data) & (df['{}_nom'.format(reg)]==1)
    region = df[in_region]

    masses = region.DiLepMass.to_numpy()
    event_weights = region.Weight.to_numpy()
    hist = make_sys_hist(region, 'DiLepMass', reg)

    # Quick sanity printout: number of events and their summed weight.
    print(masses.shape, event_weights.sum())

    # The fitted model is all that is needed here; the zfit data object is dropped.
    model, _ = fit_lognorm(masses, event_weights, obs, reg, verbose=True)

    # Bin the fitted shape, scaled to the histogram's total yield.
    fit = model.fill_bins(bins, np.sum(hist.nominal), supersample=1)
    return {"hist":hist, "fit": fit}

In [None]:
def make_plot(reg, reg_dict, label):
    """Draw a histogram against its fit (with ratio panel), save it, and summarise.

    Parameters
    ----------
    reg : region name, used for the legend title and the output file name.
    reg_dict : dict with the keys ``'fit'`` and ``'hist'`` as returned by
        ``fit_region``.
    label : legend label of the histogram; also part of the file name and of
        the returned record.

    Returns
    -------
    dict
        One summary row: era, region, histogram yield and its uncertainty
        (quadrature sum of ``hist.std``), fit yield, the fit chi2 and the label.

    Reads the notebook globals ``era``, ``binning_type`` and ``color_cycle`` and
    writes a PNG under ``fits/abcd/``.
    """
    fit = reg_dict['fit']
    hist = reg_dict['hist']
    fig, ax, rax = ratio_plot_template(figsize=(10,10))
    hist_color = color_cycle[3]
    fit_color = color_cycle[0]

    # Main panel: densities of the histogram and of the fit.
    hist.make_density_hist().draw(ax, color=hist_color, label=label)
    fit.make_density_hist().draw(ax, color=fit_color, label='Fit', error_scale=1)

    # Ratio panel: both are divided by the fit's nominal values.
    denominator = fit.nominal
    hist.calc_ratio(denominator).draw(rax, color=hist_color)
    fit.calc_ratio(denominator).draw(rax, color=fit_color, error_scale=1)
    rax.set_ylim(bottom=0, top=2)

    # Axis cosmetics of the main panel.
    ax.set_yscale('log')
    ax.set_ylabel('Events per GeV')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=1e-2, top=1e4)
    ax.legend(title = '{}'.format(reg))
    cms_format_fig(era, ax)

    out_path = 'fits/abcd/{}_binning_density_{}_{}_{}.png'.format(binning_type, era, reg, label)
    fig.savefig(out_path)

    fit_chi2 = hist_chi2(hist, fit, nDOF=4)
    summary = {
        'era': era,
        'reg': reg,
        'bck': np.sum(hist.nominal),
        'bck_unc': np.sum(hist.std**2)**.5,
        'ABCD mc': np.sum(fit.nominal),
        'fit chi2': fit_chi2,
        'label': label,
    }
    return summary

In [None]:
# Fit every control region twice: once on the MC background ("bck") and once
# on data. Results are keyed by region name; each value is the
# {"hist": ..., "fit": ...} dict returned by fit_region.
plot_dict_mc = {}
plot_dict = {}
for reg in ['CR10', 'CR13', 'CR14', 'CR20', 'CR23', 'CR24']:
    print(reg)
    # MC is fitted before data for each region; keep this order, since
    # fit_lognorm reuses the same module-level sigma/theta/mean objects.
    plot_dict_mc[reg] = fit_region(reg, "bck")
    plot_dict[reg] = fit_region(reg, "data")
    

In [None]:
# Plot every control-region fit and collect one summary row per plot.
# For each region the MC plot is made first, then the data plot.
fit_stats = []
for reg in ('CR10', 'CR13', 'CR14', 'CR20', 'CR23', 'CR24'):
    print(reg)
    for fitted, label in ((plot_dict_mc, "MC"), (plot_dict, "data")):
        fit_stats.append(make_plot(reg, fitted[reg], label))

In [None]:
def make_abcd(reg, plot_dict):
    """Build the ABCD prediction ``D = A*B/C`` for a signal region.

    Parameters
    ----------
    reg : signal-region name whose last character is the jet multiplicity
        (e.g. ``'SR1'`` -> 1, ``'SR2'`` -> 2).
    plot_dict : mapping from region name to a dict holding a ``'fit'`` entry;
        the control regions ``CR<n>0``, ``CR<n>3`` and ``CR<n>4`` must be present.

    Returns
    -------
    The object returned by ``SysHist.from_ufloats(bins, D)``, built on the
    notebook-level ``bins``.

    Raises
    ------
    ValueError
        If the last character of ``reg`` is not a digit.
    KeyError
        If one of the required control regions is missing from ``plot_dict``.
    """
    # Derive the jet multiplicity from the region name instead of reading a
    # leaked loop variable, so the function works for any caller.
    nJets = int(reg[-1])

    def ufd(control_region):
        # presumably per-bin values carrying uncertainties (ufloats), so that
        # A*B/C propagates them -- TODO confirm against SysHist
        return plot_dict[control_region]['fit'].uncertainty_std_dev()

    A, B, C = (ufd('CR{}{}'.format(nJets, suffix)) for suffix in (0, 3, 4))
    D = A*B/C
    return SysHist.from_ufloats(bins, D)

In [None]:
# ABCD closure plots for the signal regions.
# abcd_dict  : per-region arrays (nominal, std, bin edges) that are pickled later.
# abcd_stats : one summary row per signal region for the tables below.
abcd_dict = {}
abcd_stats = []
# ABCD estimate: D = A*B/C, built from the control-region fits (see make_abcd).
# One iteration per jet multiplicity; `reg` is the matching signal-region name.
for nJets in [1,2]:
    reg = 'SR{}'.format(nJets)
    # Prediction built from the fits to data.
    ABCD = make_abcd(reg, plot_dict)
    abcd_dict[reg] = {"nom": ABCD.nominal, "std": ABCD.std, "bins": ABCD.bins.bin_edges}
    fig, ax, rax = ratio_plot_template(figsize=(10,10))
    ABCD.make_density_hist().draw(ax, color=color_cycle[0], label='data ABCD', zorder=4, error_scale=1)
    # Store the prediction as the region's 'fit'; no 'hist' entry is added to
    # plot_dict for signal regions.
    plot_dict[reg] = {}
    plot_dict[reg]['fit'] = ABCD
    # MC background histogram for the signal region.
    # NOTE(review): only the type=='bck' mask is applied here, unlike fit_region
    # which also masks on '<reg>_nom' -- presumably make_sys_hist selects the
    # region from `reg`; confirm.
    bck = make_sys_hist(df[(df.type=='bck')], 'DiLepMass', reg)
    bck.make_density_hist().draw(ax, color=color_cycle[3], label='MC', zorder=2)
    # Prediction built from the fits to MC (drawn with the legend label 'fit ABCD').
    ABCD_mc = make_abcd(reg, plot_dict_mc)
    ABCD_mc.make_density_hist().draw(ax, color=color_cycle[6], label='fit ABCD', zorder=3, error_scale=0)
    plot_dict_mc[reg] = {}
    plot_dict_mc[reg]['fit'] = ABCD_mc
    plot_dict_mc[reg]['hist'] = bck
    # Ratio panel: everything is divided by the MC-based ABCD prediction.
    bck.calc_ratio(ABCD_mc.nominal).draw(rax, color=color_cycle[3])
    ABCD.calc_ratio(ABCD_mc.nominal).draw(rax, color=color_cycle[0], error_scale=0)
    ABCD_mc.calc_ratio(ABCD_mc.nominal).draw(rax, color=color_cycle[6], error_scale=0)
    rax.set_ylim(bottom=0, top=2)
    ax.set_yscale('log')
    ax.set_ylabel('Events per GeV')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=1e-2, top=1e4)
    cms_format_fig(era, ax)
    ax.legend(title = '{}'.format(reg))
    fig.savefig('fits/abcd/{}_binning_density_{}_{}.png'.format(binning_type, era, reg))
    # Both chi2 values compare against the MC background histogram.
    abcd_mc_chi2 = hist_chi2(bck, ABCD_mc, nDOF=4)
    abcd_data_chi2 = hist_chi2(bck, ABCD, nDOF=4)
    abcd_stats.append({'era': era, 'reg': reg, 'bck':np.sum(bck.nominal), 'bck_unc': np.sum(bck.std**2)**.5, 'ABCD mc': np.sum(ABCD_mc.nominal), 'ABCD data': np.sum(ABCD.nominal), 'MC chi2': abcd_mc_chi2, 'Data chi2': abcd_data_chi2})
    

In [None]:
import pandas as pd
# Tabulate the per-signal-region ABCD summary rows; the rounded copy is only
# displayed, df_stats itself keeps full precision.
df_stats = pd.DataFrame(abcd_stats)
df_stats.round(2)

In [None]:
# Emit the ABCD summary table as LaTeX source (printed so it can be copied verbatim).
print(df_stats.round(2).to_latex())

In [None]:
# Tabulate the per-control-region fit summary rows returned by make_plot; the
# rounded copy is only displayed.
fit_stats_df = pd.DataFrame(fit_stats)
fit_stats_df.round(2)

In [None]:
# Emit the control-region fit summary table as LaTeX source.
print(fit_stats_df.round(2).to_latex())

In [None]:
# Persist the ABCD predictions (nominal, std, bin edges per signal region) for
# the given era. Pickle protocol 2 is the newest protocol that Python 2 can
# read -- presumably chosen for a downstream Python 2 consumer; confirm.
with open('fits/limit_setting/abcd_dict_{}.pkl'.format(era), 'wb') as f:
    pickle.dump(abcd_dict, f, protocol=2)

In [None]:
# Save MC fits and hists.
# The picklable representation is built in a separate dict rather than written
# back into plot_dict_mc: overwriting the entries with their to_dict() output
# made this cell fail when run a second time and left later cells without the
# original histogram/fit objects.
plot_dict_mc_serializable = {}
for reg in plot_dict_mc:
    print(reg)
    entry = dict(plot_dict_mc[reg])  # shallow copy; the source entry stays untouched
    entry['hist'] = entry['hist'].to_dict()
    entry['fit'] = entry['fit'].to_dict()
    plot_dict_mc_serializable[reg] = entry

# Protocol 2 is kept so the file format is unchanged for existing readers.
with open('fits/limit_setting/plot_dict_mc_{}.pkl'.format(era), 'wb') as f:
    pickle.dump(plot_dict_mc_serializable, f, protocol=2)

In [None]:
# Save data fits and hists.
# The picklable representation is built in a separate dict rather than written
# back into plot_dict: the cells further down still call methods such as
# make_density_hist() on the objects stored in plot_dict, and overwriting them
# with their to_dict() output also made this cell fail when run a second time.
plot_dict_data_serializable = {}
for reg in plot_dict:
    print(reg)
    entry = dict(plot_dict[reg])  # shallow copy; the source entry stays untouched
    # Signal-region entries carry only a 'fit', so 'hist' is optional here.
    if 'hist' in entry:
        entry['hist'] = entry['hist'].to_dict()
    entry['fit'] = entry['fit'].to_dict()
    plot_dict_data_serializable[reg] = entry

# Protocol 2 is kept so the file format is unchanged for existing readers.
with open('fits/limit_setting/plot_dict_data_{}.pkl'.format(era), 'wb') as f:
    pickle.dump(plot_dict_data_serializable, f, protocol=2)

In [None]:
from scipy.interpolate import BPoly

def make_bpoly(x, *constants, x_range=[105,900]):
    """Evaluate a single-interval Bernstein-basis polynomial at ``x``.

    Parameters
    ----------
    x : scalar or array of evaluation points.
    *constants : Bernstein coefficients, one positional argument each (this is
        the calling convention ``scipy.optimize.curve_fit`` uses for parameters).
    x_range : two breakpoints delimiting the single interval of the polynomial.

    Returns
    -------
    The polynomial evaluated at ``x`` (same shape as ``x``).
    """
    # BPoly expects coefficients shaped (n_coefficients, n_intervals); with a
    # single interval every coefficient becomes its own one-element row.
    coefficient_rows = [[value] for value in constants]
    return BPoly(coefficient_rows, x_range)(x)
    

In [None]:
from scipy.optimize import curve_fit

In [None]:
def fit_order(order, bin_centers, y, yerr,  ax=0):
    """Fit ``log(y + 0.01)`` with a Bernstein polynomial and return the exponentiated fit.

    Parameters
    ----------
    order : number of Bernstein coefficients that are fitted (all start at 1).
        With scipy's ``BPoly`` convention this is a polynomial of degree
        ``order - 1``.
    bin_centers : x positions of the points.
    y : values to fit; 0.01 is added before taking the logarithm.
    yerr : accepted for call compatibility but not used by the fit.
    ax : axes to draw the fitted curve on; the default ``0`` means "do not draw".

    Returns
    -------
    ``exp`` of the fitted polynomial evaluated at ``bin_centers``. The 0.01
    offset is not subtracted again.
    """
    log_target = np.log(y+.01)
    start_values = [1] * order
    popt, pcov = curve_fit(make_bpoly, bin_centers, log_target, p0=start_values)
    print(popt)

    # Back-transform the fitted log-values.
    fit_y = np.exp(make_bpoly(bin_centers, *popt))

    if ax != 0:
        curve_label = 'Log. Berstein poly of order {}'.format(order)
        ax.plot(bin_centers, fit_y, label=curve_label, zorder = 2, color='red')
    return fit_y
    

In [None]:
from math import pi

In [None]:
def lognorm( data, norm, sigma, theta, mean):
    """Shifted log-normal shape used as a ``curve_fit`` model.

    Parameters
    ----------
    data : evaluation points (scalar or numpy array).
    norm : overall scale factor.
    sigma : width of the distribution in log space.
    theta : shift subtracted from ``data`` before anything else.
    mean : scale that ``data - theta`` is divided by inside the logarithm.

    Returns
    -------
    ``norm / ((data-theta) * sigma * 2*pi) * exp(-log((data-theta)/mean)**2 / (2*sigma**2))``

    Note that the prefactor uses ``2*pi`` rather than ``sqrt(2*pi)``, so ``norm``
    is not the integral of the curve; as a free fit parameter it absorbs the
    constant.
    """
    shifted = data-theta
    prefactor = norm/(shifted*sigma*2*pi)
    exponent = -(np.log(shifted/mean))**2/(2*sigma**2)
    return prefactor*np.exp(exponent)

In [None]:
# Leftover notebook-level default: make_berstein_plot takes `reg` as a
# parameter, and the loop that calls it rebinds this name anyway.
reg = 'CR10'

def make_berstein_plot(reg, order):
    """Compare alternative shape fits for one region and save the figure.

    Three curves are drawn over the region's density histogram: the stored
    binned fit (labelled 'lognorm'), a ``curve_fit`` of ``lognorm`` to the
    histogram, and the log-Bernstein fit produced by ``fit_order``.

    Parameters
    ----------
    reg : key into the notebook-level ``plot_dict``; the entry must provide
        both ``'hist'`` and ``'fit'`` objects.
    order : number of coefficients passed on to ``fit_order``; also part of the
        output file name.

    Returns
    -------
    The Bernstein fit values at the bin centres, as returned by ``fit_order``.

    Reads the notebook globals ``plot_dict``, ``binning_type`` and ``era`` and
    writes a PNG under ``fits/abcd/``.
    """
    region = plot_dict[reg]
    y = region['hist'].make_density_hist().nominal
    # The error bars are the square root of the *fit's* density values, not an
    # uncertainty taken from the histogram itself.
    yerr = region['fit'].make_density_hist().nominal**.5
    fit = region['fit'].make_density_hist()
    bin_centers = region['hist'].calc_bin_centers()

    fig, ax, rax = ratio_plot_template(figsize=(10,10))

    # --- main panel ---
    ax.errorbar(bin_centers, y, yerr=yerr, zorder=-1)
    fit.draw(ax, label='lognorm', zorder=0, color='green')

    # Independent least-squares fit of the analytic shape to the binned densities.
    ln_popt, ln_pcov = curve_fit(lognorm, bin_centers, y, maxfev=int(1e5))
    y_ln = lognorm(bin_centers, *ln_popt)
    ax.plot(bin_centers, y_ln, label='log norm with curve fit', zorder=2, color='purple')

    # fit_order draws its own curve on ax.
    y_fit = fit_order(order, bin_centers, y, yerr, ax=ax)

    ax.set_ylim(.1,900)
    ax.set_yscale('log')
    ax.legend(title=reg)

    # --- ratio panel: everything relative to the histogram densities ---
    rax.errorbar(bin_centers, y/y, yerr=yerr/y, zorder=1)
    fit.calc_ratio(y).draw(rax, zorder=2, color='green')
    rax.plot(bin_centers, y_ln/y, color='purple')
    rax.set_ylim(0,2)

    # Guide lines at unity and at +/-10%.
    n_points = len(bin_centers)
    rax.plot(bin_centers, [1] * n_points, color='red')
    rax.plot(bin_centers, [1.1] * n_points, color='black', alpha=.4)
    rax.plot(bin_centers, [.9] * n_points, color='black', alpha=.4, label='10%')

    out_path = 'fits/abcd/berstein_ord{}_{}_binning_density_{}_{}.png'.format(order, binning_type, era, reg)
    fig.savefig(out_path)
    return y_fit

In [None]:
# Run the shape comparison for every control region and keep the Bernstein fit
# values (evaluated at the bin centres) keyed by region name. The literal 7 is
# the number of Bernstein coefficients handed to fit_order.
berstein_fits = {}
for reg in ['CR10', 'CR13', 'CR14', 'CR20', 'CR23', 'CR24']:
    berstein_fits[reg] = make_berstein_plot(reg,7)

In [None]:
def make_berstein_plot_SR(reg, order):
    """Plot the Bernstein-based ABCD estimate for a signal region and save it.

    The estimate is ``CR<n>0 * CR<n>3 / CR<n>4`` computed from the notebook-level
    ``berstein_fits``, where ``n`` is the last character of ``reg``.

    Parameters
    ----------
    reg : signal-region name ending in the jet multiplicity (e.g. ``'SR1'``);
        also used as key into the notebook-level ``plot_dict``.
    order : only used in the output file name.

    Returns
    -------
    None. Writes a PNG under ``fits/abcd/``.

    NOTE(review): in the visible cells the ABCD loop stores only a ``'fit'``
    entry under ``plot_dict['SR...']`` (the matching histogram is stored in
    ``plot_dict_mc``), so the ``'hist'`` lookups below may raise ``KeyError`` --
    confirm which histogram is intended.
    """
    # Bernstein ABCD: A*B/C from the control-region fits of this jet multiplicity.
    nJets = int(reg[-1])
    region_a = berstein_fits['CR{}0'.format(nJets)]
    region_b = berstein_fits['CR{}3'.format(nJets)]
    region_c = berstein_fits['CR{}4'.format(nJets)]
    abcd = region_a*region_b/region_c

    region = plot_dict[reg]
    y = region['hist'].make_density_hist().nominal
    # The error bars are the square root of the *fit's* density values.
    yerr = region['fit'].make_density_hist().nominal**.5
    fit = region['fit'].make_density_hist()
    bin_centers = region['hist'].calc_bin_centers()

    fig, ax, rax = ratio_plot_template(figsize=(10,10))

    # --- main panel ---
    ax.errorbar(bin_centers, y, yerr=yerr, zorder=-1)
    fit.draw(ax, label='lognorm', zorder=0, color='green', alpha=1, error_scale=0)
    ax.plot(bin_centers, abcd, label='Bernstein abcd', zorder=2, color='purple')

    ax.set_ylim(.1,900)
    ax.set_yscale('log')
    ax.legend(title=reg)

    # --- ratio panel: everything relative to the histogram densities ---
    rax.errorbar(bin_centers, y/y, yerr=yerr/y, zorder=1)
    fit.calc_ratio(y).draw(rax, zorder=.5, color='green', alpha=1, error_scale=0)
    rax.plot(bin_centers, abcd/y, color='purple')
    rax.set_ylim(0,2)

    # Guide lines at unity and at +/-10%.
    n_points = len(bin_centers)
    rax.plot(bin_centers, [1] * n_points, color='red')
    rax.plot(bin_centers, [1.1] * n_points, color='black', alpha=.4)
    rax.plot(bin_centers, [.9] * n_points, color='black', alpha=.4, label='10%')

    out_path = 'fits/abcd/berstein_ord{}_{}_binning_density_{}_{}.png'.format(order, binning_type, era, reg)
    fig.savefig(out_path)

In [None]:
# Produce the Bernstein-ABCD comparison for both signal regions; the literal 7
# only ends up in the output file name.
for reg in ['SR1', 'SR2']:
    make_berstein_plot_SR(reg,7)