In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import zfit
from zfit import z
import re
import math

In [None]:
import mplhep as hep
# Apply the CMS plotting style to every figure made below and switch
# matplotlib's text rendering to LaTeX (text.usetex).
hep.style.use(hep.style.CMS)
plt.rcParams["text.usetex"] = True

In [None]:
from plotting_meta.plotting_meta import bins, binning_type, cms_format_fig

from bff_plotting_tools.make_hists import make_sys
from bff_plotting_tools.data_getter import get_data

from bff_signal_model.bff_signal_model import bff_signal_model, reset_params, mu, sys_func, sys_func_offset, popt_sys_dict
from bff_signal_model.utils import fit_model

In [None]:
# Run configuration: data-taking era, the event table returned by get_data
# for that era, and whether the fits should also compute Hesse errors.
era = '2016'
df, lumi = get_data(era)
compute_hesse = True
# Sorted unique mass values of the rows tagged as signal, with NaN removed.
masses = [
    mass_point
    for mass_point in np.unique(df.loc[df.type == 'sig', 'mass'])
    if not math.isnan(mass_point)
]
masses

In [None]:
# Overwrite +/-infinity with 0 everywhere in the event table (in place).
df.replace(to_replace=[np.inf, -np.inf], value=0, inplace=True)

In [None]:
# Fit observable "x", limited to the range covered by the histogram binning.
obs = zfit.Space(obs="x", limits=bins.calc_bin_range())

In [None]:
# One row per (signal region, mass, dbs) combination to process; regions vary
# slowest and dbs fastest, which fixes the iteration order of later cells.
hists = pd.DataFrame(
    [
        dict(reg=reg, mass=int(mass), dbs=dbs)
        for reg in ('SR1', 'SR2')
        for mass in masses
        for dbs in (0.04, 0.5, 1.0)
    ]
)

In [None]:
# Build, for every (region, mass, dbs) point, the MC signal histogram and the
# histogram produced by the fitted parametric signal model.
#   bck_dict[(reg, mass, dbs)]  -> background histogram for that region
#   hist_dict[(reg, mass, dbs)] -> {'mc': MC histogram, 'fit': model histogram}
# Points with no signal events get a background entry but no hist_dict entry.
hist_dict = {}
bck_dict = {}
for _, row in hists.iterrows():
    reg, mass, dbs = row
    print(reg, mass, dbs)
    # get hists
    # NOTE(review): the background selection does not depend on mass or dbs,
    # so this recomputes the same histogram for every point of a region;
    # consider caching per region if make_sys results are safe to share.
    bck_dict[(reg, mass, dbs)] = make_sys(df[(df.type=='bck')], 'DiLepMass', reg)
    sig_df = df[(df.mass==mass) & (df.dbs==dbs)]
    if sig_df.shape[0]==0: continue
    hist = make_sys(sig_df, 'DiLepMass', reg)
    # total nominal yield, used to normalise the model histogram
    normalization = np.sum(hist.nominal)

    # fit signal: the shared `mu` parameter is moved to this mass point first
    mu.set_value(mass)
    sm = bff_signal_model(obs, mu=mu)
    data = sig_df.DiLepMass.to_numpy()
    weight = sig_df.Weight.to_numpy()
    # honour the notebook-level compute_hesse switch instead of a hard-coded True
    results_fit = fit_model(obs, data, weight, sm, compute_hesse=compute_hesse)
    # systematic width from the stored era/region parametrisation, with a
    # 0.01 offset passed to sys_func_offset
    popt_sys = popt_sys_dict[era][reg]
    sys_width = sys_func_offset(row, .01, *popt_sys)
    print(sys_width, results_fit)
    fit_hist = sm.make_hist(bins, normalization, tail_percent=1, constant_percent=sys_width)
    # make dict
    hist_dict[(reg, mass, dbs)] = {'mc': hist, 'fit': fit_hist}

In [None]:
# Overview (non-density): overlay the MC and parametric signal histograms of
# every mass point for a single signal region and a single dbs value.
# The region/dbs being shown are named explicitly so that the legend title and
# the output file describe what is on the plot, rather than whatever values the
# loop variables happened to hold after the last iteration.
overview_reg = "SR1"
overview_dbs = 0.04
fig, ax = plt.subplots(figsize=(10,10))
first_curve = True
for (reg, mass, dbs), plot_dict in hist_dict.items():
    if dbs != overview_dbs or reg != overview_reg: continue
    hist = plot_dict['mc']
    fit_hist = plot_dict['fit']

    if first_curve:
        # label only one pair of curves so the legend has a single entry each
        hist.draw(ax, color='red', label='MC')
        fit_hist.draw(ax, color='blue', label='parametric')
        first_curve = False
    else:
        hist.draw(ax, color='red')
        fit_hist.draw(ax, color='blue')
ax.set_yscale('log')
ax.set_ylabel('Events per GeV')
ax.set_xlabel('DiLepMass [GeV]')
ax.set_ylim(bottom=1e-2, top=1e4)
ax.legend(title=overview_reg)
cms_format_fig(era, ax)
# same file-name pattern as before: <binning>_binning_<era>_<region>.png
fig.savefig('fits/bff/{}_binning_{}_{}.png'.format(binning_type, era, overview_reg))


In [None]:
# Per-(region, mass, dbs) diagnostic plots comparing the MC signal histogram
# with the parametric model: non-density (log and linear), density, ratio to
# each histogram's own nominal, and ratio to the fit nominal.
np.seterr(divide='ignore', invalid='ignore')
for (reg, mass, dbs), plot_dict in hist_dict.items():
    bck = bck_dict[(reg, mass, dbs)]
    hist = plot_dict['mc']
    fit_hist = plot_dict['fit']
    sm = bff_signal_model(obs, mu=mass)
    width = sm.sigma().numpy()

    # Only the (200 GeV, dbs=0.04) figures are left open after saving.
    keep_open = (mass==200 and dbs==0.04)
    # File names for dbs=0.04 keep their historical form (no dbs in the name);
    # other dbs values get a suffix so that they no longer overwrite the
    # dbs=0.04 plots.
    dbs_tag = '' if dbs==0.04 else '_dbs{}'.format(dbs)
    name_tag = '{}_{}_{}{}'.format(era, reg, mass, dbs_tag)

    #non-density
    fig, ax = plt.subplots(figsize=(10,10))
    #bck.draw(ax, color='green', label='MC bck')
    hist.draw(ax, color='red', label='MC')
    fit_hist.draw(ax, color='blue', label='parametric')
    ax.set_yscale('log')
    ax.set_ylabel('Events per GeV')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=1e-2, top=1e4)
    #ax.set_xlim([mass-width*4, mass+width*4])
    ax.legend(title = '{} {} GeV'.format(reg, mass))
    # format before the first save so the log-scale file is styled like the rest
    cms_format_fig(era, ax)
    fig.savefig('fits/bff/{}_binning_{}.png'.format(binning_type, name_tag))
    # same figure again with an autoscaled linear y axis
    ax.autoscale(enable=True, axis='y')
    ax.set_yscale('linear')
    fig.savefig('fits/bff/{}_binning_{}_linear.png'.format(binning_type, name_tag))
    if not keep_open: plt.close(fig)

    #density
    fig, ax = plt.subplots(figsize=(10,10))
    bck.make_density_hist().draw(ax, color='green', label='MC bck')
    hist.make_density_hist().draw(ax, color='red', label='MC')
    fit_hist.make_density_hist().draw(ax, color='blue', label='parametric')
    ax.set_yscale('log')
    ax.set_ylabel('Events per GeV')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=1e-2, top=1e4)
    #ax.set_xlim([mass-width*4, mass+width*4])
    #ax.legend(title = '{} {} GeV'.format(reg, mass))
    cms_format_fig(era, ax)
    fig.savefig('fits/bff/{}_binning_density_{}.png'.format(binning_type, name_tag))
    if not keep_open: plt.close(fig)

    # ratio of each histogram to its own nominal
    fig, ax = plt.subplots(figsize=(10,10))
    hist.calc_ratio(hist.nominal).draw(ax, color='red', label='MC')
    fit_hist.calc_ratio(fit_hist.nominal).draw(ax, color='blue', label='parametric')
    #ax.set_yscale('log')
    ax.set_ylabel('Ratio with nominal')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=0, top=2)
    # zoom to +/- 4 model widths around the mass point
    ax.set_xlim([mass-width*4, mass+width*4])
    ax.legend(title = '{} {} GeV'.format(reg, mass))
    cms_format_fig(era, ax)
    fig.savefig('fits/bff/{}_binning_ratio_{}_nom.png'.format(binning_type, name_tag))
    if not keep_open: plt.close(fig)

    # ratio of both histograms to the fit nominal
    fig, ax = plt.subplots(figsize=(10,10))
    hist.calc_ratio(fit_hist.nominal).draw(ax, color='red', label='MC')
    fit_hist.calc_ratio(fit_hist.nominal).draw(ax, color='blue', label='parametric')
    #ax.set_yscale('log')
    ax.set_ylabel('Ratio with fit')
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylim(bottom=0, top=2)
    ax.set_xlim([mass-width*4, mass+width*4])
    ax.legend(title = '{} {} GeV'.format(reg, mass))
    cms_format_fig(era, ax)
    fig.savefig('fits/bff/{}_binning_ratio_{}.png'.format(binning_type, name_tag))
    if not keep_open: plt.close(fig)

In [None]:
def make_plot_dict(plot):
    """Flatten a histogram object into a dict of plain attributes for pickling.

    Copies the nominal values, the up/down variations, the std and the bin
    edges of ``plot`` under the keys 'nom', 'up', 'down', 'std' and 'bins'.
    """
    return {"nom": plot.nominal, "up": plot.up, "down": plot.down, 'std': plot.std, 'bins':plot.bins.bin_edges}

# For every fitted point keep the MC histogram ('hist') and the parametric
# model histogram ('fit_hist') in picklable form.
pckl_plt_dict = {}
for key, plot_dict in hist_dict.items():
    pckl_plt_dict[key] = {
        'hist': make_plot_dict(plot_dict['mc']),
        'fit_hist': make_plot_dict(plot_dict['fit']),
    }

In [None]:
# Persist the flattened histograms for the limit-setting step.
# NOTE(review): protocol=2 is presumably chosen so that a Python 2 consumer can
# read the file - confirm before changing it.
with open('fits/limit_setting/fit_dict_{}_{}.pkl'.format(binning_type, era), mode='wb') as f:
    pickle.dump(pckl_plt_dict, file=f, protocol=2)

In [None]:
##
## make sys model
##

In [None]:
# compute relative sys levels per mass and dbs
# Each record holds the summed up/down variations divided by the summed
# nominal yield, together with the jet-multiplicity index parsed from the
# signal-region name.
sys_list = []
for (reg, mass, dbs), plot_dict in hist_dict.items():
    if mass == 201: continue
    # 'SR1' -> 1, 'SR2' -> 2
    nJets = int(re.findall('SR([12])', reg)[0])
    mass = int(mass)
    print("\tmass", mass)
    # The MC signal histogram for this point was already built with the same
    # selection and region when hist_dict was filled, so reuse it instead of
    # running make_sys a second time.
    hist = plot_dict['mc']
    nominal_yield = np.sum(hist.nominal)
    # NOTE(review): "up" is filled from hist.down and "down" from hist.up.
    # This is kept as is because the cell that computes sys_df['y'] takes
    # abs() of the "up" column only; un-swapping here must be done together
    # with that cell.
    up, down = np.sum(hist.down)/nominal_yield, np.sum(hist.up)/nominal_yield
    sys_list.append({"nJets":nJets, "mass":mass, "dbs":dbs, "up": up, "down": down, "nominal": nominal_yield})

In [None]:

# NOTE(review): these imports sit in the middle of the notebook; numpy is
# already imported in the top import cell, and curve_fit would be easier to
# find there as well.
from scipy.optimize import curve_fit
import numpy as np
# One row per record collected in sys_list (nJets, mass, dbs, up, down, nominal).
sys_df = pd.DataFrame(sys_list)

In [None]:
# Fit target: mean magnitude of the two relative variations. abs() is applied
# to both columns so the result does not depend on which of the two carries
# the negative sign (the producer cell stores them under swapped names).
sys_df['y'] = (sys_df.up.abs() + sys_df.down.abs())/2
sys_df['mass'] = sys_df['mass'].astype('float')
# drop rows with any NaN (e.g. points with a zero nominal yield)
sys_df = sys_df.dropna()
# for 2018 only mass points from 200 GeV upwards are used
if era=='2018': sys_df = sys_df[sys_df.mass>=200]

In [None]:
# Dump the systematics table, rounded to two decimals, as LaTeX source.
print(sys_df.round(decimals=2).to_latex())

In [None]:
# For each jet multiplicity: fit sys_func to the averaged systematic of all
# mass/dbs points, then draw the fitted curve, the curve with a 0.01 offset
# (dotted) and the measured points, one colour per dbs value.
colors = ["#1f77b4", "#ff7f0e", "#2ca02c"]
for nJets in [1,2]:
    fit_df = sys_df[(sys_df.nJets==nJets)]
    # the whole frame is handed to sys_func as the independent variable
    popt, pcov = curve_fit(sys_func, fit_df, fit_df.y, p0=[2.08975535e+01,  1.85339127e-01, -1.28145436e-02])
    print(popt)
    fig, ax = plt.subplots()
    for i, dbs in enumerate([0.04, 0.5, 1.0]):
        color = colors[i]
        tdf = sys_df[(sys_df.dbs==dbs) & (sys_df.nJets==nJets)]
        y_offset = sys_func_offset(tdf, 0.01, *popt)
        y = sys_func_offset(tdf, 0.0, *popt)
        ax.plot(tdf.mass, y, color=color)
        if i==0:
            # label the widened curve once only to keep the legend compact
            ax.plot(tdf.mass, y_offset, linestyle=':', label='widened fit', color=color)
        else:
            ax.plot(tdf.mass, y_offset, linestyle=':', color=color)
        ax.scatter(tdf.mass, tdf.y, label=dbs, color=color)
    ax.legend(title=nJets)
    ax.set_xlabel('DiLepMass [GeV]')
    ax.set_ylabel('avg. sys. (perc. of nominal)')
    # apply the CMS formatting before saving so the file on disk includes it
    cms_format_fig(era, ax)
    # The plot spans every mass point, so the file is keyed on era and nJets
    # only; the previous name embedded a `mass` value left over from an
    # earlier cell's loop.
    fig.savefig('fits/bff/sys_percent_{}_{}.png'.format(era, nJets))
    plt.show()

In [None]:
# LaTeX snippet for one mass point, filled in with str.format(era=..., mass=...).
# Doubled braces are literal LaTeX braces; only {era} and {mass} are substituted.
# Top row: density plots for SR1 / SR2, bottom row: ratio-to-fit plots.
# NOTE(review): the image paths hard-code the "split" binning type and a
# "figures/" prefix, and the caption hard-codes dbs=0.04 - keep these in sync
# with the file names written by the plotting cells.
table_template = r'''
\begin{{table}}[h]
    \centering
    \begin{{tabular}}{{c|c}}
       SR1  &  SR2\\
       \includegraphics[width=.4\linewidth]{{figures/fits/bff/split_binning_density_{era}_SR1_{mass}.png}} &
       \includegraphics[width=.4\linewidth]{{figures/fits/bff/split_binning_density_{era}_SR2_{mass}.png}} \\
       \hline \\
       \includegraphics[width=.4\linewidth]{{figures/fits/bff/split_binning_ratio_{era}_SR1_{mass}.png}} &
       \includegraphics[width=.4\linewidth]{{figures/fits/bff/split_binning_ratio_{era}_SR2_{mass}.png}} \\
    \end{{tabular}}
    \caption{{Signal model vs mc histogram for \dbs=0.04 {mass} GeV sample in {era}. Bottom plot shows ratio plot for the 4 sigma width of the distribution.}}
    \label{{fig:{era}_{mass}_fit}}
\end{{table}}
'''

In [None]:
# Emit one filled-in LaTeX table per signal mass point (mass shown as an int).
for mass in masses:
    filled_table = table_template.format(era=era, mass=int(mass))
    print(filled_table)