In [None]:
import pandas as pd
import numpy as np
from bff_processor.Systogram import Systogram
from bff_processor.bff_meta import *
from bff_processor.sig_op_func import *
from bff_processor.bff_cuts import bff_1, bff_2, reg_filter
from bff_processor.plotting_utils import unc_plot, boost2unc
from bff_processor.utils import nratio_plot_template, hist2unc, vunc2nom, chiSquared, color_map
import os
import re
from time import perf_counter
import boost_histogram as bh
import pyarrow.feather as feather
import zfit
from zfit import z
from math import pi
import mplhep as hep
# Switch matplotlib to the CMS style provided by mplhep and render all text
# through LaTeX.
# NOTE(review): `plt` is not imported explicitly in this cell; presumably it is
# supplied by one of the star imports above — confirm.
hep.style.use(hep.style.CMS)
plt.rcParams["text.usetex"] = True
from zfit.models.physics import DoubleCB

In [None]:
import pickle

In [None]:
# Shared zfit parameters of the double-sided Crystal Ball signal shape, plus a
# yield parameter. Arguments are: name, starting value, lower limit, upper limit.
try:
    mu = zfit.Parameter("mu", 300,  100, 600)
    sigma = zfit.Parameter("sigma", 20,  0, 100)
    alphal = zfit.Parameter("alphal", 2,  0, 100)
    nl = zfit.Parameter("nl", 3,  -10, 10)
    alphar = zfit.Parameter("alphar", 1.5,  0, 100)
    nr = zfit.Parameter("nr", 6,  -100, 100)
    Nsig = zfit.Parameter("Nsig", 1., -20., 1e8)
except zfit.exception.NameAlreadyTakenError:
    # The cell was re-run in a live kernel: the parameters created by the
    # earlier execution are still bound to these names, so keep using them.
    # Any other failure (typo, bad limits, interrupt) now propagates instead
    # of being reported as "already defined".
    print("already defined")

In [None]:
# Lower and upper edge of the fit window; the same limits are applied to
# DiLepMass when the signal sample is selected.
x_range = [110, 800]
obs = zfit.Space("x", limits=x_range)

# Double-sided Crystal Ball over that window, wired to the shared parameters.
doublecb = DoubleCB(
    obs=obs,
    mu=mu,
    sigma=sigma,
    alphal=alphal,
    nl=nl,
    alphar=alphar,
    nr=nr,
)

In [None]:
# Era to process. `lumi` is the value handed to the CMS label on the plots.
era = '2016'
LUMI_BY_ERA = {'2016': 35.50, '2017': 41.85, '2018': 58.88}
if era not in LUMI_BY_ERA:
    # Fail here rather than with a NameError on `lumi` in the plotting cell.
    raise ValueError(
        "unknown era {!r}; expected one of {}".format(era, sorted(LUMI_BY_ERA))
    )
lumi = LUMI_BY_ERA[era]
df = feather.read_feather('data/combined_{}.feather'.format(era))

In [None]:
# Keep only the samples whose name contains "BFF".
bff_data = df[df.name.str.contains("BFF")]

# Restrict to the fit window (exclusive edges) and to dbs == 0.04.
# Note: `df` is rebound to this filtered frame; later cells read it.
df = bff_data.loc[
    (bff_data.DiLepMass > x_range[0])
    & (bff_data.DiLepMass < x_range[1])
    & (bff_data.dbs == 0.04)
]

# Sorted list of the distinct mass values left after the selection.
masses = sorted(df.mass.unique())
masses

In [None]:
def reset_params(data):
    """Put the shared fit parameters back to their starting values.

    ``mu`` and ``sigma`` are seeded with the (unweighted) mean and standard
    deviation of ``data``; the tail parameters go back to fixed defaults.
    """
    starting_values = (
        (mu, np.mean(data)),
        (sigma, np.std(data)),
        (alphal, 2),
        (nl, 3),
        (alphar, 1.5),
        (nr, 6),
    )
    for parameter, value in starting_values:
        parameter.set_value(value)

In [None]:
def scaled_pdf(pdf, x, sumW, obs):
    """Evaluate ``pdf`` at ``x`` and scale it for overlay on a histogram.

    The density is multiplied by ``sumW / len(x) * obs.area()`` and the
    product's ``.numpy()`` value is returned.
    """
    density = pdf(x)
    per_bin_scale = sumW / len(x) * obs.area()
    scaled = density * per_bin_scale
    return scaled.numpy()

In [None]:
# Fit the double-sided Crystal Ball to every (signal region, mass) sample.
#   param_list : one dict of fitted values per fit (later turned into a table)
#   plot_dict  : plot_dict[SR][mass] = {"fit": scaled pdf, "hist": weighted histogram}
param_list = []
plot_dict = {}
compute_hesse = False

# The binning is the same for every fit, so build it once instead of on every
# iteration. `bin_centers` is also read by the plotting cell further down.
BIN_WIDTH = 2  # same units as x_range
bins = np.linspace(*x_range, int((x_range[1] - x_range[0]) / BIN_WIDTH + 1))
bin_centers = (bins[:-1] + bins[1:]) / 2

for SR in ["SR1", "SR2"]:
    plot_dict[SR] = {}
    for mass in masses:
        print(mass)
        tdf = df[(df.mass == mass) & (df[SR + '_nom'] == 1)]
        data, weights = tdf.DiLepMass.to_numpy(), tdf.Weight.to_numpy()
        # Start every fit from the same seed values instead of the previous optimum.
        reset_params(data)

        doublecb = DoubleCB(obs=obs, mu=mu, sigma=sigma,
                            alphal=alphal, nl=nl,
                            alphar=alphar, nr=nr)

        # Weighted unbinned likelihood fit.
        data_zfit = zfit.Data.from_numpy(obs=obs, array=data, weights=weights)
        nll = zfit.loss.UnbinnedNLL(model=doublecb, data=data_zfit)
        minimizer = zfit.minimize.Minuit()
        result = minimizer.minimize(nll)
        if not result.converged:
            # Still record the values, but make the failure visible.
            print("warning: fit did not converge for {} mass {}".format(SR, mass))

        # Hesse errors are optional; without them no "<name>_error" entries are written.
        hesse_errors = result.hesse() if compute_hesse else {}
        param_dict = {
            'reg': SR,
            'mass': mass,
            **{p.name: p.value().numpy() for p in result.params},
            **{p.name + "_error": hesse_errors[p]['error'] for p in hesse_errors},
        }
        param_list.append(param_dict)

        # Fitted curve scaled to the histogram, and the weighted histogram itself.
        y = scaled_pdf(doublecb.pdf, bin_centers, np.sum(weights), obs)
        hist, _ = np.histogram(data, weights=weights, bins=bins)

        plot_dict[SR][mass] = {"fit": y, "hist": hist}

In [None]:
# Turn the per-fit dictionaries collected in param_list into one table.
# Note: `pdf` here is this parameter DataFrame, not a probability density.
pdf = pd.DataFrame(param_list)
pdf

In [None]:
# Write the fitted parameters to disk. Create the output directory first so
# the write does not fail when 'fits/bff' does not exist yet.
os.makedirs('fits/bff', exist_ok=True)
pdf.to_csv('fits/bff/bff_{}.csv'.format(era))

In [None]:
# Overlay every fitted curve on its histogram (top panel) and draw the
# fit / histogram ratio (bottom panel): one figure per signal region.
colors = color_map(len(masses))
# Width of the histogram bins actually drawn, so the y label cannot drift
# away from the binning used in the fit cell.
bin_width = bin_centers[1] - bin_centers[0]
os.makedirs('fits/bff', exist_ok=True)
for SR, SR_dict in plot_dict.items():
    fig, ax = nratio_plot_template(nPlots=[1,1])
    (top, bottom) = ax[0][0]
    for color, (mass, mass_dict) in zip(colors, SR_dict.items()):
        # darker variant of the fit colour for the histogram points
        hist_color = np.array(color, dtype=float) * .75
        #set alpha
        hist_color[-1] = 1

        fit_plot = mass_dict['fit']
        hist = mass_dict['hist']
        top.scatter(bin_centers, hist, label='{} GeV'.format( mass), color=hist_color)
        top.plot(bin_centers, fit_plot, color=color)

        # Empty bins give NaN (left blank in the plot) instead of a
        # divide-by-zero warning and infinities.
        ratio = np.full(len(hist), np.nan)
        np.divide(fit_plot, hist, out=ratio, where=hist != 0)
        bottom.plot(bin_centers, ratio, color=color)
    top.legend(title=SR)
    top.set_yscale('log')
    top.set_ylim(bottom=.1e-2,top=1e3)
    bottom.set_ylim(bottom=0, top=2)
    bottom.plot(bin_centers, np.full(len(bin_centers), 1), color='black', linestyle=':')
    bottom.set_xlabel('DiLepMass [GeV]')
    top.set_ylabel('Count per {:g} GeV'.format(bin_width))
    hep.cms.label(loc=0,ax=top,lumi=lumi,year=era, data=False)
    fig.savefig('fits/bff/bff_{}_{}.png'.format(SR, era))

In [None]:
def gs_plot(xrange, yrange, **kwargs):
    """Return a figure populated with an ``xrange`` by ``yrange`` grid of axes.

    Additional keyword arguments are passed on to ``plt.figure``. The axes are
    added row by row, so ``fig.axes`` ends up in row-major order.
    """
    fig = plt.figure(constrained_layout=True, **kwargs)
    grid = fig.add_gridspec(xrange, yrange)

    cells = [(row, col) for row in range(xrange) for col in range(yrange)]
    for row, col in cells:
        fig.add_subplot(grid[row, col])
    return fig

In [None]:
# One 2x3 figure per signal region showing each fitted parameter versus mass.
params = ["alphal" ,"alphar" ,"mu","nl" ,"nr" ,"sigma"]
# Position of each parameter's panel in fig.axes (gs_plot adds axes row by row).
indicies =  [2,5,0,1,4,3]
os.makedirs('fits/bff', exist_ok=True)
for SR in pdf.reg.unique():
    tdf = pdf[pdf.reg==SR]
    x = tdf.mass.to_numpy()
    fig = gs_plot(2,3)
    for index, par in zip(indicies, params):
        axes = fig.axes[index]
        y = tdf[par].to_numpy()
        # The "<par>_error" columns exist only when the fits were run with
        # compute_hesse=True; without them draw the central values alone
        # rather than raising KeyError.
        error_col = par + "_error"
        yerr = tdf[error_col].to_numpy() if error_col in tdf.columns else None
        axes.errorbar(x, y, yerr=yerr)
        axes.set_title(SR + " " + par)
        axes.set_xlabel('mass [GeV]')

    fig.savefig('fits/bff/params_{}_{}.png'.format(era, SR))