In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit

In [None]:
from src.data_tools.StackPlotter import StackPlotter, get_stack_plotter
from src.plotting_tools.latexAssets import mll
from src.plotting_tools.cms_format import cms_style, cms_format_fig
from src.plotting_tools.Bins import Bins
from src.plotting_tools.utils import ratio_plot_template, nratio_plot_template
from src.general.functions import make_bpoly

cms_style()

from src.general.functions import linear, make_bpoly, double_crystalball

In [None]:
## get data for fit
# Resolve the project output directory and build the stack plotter that the
# following cells query for signal histograms.
from src.assets.output_dir import output_dir
outdir = output_dir  # alias: later cells use both `outdir` and `output_dir`
era = '2017'
# NOTE(review): the meaning of bins=0 is defined inside get_stack_plotter -- confirm.
sp = get_stack_plotter(outdir, era, bins=0)

In [None]:
masses = np.linspace(125,400,int((400-125)/5+1), dtype=int)
dbses  = [0.04, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]

masses, dbses

In [None]:
#make mean and sigma fit
# For every signal sample, fit a Gaussian (scipy.stats.norm.pdf, parameters
# loc and scale) to the normalized DiLepMass histogram and record the fitted
# mean and width as a function of the sample mass.
from scipy.stats import norm

feature='DiLepMass'
reg = 'SR1'
signal_df = sp.get_signal_hist(feature, reg, dbs=0.04)
# NOTE(review): the SR2 query below has no dbs filter, unlike the SR1 query
# above -- confirm this asymmetry is intended.
signal_df = pd.concat([signal_df,sp.get_signal_hist(feature, 'SR2')])
signal_df = signal_df.sort_values('mass')

fit_params = []
fig, ax = plt.subplots(1)

for i, row in signal_df.iterrows():
    # Skips rows whose mass is zero, negative or NaN.
    if not row.mass > 0: continue
    hist = row['hist'].normalize()
    hist.draw(ax)
    y = hist.nominal
    x = hist.bins.calc_bin_centers()
    # Start the fit at loc = sample mass and scale = 10.
    popt, pcov = curve_fit(norm.pdf, x, y, p0 = [row.mass, 10])
    ax.plot(x, norm.pdf(x, *popt))
    fit_params.append({"mass": row.mass, "mu": popt[0], "sigma": popt[1]})
ax.set_yscale('log')
ax.set_ylim(bottom=1e-3, top = 1e0)

# One row per fitted histogram; consumed by make_mean_sigma_model().
fit_params = pd.DataFrame(fit_params).sort_values('mass')

In [None]:
##
## compare different dbs
##
# Overlay the normalized SR1 DiLepMass shapes of the 200 GeV signal sample for
# dbs=0.04 and dbs=1.0 and save the comparison figure.
feature='DiLepMass'
reg = 'SR1'
mass = 200
signal_df = sp.get_signal_hist(feature, reg, dbs=0.04, mass=mass)
hist0p04 = signal_df.iloc[0]['hist']
signal_df = sp.get_signal_hist(feature, reg, dbs=1.0, mass=mass)
hist1p0 = signal_df.iloc[0]['hist']

fig, ax = plt.subplots()

# Raw strings: "\d", "\e" and "\l" are not valid Python escape sequences, so
# the non-raw spellings only worked by accident and emit a warning on current
# interpreters. The resulting label text is unchanged.
hist0p04.normalize().draw(ax, label=r'$\delta_{bs}=0.04$')
hist1p0.normalize().draw(ax, color='red', label=r'$\delta_{bs}=1.0$')

ax.set_xlim(180,220)
ax.legend()
ax.set_ylabel('Events')
ax.set_xlabel(r'$m_{\ell\ell}$ [GeV]')

cms_format_fig(era, ax, r"\emph{Simulation}")
fig.savefig('{}/sig_interpolation/{}_{}_{}_dbs_comp.pdf'.format(output_dir, feature, reg, mass))

In [None]:
sys = sp.plot_df[(sp.plot_df.mass==200) & 
          (sp.plot_df.dbs==1.0) & 
          (sp.plot_df.reg=="SR1")& 
          (sp.plot_df.feature=="DiLepMass")].sys.iloc[0]

In [None]:
{k: np.sum(np.abs(v)) for k, v in sys.items()}

In [None]:
def make_model(x, y, func, ax=0, **kwargs):
    """Fit ``func`` to the points ``(x, y)`` and return the fitted curve.

    Parameters
    ----------
    x, y : sequences of the points to fit.
    func : callable ``func(x, *params)`` handed to ``curve_fit``.
    ax : optional axes-like object. When truthy, the input points are drawn
        as a scatter and the fitted curve is drawn on a grid 20 times denser
        than the input.
    **kwargs : forwarded unchanged to ``curve_fit`` (for example ``p0``).

    Returns
    -------
    callable
        ``model(x)`` evaluating ``func`` with the best-fit parameters.
    """
    best_fit, _ = curve_fit(func, x, y, **kwargs)

    def model(x):
        return func(x, *best_fit)

    # No axes supplied: the caller only wants the fitted callable.
    if not ax:
        return model

    ax.scatter(x, y, label='data (1+2 jet SR)')
    dense_x = np.linspace(min(x), max(x), len(y)*20)
    ax.plot(dense_x, model(dense_x), label='fit')
    return model
def make_mean_sigma_model():
    """Fit the Gaussian mean and width as functions of the signal mass.

    Reads the notebook-level ``fit_params`` frame (columns ``mass``, ``mu``,
    ``sigma``), fits ``linear`` to the means and ``make_bpoly`` (four start
    parameters) to the widths, draws both fits side by side, prints the
    average sigma per mass and saves the figure under
    ``<outdir>/sig_interpolation``.

    Returns
    -------
    tuple
        ``(mean_model, sigma_model)``, two callables of the mass.
    """
    fig, (mean_ax, sigma_ax) = plt.subplots(1,2, figsize = (20, 10))

    mean_model = make_model(fit_params.mass, fit_params.mu, linear, ax=mean_ax)
    sigma_model = make_model(fit_params.mass, fit_params.sigma, make_bpoly, ax=sigma_ax,
                            p0=[1,1, 1, 1])
    # Diagnostic printout: average fitted width for each mass point.
    for mass in fit_params.mass.unique():
        tdf = fit_params[fit_params.mass==mass]
        print(mass, tdf.sigma.mean())
    # Raw strings: "\e" is not a valid Python escape sequence; the label text
    # passed on is unchanged.
    cms_format_fig(era, mean_ax, r"\emph{Simulation}")
    cms_format_fig(era, sigma_ax, r"\emph{Simulation}")

    mean_ax.set_xlabel(mll+ ' [GeV]')
    sigma_ax.set_xlabel(mll+ ' [GeV]')

    mean_ax.set_ylabel('mean [GeV]')
    sigma_ax.set_ylabel('sigma [GeV]')

    # NOTE(review): only the mean panel gets a legend -- confirm intended.
    mean_ax.legend()
    fig.savefig('{}/sig_interpolation/{}_mean_sigma_fit.pdf'.format(outdir, era))
    return mean_model, sigma_model

In [None]:
mean_model, sigma_model = make_mean_sigma_model()

In [None]:
from src.data_tools.apply_cuts_v2 import  process_sample
from src.plotting_tools.SysHist import make_sys_hist, SysHist

In [None]:
def make_hist_dict(row, reg, _bins, sample_df=None):
    """Build the DiLepMass systematic histogram for one sample as a flat dict.

    Parameters
    ----------
    row : pandas.Series describing the sample; its entries are merged into
        the result.
    reg : region name passed to ``make_sys_hist`` and stored under ``'reg'``.
    _bins : object exposing ``bin_edges``, used as the histogram binning.
    sample_df : optional event frame to histogram. When omitted, the
        notebook-level ``df`` is used, which is what existing callers rely on.

    Returns
    -------
    dict
        ``row.to_dict()`` merged with ``hist.to_dict()`` plus the ``'reg'``
        and ``'feature'`` entries; histogram keys win on collisions.
    """
    select_level = 1
    isdata = False
    feature = 'DiLepMass'
    # Passing the events in explicitly avoids depending on whichever `df` was
    # assigned last in the kernel; fall back to the global for old call sites.
    events = df if sample_df is None else sample_df
    hist = make_sys_hist(events, feature, reg, bin_edges=_bins.bin_edges,
                         ind_sys_hist=False, select_level=select_level,
                         isdata=isdata)

    hist_dict = hist.to_dict()

    hist_dict['reg'] = reg
    hist_dict['feature'] = feature
    # The former debug print guarded by feature == 'minGoodJetElDR' could never
    # run because feature is fixed to 'DiLepMass' above, so it was removed.
    return {**row.to_dict(), **hist_dict}
    

In [None]:
mean = mean_model(750)
sigma = sigma_model(750)
mean, sigma

In [None]:
def fit_dcb(x,y, **kwargs):
    """Fit a double Crystal Ball shape to the points ``(x, y)``.

    The y-weighted average of ``x`` is mapped through the notebook-level
    ``mean_model``; the result is then fed to ``sigma_model``. Those two values
    seed the fit and bound it: the mean within +-10, the width within
    -50% / +20%. The remaining start values and lower bounds are fixed
    constants. Extra keyword arguments are forwarded to ``curve_fit``.

    Returns
    -------
    callable
        ``f(x)`` evaluating ``double_crystalball`` with the fitted parameters.
    """
    weighted_mean = np.sum(x*y)/np.sum(y)
    mean = mean_model(weighted_mean)
    # The width model is evaluated at the modelled mean, not the raw average.
    sigma = sigma_model(mean)

    start = [6.29906419e-02, mean, sigma, 1.26e+00, 1.8e+00, 1.5e+00, 1.5e+01]
    lower = [0, mean-10, sigma-sigma*.5,  1.1, 1.1,1.2, 1.2]
    upper = [np.inf, mean+10, sigma+sigma*.2,  np.inf, np.inf, np.inf, np.inf]

    popt, _ = curve_fit(double_crystalball, x, y,
                        p0=start,
                        bounds=(lower, upper),
                        maxfev=int(1e4),
                        **kwargs)

    def fitted(x):
        return double_crystalball(x, *popt)

    return fitted


In [None]:
def make_y_fit_func(x,y, **kwargs):
    """Return the fitted shape for ``(x, y)``; currently a plain pass-through to ``fit_dcb``."""
    return fit_dcb(x, y, **kwargs)

In [None]:
def make_sys_fits(_sh, row, reg, x_fit, show=False):
    """Fit the nominal and every systematic variation of one histogram.

    Parameters
    ----------
    _sh : histogram object exposing ``calc_bin_centers()``, ``nominal`` and a
        ``sys`` mapping of name -> ``(down, up)`` offsets relative to nominal.
    row : sample record; only ``row.mass`` is read.
    reg : region label stored in the result.
    x_fit : points at which each fitted shape is evaluated.
    show : when true, plot the inputs and fits with pyplot.

    Returns
    -------
    dict
        ``mass``, ``reg``, ``bins`` (bin centres), ``nom`` (fitted nominal) and,
        for every systematic, the fitted Up shape under the systematic name and
        the fitted Down shape under the name with ``'Up'`` replaced by
        ``'Down'``. ``'SR1'``/``'SR2'`` in names are replaced by ``'Reg'``.
    """
    hist_dict = {"mass": row.mass, "reg": reg}
    x = _sh.calc_bin_centers()
    hist_dict['bins'] = x
    # nominal
    y = _sh.nominal
    y_fit = make_y_fit_func(x,y)(x_fit)
    hist_dict['nom'] = y_fit
    if show:
        plt.clf()
        plt.plot(x,y)
        plt.plot(x_fit,y_fit)
        plt.show()

    for sys_name, (down_shift, up_shift) in _sh.sys.items():
        # The stored variations are offsets; build absolute shapes without
        # touching the arrays held by the histogram.
        y_down = down_shift + y
        y_up = up_shift + y
        y_up_fit = make_y_fit_func(x,y_up)(x_fit)
        y_down_fit = make_y_fit_func(x,y_down)(x_fit)

        # Region-independent key so SR1 and SR2 share column names.
        key = sys_name.replace('SR1', 'Reg').replace('SR2', 'Reg')
        hist_dict[key] = y_up_fit
        # Fix: the Down entry previously stored the Up fit as well.
        # NOTE(review): a name without 'Up' would make both keys identical and
        # the Down fit would win -- confirm every systematic name contains 'Up'.
        hist_dict[key.replace('Up', 'Down')] = y_down_fit

        if show:
            plt.clf()
            plt.plot(x, y_down)
            plt.plot(x, y_up)
            plt.plot(x_fit, y_down_fit)
            plt.plot(x_fit, y_up_fit)
            plt.show()
    return hist_dict
    

In [None]:
signal_df = sp.get_signal_hist(feature, 'SR1').sort_values(['mass'])

In [None]:
def make_bins(mean, sigma,nSigma=5, n_hist_bins=100):
    """Return ``Bins`` built from ``n_hist_bins`` evenly spaced values spanning ``mean +- nSigma*sigma``."""
    low = mean-sigma*nSigma
    high = mean+sigma*nSigma
    edges = np.linspace(low, high, n_hist_bins)
    return Bins(edges)

In [None]:
from time import perf_counter

In [None]:
#make signal hist
# For every signal mass up to 350, histogram DiLepMass in a +-nSigma window
# around the modelled peak for SR1 and SR2, fit nominal and systematic shapes
# with make_sys_fits, and collect one record per (mass, region).
hist_list = []
n_hist_bins = 100
nSigma = 5
nfit_multi = 1
hist_dict_list = []
for mass in signal_df.mass.unique():
    if  mass > 350: continue
    print(mass)
    mdf = signal_df[(signal_df.mass==mass) & (signal_df.reg=='SR1')]
    mean, sigma = mean_model(mass), sigma_model(mass)
    bins = make_bins(mean, sigma,nSigma=nSigma, n_hist_bins=n_hist_bins )
    fit_bins = make_bins(mean, sigma,nSigma=nSigma, n_hist_bins=n_hist_bins*nfit_multi)
    
    for reg in ['SR1', 'SR2']:
        # sum up 
        # NOTE(review): nhists is never incremented, so the `sh += ...` branch
        # below never runs and `sh` ends up holding only the last row of mdf.
        # Confirm whether accumulation over rows was intended before changing.
        nhists = 0
        for i, row in mdf.iterrows():
            # make_hist_dict reads this `df` through the notebook namespace.
            # NOTE(review): the same row is re-processed for each region.
            df = process_sample(row, era, verbose=False, trigger_fix=True)
            hist = make_hist_dict(row, reg, bins)
            #make fit hist
            if nhists: sh += SysHist.from_dict(hist)
            else: sh = SysHist.from_dict(hist)
            # normalize for now
            sh = sh.normalize()
        # `row` here is the last row left over from the loop above.
        print(reg, mass, row.mass, row.dbs, reg, sh.calc_sum())
        hist_dict_list.append(make_sys_fits(sh, row, reg, fit_bins.calc_bin_centers(), show=False ))


In [None]:
standardized_bins = Bins(np.linspace(-nSigma, nSigma, n_hist_bins*nfit_multi))

In [None]:
fit_hist_df = pd.DataFrame(hist_dict_list)

In [None]:
from scipy import interpolate

In [None]:
# make interpolations for normalized hists

In [None]:
def make_inerp(reg, y_series):
    """Build a 2-D spline over (standardized bin centre, mass) for one region.

    Takes the rows of the notebook-level ``fit_hist_df`` for ``reg``, keeps the
    first row per mass, stacks the arrays stored in column ``y_series`` and
    returns a ``RectBivariateSpline`` whose first axis is
    ``standardized_bins.calc_bin_centers()`` and whose second axis is the mass.
    """
    region_rows = fit_hist_df[(fit_hist_df.reg==reg)]
    per_mass = region_rows.drop_duplicates('mass')
    # One stacked row per mass; transposed so axis 0 follows the bin centres.
    grid = np.stack(per_mass[y_series].to_list())
    centers = standardized_bins.calc_bin_centers()
    return interpolate.RectBivariateSpline(centers, per_mass.mass, grid.T)

In [None]:
def make_hists(reg, y_series, show=False):
    """Evaluate the interpolated shape of ``y_series`` at every target mass.

    Uses the notebook-level ``masses``, ``standardized_bins``, ``mean_model``,
    ``sigma_model``, ``n_hist_bins`` and ``nfit_multi``. For each mass the
    spline from ``make_inerp`` is sampled at the standardized bin centres and
    rescaled to unit sum; ``x`` holds the bin centres of ``make_bins`` (default
    ``nSigma``) around the modelled peak.

    Returns
    -------
    list of dict
        One record per mass with keys ``reg``, ``y_series``, ``mass``, ``y``, ``x``.
    """
    spline = make_inerp(reg, y_series)
    std_centers = standardized_bins.calc_bin_centers()
    # Transposed so that each row corresponds to one mass.
    intp = spline(std_centers, masses).T
    means = mean_model(masses)
    sigmas = sigma_model(masses)

    xs = []
    for mean, sigma in zip(means, sigmas):
        mass_bins = make_bins(mean, sigma, n_hist_bins = n_hist_bins*nfit_multi)
        xs.append(np.array(mass_bins.calc_bin_centers()))

    if show:
        fig, ax = plt.subplots()
        ax.imshow(intp)

    records = []
    for mass, y, x in zip(masses, intp, xs):
        records.append({"reg": reg, "y_series": y_series, "mass":mass, 'y': y/y.sum(), "x": x})
    return records

In [None]:
interps = []
for reg in ['SR1', 'SR2']:
    for sys in fit_hist_df.filter(regex='nom|Reg|Weight').keys():
        print(reg, sys)
        interps+=make_hists(reg, sys, show=False)

df_shape_interp = pd.DataFrame(interps)

In [None]:
# make dbs normalized hists

In [None]:
def get_normalization(mass, reg, dbs, sys):
    '''only works for histograms based on counts

    Return the summed content of the 'BFF' DiLepMass histogram for the given
    mass, region and dbs, taken from the notebook-level ``sp.plot_df``.

    ``sys == 'nom'`` sums the nominal histogram. Any other name is mapped to
    its 'Up' key with 'Reg' replaced by ``reg``; the Up or Down offset (chosen
    by whether the requested name contains 'Up') is added to the nominal before
    summing. Raises ``IndexError`` when no row matches the selection.
    '''
    plot_df = sp.plot_df
    selection = ((plot_df.mass==mass) &
                 (plot_df.dbs==dbs) &
                 (plot_df.reg==reg) &
                 (plot_df.feature=='DiLepMass') &
                 (plot_df.category=='BFF'))
    hist = plot_df[selection].iloc[0]

    if sys=='nom':
        return np.sum(hist.nom)

    # Index into the stored (down, up) pair: 0 for Down, 1 for Up.
    sys_up = int('Up' in sys)
    key = sys.replace('Down', 'Up').replace('Reg', reg)
    return np.sum(hist['sys'][key][sys_up] + hist.nom)

In [None]:
from scipy.interpolate import LinearNDInterpolator


# Build, for every shape key (nominal and each systematic) and each region, a
# linear interpolator of the histogram normalization over the (mass, dbs) plane.
norm_interpolators = {}
# The sorted grid values do not depend on the loops below, so compute them once.
mass_points = sp.plot_df.sort_values(['mass']).mass.unique()
dbs_points = sp.plot_df.sort_values(['dbs']).dbs.unique()
for sys in fit_hist_df.filter(regex='nom|Reg|Weight').keys():
    norm_interpolators[sys] = {}
    for reg in ['SR1', 'SR2']:
        mass_arr = []
        dbs_arr = []
        norm_arr = []
        for mass in mass_points:
            if not mass > 0: continue
            for dbs in dbs_points:
                if not dbs > 0: continue
                try:
                    # Separate name so scipy.stats.norm from the fit cell is not clobbered.
                    norm_value = get_normalization(mass, reg, dbs, sys)
                except (IndexError, KeyError, TypeError):
                    # Best effort: skip (mass, dbs) points that have no matching
                    # histogram row or lack this systematic. Anything else is a
                    # real error and is no longer silently swallowed.
                    continue
                norm_arr.append(norm_value)
                mass_arr.append(mass)
                dbs_arr.append(dbs)
        # The unused np.meshgrid of the collected points was dropped; the
        # interpolator takes the scattered (mass, dbs) pairs directly.
        interp = LinearNDInterpolator(list(zip(mass_arr, dbs_arr)), norm_arr)
        norm_interpolators[sys][reg] = interp

In [None]:
# combine it all and make proper normalized histograms

In [None]:
# Scale every unit-sum interpolated shape by the interpolated normalization at
# its (mass, dbs) point, producing one record per (dbs, region, key, mass).
norm_hist_list = []
for dbs in dbses:
    print(dbs)
    for i, row in df_shape_interp.iterrows():
        reg, sys, mass = row.reg, row.y_series, row.mass
        # The interpolator is called with one-element sequences; [0] unwraps the result.
        # NOTE(review): LinearNDInterpolator returns NaN outside the convex hull
        # of its input points by default -- confirm all target points lie inside.
        norm = norm_interpolators[sys][reg]([mass], [dbs])[0]
        x, y = row.x, row.y.copy()
        y = y*norm
        norm_hist_list.append({'reg': reg, 'sys': sys, 'mass': mass, 'dbs': dbs, 'norm': norm, 
                              'x': x, 'y': y})
        print(reg, sys, mass, dbs, norm)
        

In [None]:
norm_hist_df = pd.DataFrame(norm_hist_list)

In [None]:
import pickle as pkl

In [None]:
with open('{}/data/{}_bff_interp_dbs_norm.pkl'.format(outdir, era), 'wb') as f:
    pkl.dump(norm_hist_df, f)

In [None]:
##
## interp test
##

##
## compare different dbs
##


def make_inerp_df(tdf, y_series):
    """Build a 2-D spline over (standardized bin centre, mass) from the rows of ``tdf``.

    Same construction as ``make_inerp`` but the caller supplies the already
    selected frame, so a subset of masses can be used for closure tests.
    """
    #tdf = fit_hist_df[(fit_hist_df.reg==reg) ].drop_duplicates('mass')
    stacked = np.stack(tdf[y_series].to_list())
    centers = standardized_bins.calc_bin_centers()
    # Transposed so axis 0 follows the bin centres and axis 1 the masses.
    return interpolate.RectBivariateSpline(centers, tdf.mass, stacked.T)


# Closure test: build the interpolation from every second mass point and
# compare it with the simulated histogram at the held-out masses in between.
for reg in ['SR1', 'SR2']:
    tdf = fit_hist_df[(fit_hist_df.reg==reg)].drop_duplicates('mass')
    iterpolator = make_inerp_df(tdf.iloc[::2], 'nom')
    # Dedicated names: this cell used to overwrite the notebook-level `masses`
    # (read by make_hists) and `signal_df` (read by the histogram loop), so
    # re-running those cells afterwards silently used the wrong inputs.
    test_masses = tdf.iloc[1:-2:2].mass
    test_masses = test_masses[test_masses<=350]
    bin_centers = standardized_bins.calc_bin_centers()
    bin_edges = standardized_bins.bin_edges
    for mass in test_masses:
        fig, ax = plt.subplots(1)
        mean, sigma = mean_model(mass), sigma_model(mass)
        # Map the standardized axis back to GeV around the modelled peak.
        scaled_bin_centers = bin_centers*sigma+mean
        scaled_bin_edges = bin_edges*sigma+mean

        #mc comparison
        mc_signal_df = sp.get_signal_hist(feature, reg, dbs=0.04, mass=mass)
        hist0p04 = mc_signal_df.iloc[0]['hist']
        hist0p04.normalize().draw(ax, label='MC')

        #interpolation
        # Rescale the unit-sum interpolation to the MC histogram's bin width.
        bin_width = scaled_bin_edges[1]-scaled_bin_edges[0]
        bin_scale = hist0p04.normalize().nominal.sum()*hist0p04.calc_bin_widths()[0]/bin_width
        z_interp = iterpolator(bin_centers, [mass]).reshape(-1)
        z_interp = z_interp/z_interp.sum()*bin_scale
        #hinterp_hist = SysHist(z_interp, z_interp*0,z_interp*0, z_interp*0.1, scaled_bin_edges)
        ax.plot(scaled_bin_centers, z_interp, label='interpolation', color='red')
        #hinterp_hist.normalize().draw(ax, errorbar=False)

        ax.legend()
        ax.set_ylabel('Events')
        # Raw strings: "\l" and "\e" are not valid Python escape sequences.
        ax.set_xlabel(r'$m_{\ell\ell}$ [GeV]')
        ax.set_xlim([scaled_bin_centers[0], scaled_bin_centers[-1]])
        cms_format_fig(era, ax, r"\emph{Simulation}")
        fig.savefig('{}/sig_interpolation/{}_{}_interpolation_comp.pdf'.format(output_dir, reg, mass))
        
##make intermediate mass plots:
#masses = tdf.iloc[1:-2:2].mass
#masses = masses[masses<=350]
#fig, axs = nratio_plot_template(nPlots=[len(masses), 1], figsize=(40,15))
#for mass, ax in zip(masses, axs):
#    top, bottom = ax[0]
#    bin_centers = standardized_bins.calc_bin_centers()
#    hist = tdf[tdf.mass==mass].iloc[0]['nom']
#    fit = tdf[tdf.mass==mass].iloc[0]['nom']
#    
#    z_interp = iterpolator(bin_centers, [mass])
#
#    #top
#    hist.draw(top, label='{} GeV'.format(mass))
#    mean, sigma = mean_model(mass), sigma_model(mass)
#    scaled_bin_centers = bin_centers*sigma+mean
#    top.plot(scaled_bin_centers,  z_interp, ds='steps-mid', label='interpolation', color='red', zorder=3)
#    #bottom
#    (hist*(1./z_interp)).draw(bottom)
#    bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1) , color='black', zorder=3)
#    bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1.1) , color='black', zorder=3, ls=':')
#    bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), .9) , color='black', zorder=3, ls=':')
#    
#    cms_format_fig(era, top)
#    top.legend()
#    bottom.set_xlabel(mll + ' [GeV]')
#    top.set_ylabel('counts')
#    bottom.set_ylabel('ratio')
#    bottom.set_ylim(0,2)
#fig.savefig('{}/sig_interpolation/{}_{}_interp_test.pdf'.format(outdir, era, reg))

In [None]:
tdf[tdf.mass==mass].iloc[0]

In [None]:



break

In [None]:
from src.plotting_tools.SysHist import SysHist, make_sys_hist_v2
from src.plotting_tools.latexAssets import mll
from src.plotting_tools.cms_format import cms_style, cms_format_fig
from src.plotting_tools.Bins import Bins
from src.plotting_tools.utils import ratio_plot_template, nratio_plot_template
from src.general.functions import make_bpoly

cms_style()

In [None]:
from src.general.functions import linear, make_bpoly, double_crystalball

In [None]:
from src.data_tools.get_data import get_data

In [None]:

_df, lumi = get_data(era, outdir, df_filter=lambda x: x.type=='sig', verbose=1)

In [None]:
#def get_puid_stats(df, weight):
#    tdf = df[(df.SR2_jet_nom_muon_corrected_pt_ele_pt==1)]
#    #tdf = df
#    #return (tdf['{}Up'.format(weight)].mean())/tdf['Weight'].mean()
#    return (tdf['{}Up'.format(weight)]).mean()/(tdf.Weight).mean()

In [None]:
df = _df

In [None]:
fit_params = pd.read_csv('{}/fits/{}_signal_fits_params.csv'.format(outdir, era))

In [None]:
fit_params

In [None]:
def make_model(x, y, func, ax=0, **kwargs):
    """Least-squares fit of ``func`` to ``(x, y)``; returns the fitted callable.

    ``kwargs`` go straight to ``curve_fit``. If ``ax`` is truthy the points are
    scattered on it and the fit is drawn on ``len(y)*20`` evenly spaced points
    between ``min(x)`` and ``max(x)``.
    NOTE(review): this redefines a function of the same name from earlier in
    the notebook.
    """
    fitted_params, _cov = curve_fit(func, x, y, **kwargs)

    def model(x):
        return func(x, *fitted_params)

    draw = bool(ax)
    if draw:
        ax.scatter(x, y, label='data (1+2 jet SR)')
        n_curve_points = len(y)*20
        curve_x = np.linspace(min(x), max(x), n_curve_points)
        ax.plot(curve_x, model(curve_x), label='fit')
    return model

In [None]:
def make_mean_sigma_model():
    """Fit peak mean and width versus signal mass and plot both fits.

    Reads the notebook-level ``fit_params`` (columns ``mass``, ``mu``,
    ``sigma``), fits ``linear`` to the means and ``make_bpoly`` (four start
    parameters) to the widths, and saves the two-panel figure under
    ``<outdir>/sig_interpolation``. Returns ``(mean_model, sigma_model)``.
    NOTE(review): this redefines a function of the same name from earlier in
    the notebook.
    """
    fig, (mean_ax, sigma_ax) = plt.subplots(1,2, figsize = (20, 10))
    
    mean_model = make_model(fit_params.mass, fit_params.mu, linear, ax=mean_ax)
    sigma_model = make_model(fit_params.mass, fit_params.sigma, make_bpoly, ax=sigma_ax,
                            p0=[1,1, 1, 1])
    
    cms_format_fig(era, mean_ax)
    cms_format_fig(era, sigma_ax)
    
    mean_ax.set_xlabel(mll+ ' [GeV]')
    sigma_ax.set_xlabel(mll+ ' [GeV]')
    
    mean_ax.set_ylabel('mean [GeV]')
    sigma_ax.set_ylabel('sigma [GeV]') 
    
    # Only the mean panel gets a legend.
    mean_ax.legend()
    fig.savefig('{}/sig_interpolation/{}_mean_sigma_fit.pdf'.format(outdir, era))
    return mean_model, sigma_model

In [None]:
mean_model, sigma_model = make_mean_sigma_model()

In [None]:
#make signal hist
# For every (region, mass, dbs) combination present in `df`, build the
# normalized DiLepMass systematic histogram in a +-5 sigma window around the
# modelled peak, plus a histogram of the stored double Crystal Ball fit.
hist_list = []
n_hist_bins = 100
for reg in ['SR1', 'SR2']:
    for mass in df.mass.unique():
        mdf = df[df.mass==mass]
        for dbs in mdf.dbs.unique():
            dbsdf = mdf[mdf.dbs==dbs]
            if dbsdf.size==0: continue
            #if mass != 300: continue
            print(mass, dbs, reg)
            mean, sigma = mean_model(mass), sigma_model(mass)
            nSigma = 5
            bins = Bins(np.linspace(mean-sigma*nSigma, mean+sigma*nSigma, n_hist_bins))
            # Same grid on every iteration; later cells read the leaked global.
            standardized_bins = Bins(np.linspace(-nSigma, nSigma, n_hist_bins))
            #make hist
            hist =  make_sys_hist_v2( dbsdf, 'DiLepMass', reg, bin_edges=bins.bin_edges).normalize()
            print(hist.sys_string())
            #make fit hist
            # Raises IndexError if fit_params has no row for this (mass, region).
            fit_param = fit_params[(fit_params.mass==mass) & (fit_params.region==reg)].iloc[0]
            # Evaluate the stored fit with its first (normalization) argument set to 1.0.
            fit_y = double_crystalball(bins.calc_bin_centers(), 1.0, fit_param.mu, fit_param.sigma, 
                       fit_param.alphal, fit_param.nl,
                       fit_param.alphar, fit_param.nr)
            # The three middle arguments are passed as zero arrays.
            fit_hist = SysHist(fit_y, fit_y*0, fit_y*0, fit_y*0, bins.bin_edges).normalize()
            hist_list.append({
                'fit': fit_hist,
                'hist': hist,
                'mass': mass,
                'dbs': dbs,
                'reg': reg,
                'standardized_bins': standardized_bins
            })

hist_df = pd.DataFrame(hist_list)

In [None]:
#compare different dbs
# For the 200 GeV sample, draw one figure per systematic and region showing
# the varied/nominal ratio for every dbs value.
# NOTE(review): only three colors are defined; a fourth dbs row for the same
# (mass, region) would raise IndexError at color[i].
color = ['red', 'green', 'blue']
for mass in [200]:
    for reg in hist_df.reg.unique():
        tdf = hist_df[(hist_df.mass==mass) & (hist_df.reg==reg)]
        print(mass, reg)
        # Systematic names and binning are taken from the first histogram.
        hist = tdf.iloc[0]['hist']
        bins = hist.bins
        for sys in hist.sys:
            print(mass, sys, reg)
            fig, ax = plt.subplots()
            for i, row in tdf.reset_index().iterrows():
                thist = row['hist']
                #ax.plot(thist.nominal/thist.nominal, label=row.dbs)
                # Entry [0] of the pair, drawn with the default (solid) line style.
                ax.errorbar(bins.calc_bin_centers(), (thist.nominal+thist.sys[sys][0])/thist.nominal, 
                             #yerr = thist.std/thist.nominal,
                             label=row.dbs, color=color[i])
                # Entry [1] of the pair, drawn dotted in the same color.
                ax.errorbar(bins.calc_bin_centers(), (thist.nominal+thist.sys[sys][1])/thist.nominal, 
                             #yerr = thist.std/thist.nominal,
                             color=color[i], ls=':')
                
            cms_format_fig(era, ax)
            ax.legend()
            # Reference line at ratio 1.
            ax.plot(bins.calc_bin_centers(), np.full(len(bins.calc_bin_centers()), 1), color='black')
            #ax.set_title("{} {} {}".format(reg, mass, sys.replace('_',' ')))
            
            fig.savefig('{}/output/sys_individual_dbs_comp/{}_{}_{}_{}.pdf'.format(outdir, era,reg, mass, sys))
            plt.show()
            plt.close()

In [None]:
def cystalball_by_mass(mass):
    """Return a double Crystal Ball with mean and sigma frozen at the modelled values for ``mass``.

    The returned callable takes ``(x, norm, b1, m1, b2, m2)`` and forwards to
    ``double_crystalball(x, norm, mean, sigma, b1, m1, b2, m2)``.
    """
    mean = mean_model(mass)
    sigma = sigma_model(mass)

    def fixed_peak_dcb(x, norm, b1, m1, b2, m2):
        return double_crystalball(x, norm, mean, sigma, b1, m1, b2, m2)

    return fixed_peak_dcb
    

In [None]:
#make_interp
# For every histogram in hist_df, fit a double Crystal Ball (peak position and
# width frozen by cystalball_by_mass) to the nominal shape and to each
# systematic variation, and store the fitted curves on an evenly spaced grid.
draw = False
fit_dict_list = []
n_fit_bins = n_hist_bins
for mass in  hist_df.mass.unique():
    print(mass)
    dcbbm = cystalball_by_mass(mass)
    for reg in ['SR1', 'SR2']:
        tdf = hist_df[(hist_df.mass==mass) & (hist_df.reg==reg)]
        
        for i, row in tdf.iterrows():
            fit_dict = {}
            fit_dict['mass'] = mass
            fit_dict['reg'] = reg
            fit_dict['dbs'] = row.dbs
            thist = row['hist']

            x = thist.calc_bin_centers()
            # Evaluation grid spanning the full edge range with n_fit_bins points.
            x_extended = np.linspace(thist.bin_edges.min(), thist.bin_edges.max(), n_fit_bins)
            y = thist.nominal
            std = thist.std
            fit_param = fit_params[(fit_params.mass==mass) & (fit_params.region==reg)].iloc[0]
            # Start values: unit normalization plus the stored tail parameters.
            p0 = [1.0, 
                       fit_param.alphal, fit_param.nl,
                       fit_param.alphar, fit_param.nr]
            popt, pcov = curve_fit(dcbbm, x, y, p0=p0,maxfev=int(1e5))
            nom_pred_extended = dcbbm(x_extended, *popt)
            nom_pred = dcbbm(x, *popt)
            for sys in thist.sys:
                # Variations are stored as offsets; add the nominal before fitting.
                y_down, y_up = thist.sys[sys]
                popt_down, pcov = curve_fit(dcbbm, x, y_down+y, p0=p0,maxfev=int(1e5))
                # Variations are divided by the sum of the (not yet normalized)
                # nominal prediction, so they stay relative to the nominal.
                down = dcbbm(x_extended, *popt_down)/np.sum(nom_pred_extended)
                popt_up, pcov = curve_fit(dcbbm, x, y_up+y, p0=p0,maxfev=int(1e5))
                up = dcbbm(x_extended, *popt_up)/np.sum(nom_pred_extended)
                fit_dict[sys] = [down, up]
                if draw:
                    fig, ax = plt.subplots()
                    ax.plot(x_extended, down, color='green')
                    ax.plot(x_extended, up, color='red')
                    ax.errorbar(x, y_down+ y, yerr=std, color='green', ls='None', marker='o')
                    ax.errorbar(x, y_up+ y, yerr=std, color='red', ls='None', marker='o')
                    ax.plot(x, y, color='black')
                    cms_format_fig(era, ax)
                    ax.legend()
                    plt.show()
                    fig.savefig('{}/output/sys_individual_dbs_comp/{}_{}_{}_{}_{}.pdf'.format(outdir, era,reg, mass, row.dbs, sys))
                    plt.close()
                    
            #normaliza and save nom pred extended
            # Must stay after the systematics loop, which divides by the unnormalized sum.
            nom_pred_extended = nom_pred_extended/np.sum(nom_pred_extended)
            fit_dict['nom'] = nom_pred_extended
            fit_dict_list.append(fit_dict)  

    

In [None]:
df = pd.DataFrame(fit_dict_list)

In [None]:
df.to_csv('{}/output/{}_fit_dict_signal.csv'.format(outdir, era))

In [None]:
def plot_2d(df, values):
    """Show ``values`` as an image over (standardized position, mass).

    Parameters
    ----------
    df : frame with a ``mass`` column; its maximum and minimum set the
        vertical extent (maximum first).
    values : 2-D array passed to ``imshow``.

    Returns
    -------
    The created figure, so the caller can save it.
    """
    fig, ax = plt.subplots()
    ax.imshow(values,
              extent=[-5, 5, df.mass.max(), df.mass.min()],
              aspect=5/500)
    # Raw strings: "\s" and "\e" are not valid Python escape sequences; the
    # label text itself is unchanged.
    ax.set_xlabel(r'$\sigma$')
    ax.set_ylabel(r'$m_{\ell\ell}$')
    cms_format_fig(era, ax)
    return fig

In [None]:
from scipy import interpolate

In [None]:
interpolators = {}

In [None]:
# Build the nominal-shape interpolator over (standardized position, mass) for
# each region, using only the dbs = 0.04 fits.
dbs = 0.04
key = 'nom'
for reg in ['SR1', 'SR2']:
    tdf = df[(df.dbs==dbs) & (df['reg']==reg)]
    values = np.stack(tdf[key])
    fig = plot_2d(df, values)
    # Fix: the format string has four placeholders but five arguments were
    # passed, so the file name carried a stale `mass` left over from an earlier
    # loop and silently dropped `key`. The plot covers all masses, so it is now
    # labelled with `key`.
    fig.savefig('{}/output/sys_individual_dbs_comp/twoD_plot_{}_{}_{}.pdf'.format(outdir, era, reg, key))
    bins = standardized_bins.bin_edges
    # NOTE(review): scipy.interpolate.interp2d is deprecated and removed in
    # recent SciPy releases -- confirm the pinned SciPy version or migrate.
    interpolators['{}_{}'.format(reg,key)] = interpolate.interp2d(bins, tdf.mass, values)

In [None]:
syss = tdf.filter(regex='SR|Weight').columns
syss = np.unique([x.replace('SR2',"SR1") for x in syss])

In [None]:
# Build Down/Up interpolators over (standardized position, mass) for every
# systematic and region, using the dbs value selected above, and save a 2-D
# plot of each.
for reg in ['SR1', 'SR2']:
    for sys in syss:
        tdf = df[(df.dbs==dbs) & (df['reg']==reg)]
        # `syss` holds the SR1 spelling of every name; switch to this region.
        sys = sys.replace('SR1', reg)
        print(sys)
        # Each cell holds a [down, up] pair, giving shape (n_mass, 2, n_points).
        values = np.stack(tdf[sys])
        down, up = values[:,0,:],  values[:,1,:]
        # Region-free name used for the interpolator keys and the file names.
        sys_label = sys.replace('SR1_', '').replace('SR2_', '')
        # Fix: the file names used a stale `mass` and the constant `key`, so
        # every systematic overwrote the same two files per region.
        fig = plot_2d(df, down)
        fig.savefig('{}/output/sys_individual_dbs_comp/twoD_plot_down_{}_{}_{}.pdf'.format(outdir, era, reg, sys_label))
        plt.show()
        # Close the saved figure; plt.clf() here created a fresh empty figure
        # once show() had released the current one.
        plt.close(fig)
        fig = plot_2d(df, up)
        fig.savefig('{}/output/sys_individual_dbs_comp/twoD_plot_up_{}_{}_{}.pdf'.format(outdir, era, reg, sys_label))
        plt.show()
        plt.close(fig)
        bins = standardized_bins.bin_edges
        sys = sys_label
        # NOTE(review): scipy.interpolate.interp2d is deprecated and removed in
        # recent SciPy releases -- confirm the pinned SciPy version or migrate.
        interpolators['{}_{}_down'.format(reg,sys)] = interpolate.interp2d(bins, tdf.mass, down)
        interpolators['{}_{}_up'.format(reg,sys)] = interpolate.interp2d(bins, tdf.mass, up)

In [None]:
masses = np.linspace(125,400,int((400-125)/7+1), dtype=int)

In [None]:
masses

In [None]:
# Evaluate every interpolator at each target mass and keep the mass-specific
# binning next to the interpolated shapes.
interpolated = {}
for mass in masses:
    mean, sigma = mean_model(mass), sigma_model(mass)
    nSigma = 5
    mass_bins = Bins(np.linspace(mean-sigma*nSigma, mean+sigma*nSigma, n_hist_bins))
    interpolated[mass] = {}
    interpolated[mass]['bins'] = mass_bins
    for sys, interp in interpolators.items():
        # NOTE(review): `bins` is the standardized edge array leaked from the
        # interpolator-building cells, not `mass_bins` -- confirm intended.
        interpolated[mass][sys] =  interp(bins, [mass])
        


In [None]:
from scipy.optimize import curve_fit
def dbs_fit(dbs, p0, p1):
    """Quadratic model in dbs without a linear term: ``p0 + p1 * dbs**2``."""
    quadratic_term = p1*dbs**2
    return (p0+quadratic_term)

In [None]:
#make dbs points
# For every systematic and region, sum the fitted Up/Down shapes over all
# masses that have a dbs = 1.0 sample, normalize to the dbs = 0.04 value and
# fit the dbs dependence with dbs_fit.
dbs_plot_list = []
masses = df[df.dbs==1.0].mass.unique()
mdf = df[df.mass.isin(masses)]
for reg in ['SR1', 'SR2']:
    tmdf = mdf[ (mdf['reg']==reg)]
    for sys in syss:
        sys = sys.replace('SR1', reg)
        dbs_dict = {"up":{}, "down":{}}
        for dbs in [0.04, 0.5, 1.0]:
            ttmdf = tmdf[tmdf.dbs==dbs]
            dbs_dict['up'][dbs] = []
            dbs_dict['down'][dbs] = []
            for (down, up) in ttmdf[sys]:
                down, up = down.sum(), up.sum()
                dbs_dict['up'][dbs].append(up)
                dbs_dict['down'][dbs].append(down)

        print(sys.replace('SR1_', '').replace('SR2_', ''))
        # One total per dbs value (summed over masses), in insertion order
        # 0.04, 0.5, 1.0, then expressed relative to the dbs = 0.04 total.
        up = np.sum([x for _, x in dbs_dict['up'].items()], axis=1)
        down = np.sum([x for _, x in dbs_dict['down'].items()], axis=1)
        up = up/up[0]
        down = down/down[0]
        popt_up, _ = curve_fit(dbs_fit, [0.04, 0.5, 1.0], up)
        popt_down, _ = curve_fit(dbs_fit, [0.04, 0.5, 1.0], down)
        # Fix: bind the fitted parameters as lambda defaults. A plain closure
        # looks up popt_up/popt_down when called, so every stored function
        # silently used the parameters of the last fit in the loop.
        dbs_plot_list.append({"reg": reg, "sys": sys.replace('SR1_', '').replace('SR2_', ''),
                              "up": lambda x, _popt=popt_up: dbs_fit(x, *_popt),
                              "down": lambda x, _popt=popt_down: dbs_fit(x, *_popt)})
# NOTE(review): this cell leaves dbs = 1.0 and a reduced `masses` in the kernel
# namespace; earlier cells that read them need their own values on re-run.

In [None]:
dbs_fit_df = pd.DataFrame(dbs_plot_list)

In [None]:
dbs_fit_df

In [None]:
dbses = np.linspace(.00, 1.0, int((1.0-.00)/.1+1) )
#dbses = np.array([0.04, 0.5, 1.0])
dbses  = [0. , 0.04, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]

In [None]:
import re

In [None]:
# Attach a dbs dependence to every interpolated systematic shape: nominal
# shapes are stored once under the suffix 0.00, systematics are scaled by the
# fitted dbs function for each dbs value.
interpolated_w_dbs = {}
for mass in interpolated:
    interpolated_w_dbs[mass] = {}
    interpolated_w_dbs[mass]['bins'] = interpolated[mass]['bins']
    for sys in interpolated[mass]:
        if sys=='bins': continue
        if re.match('SR[1-2]_nom', sys):
            interpolated_w_dbs[mass]["{}_{:.2f}".format(sys,0)] = interpolated[mass][sys]
        else:
            var = interpolated[mass][sys]
            updownkey = 'down' if 'down' in sys else 'up'
            # Fix: take the region from the key itself ('SR1_...' / 'SR2_...').
            # The previous code used the global `reg`, which holds whatever an
            # earlier loop left behind, so one region's scaling functions were
            # applied to both regions.
            sys_reg = sys.split('_', 1)[0]
            # replace SR2 with SR1 because i do it above for consistency in naming
            # NOTE(review): this is a substring match and the first hit wins --
            # confirm no systematic name is a prefix of another.
            tdfd = dbs_fit_df[(dbs_fit_df.reg==sys_reg) & (dbs_fit_df.sys.apply(lambda x: x in sys.replace('SR2','SR1')))]
            function = tdfd[updownkey].iloc[0]
            for dbs in dbses:
                interpolated_w_dbs[mass]["{}_{:.2f}".format(sys,dbs)] = var*function(dbs)

In [None]:
# Overlay the unit-sum interpolated nominal SR1 shapes of all target masses.
fig, ax = plt.subplots(1)
# NOTE(review): `reg` is reused here as an interpolator key ('SR1_nom'), not a
# region name; cells that read the global `reg` afterwards see this value.
reg ='SR1_nom'
for mass in interpolated:
    ax.plot(interpolated[mass]['bins'].bin_edges,
            interpolated[mass][reg]/np.sum(interpolated[mass][reg]))
    
cms_format_fig(era, ax)

ax.set_xlabel(mll+ ' [GeV]')

ax.set_ylabel('Count')
fig.savefig('{}/output/all_masses_{}_{}.pdf'.format(outdir, era,reg))

In [None]:
# Overlay the unit-sum interpolated nominal SR2 shapes of all target masses
# (same plot as the SR1 cell with a different key).
fig, ax = plt.subplots(1)
# NOTE(review): `reg` is reused here as an interpolator key ('SR2_nom'), not a
# region name; cells that read the global `reg` afterwards see this value.
reg ='SR2_nom'
for mass in interpolated:
    ax.plot(interpolated[mass]['bins'].bin_edges,
            interpolated[mass][reg]/np.sum(interpolated[mass][reg]))
    
cms_format_fig(era, ax)

ax.set_xlabel(mll+ ' [GeV]')

ax.set_ylabel('Count')
fig.savefig('{}/output/all_masses_{}_{}.pdf'.format(outdir, era,reg))

In [None]:
import pickle

In [None]:
with open('{}/data/{}_bff_interp.pkl'.format(outdir, era), 'wb') as f:
    pickle.dump(interpolated_w_dbs, f)

In [None]:
import sys
sys.getsizeof(interpolated_w_dbs)

In [None]:
# test interpolation

In [None]:
from scipy import interpolate
def make_interpolation(df):
    """Build a 2-D interpolator of the fitted shapes over (standardized bin centre, mass).

    ``df`` needs the columns ``mass``, ``standardized_bins`` (taken from the
    first row) and ``fit`` (objects exposing ``nominal``). Prints the masses
    used and the array shapes, then returns ``interpolate.interp2d`` built from
    the bin centres, the unique masses and the stacked nominal arrays.
    """
    masses = df.mass.unique()
    print('using masses:', masses)
    first_row_bins = df.iloc[0]['standardized_bins']
    bin_centers = first_row_bins.calc_bin_centers()
    # One row per entry of df, in frame order.
    nominal_rows = df.fit.apply(lambda fit_hist: fit_hist.nominal).values
    z = np.stack(nominal_rows, axis=0)
    #return (bin_centers, masses, z)
    print(np.shape(bin_centers), np.shape(masses), np.shape(z))
    return interpolate.interp2d(bin_centers, masses, z)

In [None]:
# Closure test with ratio panels: interpolate from every second mass point of
# the dbs = 0.04 fits and compare with the histogram at the held-out masses.
for reg in ['SR1', 'SR2']:
    tdf = hist_df[(hist_df.reg==reg) & (hist_df.dbs==0.04)]
    
    iterpolator = make_interpolation(tdf.iloc[::2])
    
    #make intermediate mass plots:
    # NOTE(review): this overwrites the notebook-level `masses`.
    masses = tdf.iloc[1:-2:2].mass
    masses = masses[masses<=350]
    fig, axs = nratio_plot_template(nPlots=[len(masses), 1], figsize=(40,15))
    for mass, ax in zip(masses, axs):
        top, bottom = ax[0]
        bin_centers = tdf.iloc[0]['standardized_bins'].calc_bin_centers()
        hist = tdf[tdf.mass==mass].iloc[0]['hist']
        # `fit` is looked up but not used below.
        fit = tdf[tdf.mass==mass].iloc[0]['fit']
        
        z_interp = iterpolator(bin_centers, [mass])
    
        #top
        hist.draw(top, label='{} GeV'.format(mass))
        mean, sigma = mean_model(mass), sigma_model(mass)
        # Map the standardized axis back to GeV around the modelled peak.
        scaled_bin_centers = bin_centers*sigma+mean
        top.plot(scaled_bin_centers,  z_interp, ds='steps-mid', label='interpolation', color='red', zorder=3)
        #bottom
        # Ratio of the histogram to the interpolation.
        (hist*(1./z_interp)).draw(bottom)
        # Guide lines at ratio 1 (solid) and 1.1 / 0.9 (dotted).
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1) , color='black', zorder=3)
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1.1) , color='black', zorder=3, ls=':')
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), .9) , color='black', zorder=3, ls=':')
        
        cms_format_fig(era, top)
        top.legend()
        bottom.set_xlabel(mll + ' [GeV]')
        top.set_ylabel('counts')
        bottom.set_ylabel('ratio')
        bottom.set_ylim(0,2)
    fig.savefig('{}/sig_interpolation/{}_{}_interp_test.pdf'.format(outdir, era, reg))

In [None]:
end document

In [None]:

row = hist_df.iloc[3]
hist = row['hist']
mass = row.mass
reg = row.reg


#for key, (sysup, sysdown) in hist.sys.items():
#    plt.plot(bin_centers, nominal)
#    plt.plot(bin_centers, (sysup+nominal))
#    plt.plot(bin_centers, (sysdown+nominal))
#    fit__sys_up = double_crystalball(bins.calc_bin_centers(), 1.0, fit_param.mu, fit_param.sigma, 
#                       fit_param.alphal, fit_param.nl,
#                       fit_param.alphar, fit_param.nr)
#    plt.title(key.replace('_', ' '))
#    plt.show()
#    plt.clf()

In [None]:
import matplotlib.pyplot as plt

In [None]:
##
## model sys
##

In [None]:
def get_sys(row):
    """Return ``sys_pers()`` of the histogram stored under ``row['hist']``."""
    hist = row['hist']
    return hist.sys_pers()

In [None]:
sys_dict = pd.DataFrame(hist_df.apply(get_sys, axis=1).to_list())

In [None]:
hist_df = pd.concat([hist_df,sys_dict], axis=1)

In [None]:
hist_df[['mass', 'dbs', 'reg',  'Weight_ISRFSR_Comb']]

In [None]:
##
## testing 175 gev
##

In [None]:
pwd = '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2018_NanoAODv6/ZprimeToMuMu175_2018/ZprimeToMuMu175_2018.root'
pwd0p05 = '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2018_NanoAODv6/ZprimeToMuMu175dbs0p5_2018/ZprimeToMuMu175dbs0p5_2018.root'

In [None]:
import uproot as upr

In [None]:
upf = upr.open(pwd)['Events']

In [None]:
upf0p05 = upr.open(pwd0p05)['Events']

In [None]:
upf.arrays(['LHEScaleWeight'], library='pd').std()

In [None]:
upf0p05.arrays(['LHEScaleWeight'], library='pd').std()

In [None]:
hist_df.columns

In [None]:
def exp_poly(x, *popt):
    """Evaluate ``sum_i popt[i] * x**(-i)``, a polynomial in ``1/x``.

    Parameters
    ----------
    x : scalar, numpy array or pandas Series.
    *popt : coefficients; ``popt[0]`` is the constant term.

    Returns
    -------
    Same container type as ``x``, with floating-point values. With no
    coefficients the result is all zeros.
    """
    # Promote to float first: integer input would otherwise fail, both on the
    # negative integer powers and on adding float terms into an int accumulator.
    xf = x*1.0
    y_sum = xf*0
    for i, p in enumerate(popt):
        y_sum = y_sum + p*xf**(-i)
    # The unreachable `return np.log(y_sum)` that followed was removed.
    return y_sum

In [None]:
def get_reg_dbs(reg, dbs, sys_key='sys', _hist_df=None):
    """Select the rows of one region and dbs value.

    Parameters
    ----------
    reg : value compared (``==``) against the ``reg`` column.
    dbs : value compared (``==``) against the ``dbs`` column.
    sys_key : str
        Name of the column to return alongside the mass.
    _hist_df : DataFrame, optional
        Frame to select from.  When omitted, the module-level ``hist_df`` is
        looked up at call time, so a later re-assignment of ``hist_df`` is
        picked up instead of a stale object captured when this function was
        defined.

    Returns
    -------
    (mass, values) : the ``mass`` column and the ``sys_key`` column of the
    selected rows.
    """
    if _hist_df is None:
        _hist_df = hist_df
    selected = _hist_df[(_hist_df.reg == reg) & (_hist_df.dbs == dbs)]
    return selected.mass, selected[sys_key]

def sys_func(x, p_dbs, *popt):
    """Model of a systematic as a function of mass and dbs.

    ``x`` must expose ``mass`` and ``dbs`` attributes (a DataFrame with those
    columns is what the callers in this notebook pass).  The result is
    ``exp_poly(mass, *popt) * (1 + p_dbs * sqrt(dbs))``.
    """
    mass_term = exp_poly(x.mass, *popt)
    dbs_term = 1 + p_dbs * x.dbs ** .5
    return mass_term * dbs_term

def make_sys_plot(reg, sys_key='sys'):
    """Plot one systematic versus mass for a region and fit ``sys_func`` to it.

    Scatter points are drawn for dbs = 0.04, 0.5 and 1.0.  ``sys_func`` is then
    fitted to all rows of ``hist_df`` belonging to ``reg`` (every dbs value),
    and the fitted curve is drawn for the same three dbs values on a mass grid
    from 125 to 500.

    Parameters
    ----------
    reg : region label matched against ``hist_df.reg``.
    sys_key : str
        Column of ``hist_df`` holding the systematic to plot and fit.

    Returns
    -------
    (func, fig) : ``func(x)`` evaluates the fitted ``sys_func`` on an ``x``
    exposing ``mass`` and ``dbs``; ``fig`` is the matplotlib figure.
    """
    dbs_points = (0.04, 0.5, 1.0)
    fig, ax = plt.subplots(1, 1)

    # measured points, one scatter series per dbs value
    for dbs in dbs_points:
        mass, sys_values = get_reg_dbs(reg, dbs, sys_key=sys_key)
        ax.scatter(mass, sys_values, label=str(dbs))

    # simultaneous fit over every row of this region
    reg_df = hist_df[hist_df.reg == reg]
    popt, _ = curve_fit(sys_func, reg_df, reg_df[sys_key],
                        p0=[1, 1, 1, 1, 1], maxfev=int(1e4))

    # fitted curves, drawn in the same dbs order as the scatter series
    mass_grid = np.linspace(125, 500, 100)
    for dbs in dbs_points:
        grid = pd.DataFrame({'mass': mass_grid, 'dbs': dbs})
        ax.plot(grid.mass, sys_func(grid, *popt))

    cms_format_fig(era, ax)
    # raw string: '\e' is not a valid escape sequence in a normal string literal
    ax.set_xlabel(r'$m_{\ell\ell}$ [GeV]')
    ax.set_ylabel('Systematic/Nominal')
    ax.legend(title="{}: {}".format(reg, sys_key).replace("_", " "))
    print(popt)
    return lambda x: sys_func(x, *popt), fig
    

In [None]:
# Visualize the difference between the minimum (0.04) and maximum (1.0) dbs points

In [None]:
def make_delta_plot(reg, sys_key):
    """Plot the dbs=1.0 minus dbs=0.04 difference of one systematic versus mass.

    The dbs=0.04 rows are restricted to masses that also appear in the dbs=1.0
    selection; the two selections are then subtracted position by position.

    Returns the dbs=1.0 masses and the array of differences.
    """
    mass_1p0, sys_1p0 = get_reg_dbs(reg, 1.0, sys_key=sys_key)
    shared_mass_df = hist_df[hist_df.mass.isin(mass_1p0)]
    _, sys_0p04 = get_reg_dbs(reg, 0.04, sys_key=sys_key, _hist_df=shared_mass_df)

    delta = sys_1p0.to_numpy() - sys_0p04.to_numpy()
    plt.plot(mass_1p0, delta)
    plt.title('{}: {}'.format(reg, sys_key.replace('_', ' ')))
    plt.show()
    return mass_1p0, delta


In [None]:
# Display the full hist_df frame.
hist_df

In [None]:
# Fit and plot the 'tot' column for SR1, then save the figure.
# The relative directory 'output/sys_total' must already exist.
sr1_sys_func, sr1_fig = make_sys_plot('SR1', sys_key = 'tot')
sr1_fig.savefig('output/sys_total/{}_{}.png'.format(era, 'SR1'))

In [None]:
# Fit and plot the 'tot' column for SR2, then save the figure.
# The relative directory 'output/sys_total' must already exist.
sr2_sys_func, sr2_fig = make_sys_plot('SR2', sys_key = 'tot')
sr2_fig.savefig('output/sys_total/{}_{}.png'.format(era, 'SR2'))

In [None]:
# Column names of the systematics to model.  Entries starting with '{}' are
# templates; the loop further down fills them with the region name via
# str.format.
# NOTE(review): sys_list is assigned again in a later cell without
# 'Weight_L1Comb'; that later assignment replaces this one.
sys_list = ['{}_jet_jesTotalComb_muon_corrected_pt_ele_pt',
       '{}_jet_nom_muon_correctedComb_pt_ele_pt',
       '{}_jet_jerComb_muon_corrected_pt_ele_pt', 'Weight_PuComb',
       'Weight_BTagComb', 'Weight_PUIDComb', 'Weight_PDF_Comb',
       'Weight_ISRFSR_Comb', 'Weight_MuonSFComb', 'Weight_ElectronSFComb',
       'Weight_L1Comb', 'tot']

In [None]:
# Plot all the systematics

In [None]:
# Systematics to fit and plot.  This re-assignment replaces the earlier
# sys_list; the only difference is that 'Weight_L1Comb' is not included here.
# Entries starting with '{}' are templates filled with the region name.
sys_list = ['{}_jet_jesTotalComb_muon_corrected_pt_ele_pt',
       '{}_jet_nom_muon_correctedComb_pt_ele_pt',
       '{}_jet_jerComb_muon_corrected_pt_ele_pt', 'Weight_PuComb',
       'Weight_BTagComb', 'Weight_PUIDComb', 'Weight_PDF_Comb',
       'Weight_ISRFSR_Comb', 'Weight_MuonSFComb', 'Weight_ElectronSFComb',
        'tot']

In [None]:
# Fit every entry of sys_list in both signal regions.  function_dict maps
# systematic template -> region -> fitted function; each figure is saved.
function_dict = {}
for sys in sys_list:
    function_dict[sys] = per_region = {}
    for reg in ('SR1', 'SR2'):
        # fill a possible '{}' placeholder with the region name
        _sys = sys.format(reg)
        _sys_func, _sys_fig = make_sys_plot(reg, sys_key=_sys)
        per_region[reg] = _sys_func
        # the file name is built from the unformatted template `sys`, not `_sys`
        _sys_fig.savefig('output/sys_total/{}_{}_{}.png'.format(era, reg, sys))

In [None]:
# make_sys_plot returns (fitted function, figure); unpack so that
# sr1_sys_func holds the callable rather than the whole tuple.
sr1_sys_func, sr1_fig = make_sys_plot('SR1', sys_key = 'Weight_MuonSFComb')

In [None]:
# make_sys_plot returns (fitted function, figure); unpack so that
# sr1_sys_func holds the callable rather than the whole tuple.
sr1_sys_func, sr1_fig = make_sys_plot('SR1', sys_key = 'tot')

In [None]:
# This is an SR2 fit: store it under the SR2 names (it was previously written
# to sr1_sys_func) and unpack the (fitted function, figure) tuple that
# make_sys_plot returns.
sr2_sys_func, sr2_fig = make_sys_plot('SR2', sys_key = 'Weight_ISRFSR_Comb')

In [None]:
# This is an SR2 fit: store it under the SR2 names (it was previously written
# to sr1_sys_func) and unpack the (fitted function, figure) tuple that
# make_sys_plot returns.
sr2_sys_func, sr2_fig = make_sys_plot('SR2', sys_key = 'tot')

In [None]:
from scipy import interpolate
def make_interpolation(df):
    """Build a bilinear interpolator of the fitted shapes over (bin centre, mass).

    ``scipy.interpolate.interp2d`` is deprecated and has been removed from
    recent SciPy releases, so the interpolation is done with
    ``RectBivariateSpline`` of degree 1 in both directions.  The returned
    callable keeps the ``interp2d`` calling convention used in this notebook.

    Parameters
    ----------
    df : DataFrame
        Needs the columns ``mass``, ``standardized_bins`` (object with
        ``calc_bin_centers()``; only the first row's is used) and ``fit``
        (object with a ``nominal`` array, one per row).  At least two distinct
        masses and two bin centres are required.

    Returns
    -------
    callable ``f(x_new, y_new)``
        ``x_new`` are bin centres, ``y_new`` masses.  Both are sorted before
        evaluation.  The result has shape ``(len(y_new), len(x_new))``, or is
        1-D of length ``len(x_new)`` when a single ``y_new`` is given.
    """
    masses = df.mass.unique()
    print('using masses:', masses)
    bins = df.iloc[0]['standardized_bins']
    bin_centers = np.asarray(bins.calc_bin_centers(), dtype=float)
    # one row of z per row of df, one column per bin centre
    z = np.stack(df['fit'].apply(lambda x: x.nominal).values, axis=0)
    print(np.shape(bin_centers), np.shape(masses), np.shape(z))

    # RectBivariateSpline needs strictly increasing grid coordinates
    x_order = np.argsort(bin_centers)
    y_order = np.argsort(masses)
    x_grid = bin_centers[x_order]
    y_grid = np.asarray(masses, dtype=float)[y_order]
    z_grid = np.asarray(z, dtype=float)[np.ix_(y_order, x_order)]

    # the spline expects z indexed as [x, y], hence the transpose
    spline = interpolate.RectBivariateSpline(x_grid, y_grid, z_grid.T, kx=1, ky=1)

    def interpolator(x_new, y_new):
        x_sorted = np.sort(np.atleast_1d(np.asarray(x_new, dtype=float)))
        y_sorted = np.sort(np.atleast_1d(np.asarray(y_new, dtype=float)))
        values = spline(x_sorted, y_sorted).T  # -> (len(y_new), len(x_new))
        # a single requested mass yields a 1-D array, like interp2d did
        return values[0] if len(values) == 1 else values

    return interpolator
# Closure test of the mass interpolation: for each signal region build the
# interpolator from every other dbs=0.04 row and compare it with the histograms
# of rows that were left out.
# NOTE: this loop re-assigns the module-level names `reg` and `masses`
# (`masses` is defined near the top of the notebook as the mass grid).
for reg in ['SR1', 'SR2']:
    tdf = hist_df[(hist_df.reg==reg) & (hist_df.dbs==0.04)]
    
    # rows at even positions (0, 2, 4, ...) are the interpolation inputs
    iterpolator = make_interpolation(tdf.iloc[::2])
    
    #make intermediate mass plots:
    # rows at odd positions, stopping before the last two rows, are the test points
    masses = tdf.iloc[1:-2:2].mass
    fig, axs = nratio_plot_template(nPlots=[len(masses), 1], figsize=(40,15))
    for mass, ax in zip(masses, axs):
        top, bottom = ax[0]
        bin_centers = tdf.iloc[0]['standardized_bins'].calc_bin_centers()
        hist = tdf[tdf.mass==mass].iloc[0]['hist']
        # `fit` is looked up but not used further inside this loop
        fit = tdf[tdf.mass==mass].iloc[0]['fit']
        
        # interpolated values at this mass, one per standardized bin centre
        z_interp = iterpolator(bin_centers, [mass])
    
        #top
        hist.draw(top, label='{} GeV'.format(mass))
        # mean_model / sigma_model are defined elsewhere in the notebook;
        # presumably they map the standardized axis back to GeV -- confirm
        mean, sigma = mean_model(mass), sigma_model(mass)
        scaled_bin_centers = bin_centers*sigma+mean
        top.plot(scaled_bin_centers,  z_interp, ds='steps-mid', label='interpolation', color='red', zorder=3)
        #bottom
        # ratio of the histogram to the interpolation
        (hist*(1./z_interp)).draw(bottom)
        # reference lines at 1 (solid) and at 1.1 / 0.9 (dotted)
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1) , color='black', zorder=3)
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1.1) , color='black', zorder=3, ls=':')
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), .9) , color='black', zorder=3, ls=':')
        
        cms_format_fig(era, top)
        top.legend()
        bottom.set_xlabel(mll + ' [GeV]')
        top.set_ylabel('counts')
        bottom.set_ylabel('ratio')
        bottom.set_ylim(0,2)
    # saving is disabled in this copy of the loop
    #fig.savefig('{}/sig_interpolation/{}_{}_interp_test.pdf'.format(outdir, era, reg))