In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit

In [None]:
import uproot as upr
# Open one skimmed NanoAOD ROOT file with uproot (hard-coded absolute EOS path).
upf = upr.open('/eos/cms/store/group/phys_exotica/bffZprime/nanoAODskimmed/crab_bffv2/2017/BFF_150_dbs0p5_deepflavour_bffv2/221019_070838/0000/tree_1.root')

In [None]:
# Read a single branch of the 'Events' tree into a pandas object.
uparr = upf['Events'].arrays(['SR1_jet_nom_muon_correctedUp_pt_ele_pt'], library='pd')

In [None]:
# Sum of the branch read above, displayed as the cell output.
uparr.sum()

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited project modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
from src.plotting_tools.SysHist import SysHist, make_sys_hist_v2
from src.plotting_tools.latexAssets import mll
from src.plotting_tools.cms_format import cms_style, cms_format_fig
from src.plotting_tools.Bins import Bins
from src.plotting_tools.utils import ratio_plot_template, nratio_plot_template
from src.general.functions import make_bpoly

# Call the project's cms_style() helper before any figure is created
# (presumably sets the global matplotlib style — confirm in src.plotting_tools.cms_format).
cms_style()

In [None]:
from src.general.functions import linear, make_bpoly, double_crystalball

In [None]:
from src.data_tools.get_data import get_data

In [None]:
# Data-taking era; reused below in input/output file names and figure formatting.
era = '2017'
# Load the event frame and the luminosity for this era; the filter passed to get_data
# selects entries whose `type` equals 'sig'.
_df, lumi = get_data(era, df_filter=lambda x: x.type=='sig', verbose=1)

In [None]:
def get_puid_stats(df, weight):
    """Return the mean "Up" weight divided by the mean nominal weight.

    Only rows whose ``SR2_jet_nom_muon_corrected_pt_ele_pt`` column equals 1 are used.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Weight``, ``<weight>Up`` and the SR2 selection column.
    weight : str
        Prefix of the varied-weight column; ``'Up'`` is appended to it.

    Returns
    -------
    float
        ``mean(<weight>Up) / mean(Weight)`` over the selected rows.
    """
    in_sr2 = df.SR2_jet_nom_muon_corrected_pt_ele_pt == 1
    selected = df[in_sr2]
    up_mean = selected['{}Up'.format(weight)].mean()
    nominal_mean = selected['Weight'].mean()
    return up_mean / nominal_mean

In [None]:
# Working alias for the loaded frame: the same object as _df, not a copy.
df = _df

In [None]:
# Inspect the rows with mass == 350.
df[df.mass==350]

In [None]:
# Fitted signal-shape parameters for this era, read from CSV.
# Columns used further down: mass, region, mu, sigma, alphal, nl, alphar, nr.
fit_params = pd.read_csv('fits/{}_signal_fits_params.csv'.format(era))

In [None]:
def make_model(x, y, func, ax=0, **kwargs):
    """Fit ``func`` to the points ``(x, y)`` and return the fitted curve as a callable.

    Parameters
    ----------
    x, y : array-like
        Sample points handed to ``scipy.optimize.curve_fit``.
    func : callable
        Model of the form ``func(x, *params)``.
    ax : matplotlib axes, optional
        When truthy, the input points and the fitted curve are drawn on it.
    **kwargs
        Forwarded unchanged to ``curve_fit`` (e.g. ``p0``).

    Returns
    -------
    callable
        ``model(x)`` evaluating ``func`` with the best-fit parameters.
    """
    best_fit = curve_fit(func, x, y, **kwargs)[0]

    def model(x):
        return func(x, *best_fit)

    if not ax:
        return model

    ax.scatter(x, y, label='data (1+2 jet SR)')
    # Evaluate the fit on a grid 20 times denser than the input for a smooth line.
    dense_x = np.linspace(min(x), max(x), len(y) * 20)
    ax.plot(dense_x, model(dense_x), label='fit')
    return model

In [None]:
def make_mean_sigma_model():
    """Fit the signal mean and width as functions of mass and save the diagnostic figure.

    The mean is fitted with ``linear`` and the width with ``make_bpoly`` (four starting
    parameters, all 1), using every row of the module-level ``fit_params`` table.
    Both fits are drawn side by side and written to
    ``assets_exo_jul_22/sig_interpolation/<era>_mean_sigma_fit.pdf``.

    Returns
    -------
    tuple of callable
        ``(mean_model, sigma_model)``, each mapping a mass to the fitted value.
    """
    fig, (mean_ax, sigma_ax) = plt.subplots(1, 2, figsize=(20, 10))

    mean_model = make_model(fit_params.mass, fit_params.mu, linear, ax=mean_ax)
    sigma_model = make_model(fit_params.mass, fit_params.sigma, make_bpoly, ax=sigma_ax,
                             p0=[1, 1, 1, 1])

    # Both panels share the same formatting, x label and legend; only the y label differs.
    for ax, ylabel in ((mean_ax, 'mean [GeV]'), (sigma_ax, 'sigma [GeV]')):
        cms_format_fig(era, ax)
        ax.set_xlabel(mll + ' [GeV]')
        ax.set_ylabel(ylabel)
        # Previously only the mean panel had a legend although both panels have
        # labelled "data" and "fit" artists.
        ax.legend()

    fig.savefig('assets_exo_jul_22/sig_interpolation/{}_mean_sigma_fit.pdf'.format(era))
    return mean_model, sigma_model

In [None]:
# Fit the mean-vs-mass and sigma-vs-mass models (this call also writes the PDF figure).
mean_model, sigma_model = make_mean_sigma_model()

In [None]:
# Build, for every (region, mass, dbs) combination present in df, a normalized signal
# histogram of DiLepMass and a normalized histogram of the fitted double crystal ball
# shape on the same binning. The results are collected into hist_df, one row each.
hist_list = []
for reg in ['SR1', 'SR2']:
    for mass in df.mass.unique():
        mdf = df[df.mass==mass]
        for dbs in mdf.dbs.unique():
            dbsdf = mdf[mdf.dbs==dbs]
            # Skip combinations with no rows.
            if dbsdf.size==0: continue
            #if mass != 300: continue
            print(mass, dbs, reg)
            # Expected peak position and width at this mass from the fitted models.
            mean, sigma = mean_model(mass), sigma_model(mass)
            nSigma = 5
            # 20 edges (19 bins) spanning mean +- nSigma*sigma, and the same binning
            # expressed in units of sigma around zero.
            bins = Bins(np.linspace(mean-sigma*nSigma, mean+sigma*nSigma, 20))
            standardized_bins = Bins(np.linspace(-nSigma, nSigma, 20))
            # Histogram of the selected sample, with systematics, normalized.
            hist =  make_sys_hist_v2( dbsdf, 'DiLepMass', reg, bin_edges=bins.bin_edges).normalize()
            print(hist.sys_string())
            # Evaluate the fitted shape for this mass and region at the bin centers.
            # .iloc[0] takes the first matching row and raises if fit_params has none.
            fit_param = fit_params[(fit_params.mass==mass) & (fit_params.region==reg)].iloc[0]
            fit_y = double_crystalball(bins.calc_bin_centers(), 1.0, fit_param.mu, fit_param.sigma, 
                       fit_param.alphal, fit_param.nl,
                       fit_param.alphar, fit_param.nr)
            # The three zero arrays fill the remaining SysHist arguments
            # (presumably uncertainty terms — confirm against SysHist's signature).
            fit_hist = SysHist(fit_y, fit_y*0, fit_y*0, fit_y*0, bins.bin_edges).normalize()
            hist_list.append({
                'fit': fit_hist,
                'hist': hist,
                'mass': mass,
                'dbs': dbs,
                'reg': reg,
                'standardized_bins': standardized_bins
            })

hist_df = pd.DataFrame(hist_list)

In [None]:
# Visual check for one histogram (row 3 of hist_df): for every systematic, draw the
# nominal values together with nominal plus the stored up and down arrays.
hist = hist_df.iloc[3]['hist']
bin_centers = hist.bins.calc_bin_centers()
nominal = hist.nominal
for key, (sysup, sysdown) in hist.sys.items():
    plt.plot(bin_centers, nominal)
    plt.plot(bin_centers, (sysup+nominal))
    plt.plot(bin_centers, (sysdown+nominal))
    
    plt.title(key.replace('_', ' '))
    # Show one figure per systematic, then clear it for the next iteration.
    plt.show()
    plt.clf()

In [None]:
import matplotlib.pyplot as plt

In [None]:
##
## model sys
##

In [None]:
def get_sys(row):
    """Return ``sys_pers()`` of the histogram stored under the row's ``'hist'`` key."""
    histogram = row['hist']
    return histogram.sys_pers()

In [None]:
# One row per hist_df row, built from each histogram's sys_pers() result
# (presumably a dict keyed by systematic name — confirm in SysHist).
sys_dict = pd.DataFrame(hist_df.apply(get_sys, axis=1).to_list())

In [None]:
# Attach the per-systematic columns to hist_df. Columns left over from an earlier run of
# this cell are dropped first, so re-running the cell does not create duplicate columns
# (duplicates would make hist_df[<systematic>] return a DataFrame instead of a Series).
hist_df = pd.concat(
    [hist_df.drop(columns=sys_dict.columns, errors='ignore'), sys_dict],
    axis=1,
)

In [None]:
# Quick look at one systematic column next to the sample identifiers.
hist_df[['mass', 'dbs', 'reg',  'Weight_ISRFSR_Comb']]

In [None]:
##
## testing the 175 GeV signal samples
##

In [None]:
# Hard-coded absolute EOS paths to the two 175 GeV private signal files (2018).
pwd = '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2018_NanoAODv6/ZprimeToMuMu175_2018/ZprimeToMuMu175_2018.root'
# NOTE(review): the variable is called "0p05" but the path says "dbs0p5" — confirm which is meant.
pwd0p05 = '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2018_NanoAODv6/ZprimeToMuMu175dbs0p5_2018/ZprimeToMuMu175dbs0p5_2018.root'

In [None]:
import uproot as upr

In [None]:
# 'Events' tree of the first 175 GeV file. This rebinds `upf`, which earlier in the
# notebook held an opened 2017 file.
upf = upr.open(pwd)['Events']

In [None]:
# 'Events' tree of the second 175 GeV file (the one whose path contains "dbs0p5").
upf0p05 = upr.open(pwd0p05)['Events']

In [None]:
# Standard deviation of the LHEScaleWeight branch in the first file.
upf.arrays(['LHEScaleWeight'], library='pd').std()

In [None]:
# Same quantity for the second file, for comparison with the cell above.
upf0p05.arrays(['LHEScaleWeight'], library='pd').std()

In [None]:
# List the columns currently present in hist_df.
hist_df.columns

In [None]:
def exp_poly(x, *popt):
    """Evaluate a polynomial in inverse powers of ``x``.

    Computes ``popt[0] + popt[1]/x + popt[2]/x**2 + ...``. Despite the name, no
    exponential or logarithm is applied.

    Parameters
    ----------
    x : scalar, numpy.ndarray or pandas.Series
        Evaluation points. Integer input is handled; the result is always floating point.
    *popt : float
        Coefficients; ``popt[i]`` multiplies ``x**(-i)``.

    Returns
    -------
    Same container type as ``x``
        The polynomial value at each point (all zeros when no coefficient is given).
    """
    # Start from a float accumulator: an integer one cannot take float terms in place.
    y_sum = x * 0.0
    for i, p in enumerate(popt):
        # A float exponent avoids numpy's "integers to negative integer powers" error.
        y_sum = y_sum + p * x ** float(-i)
    return y_sum

In [None]:
def get_reg_dbs(reg, dbs, sys_key='sys', _hist_df=None):
    """Select the rows of one region and one dbs value and return mass and systematic.

    Parameters
    ----------
    reg : str
        Value matched against the ``reg`` column.
    dbs : float
        Value matched (by equality) against the ``dbs`` column.
    sys_key : str
        Name of the column to return alongside the mass.
    _hist_df : pandas.DataFrame, optional
        Table to select from. Defaults to the module-level ``hist_df`` *as it is when the
        function is called*, so rebinding ``hist_df`` in a later cell is picked up
        without re-running this definition.

    Returns
    -------
    tuple of pandas.Series
        ``(mass, <sys_key>)`` for the selected rows.
    """
    if _hist_df is None:
        _hist_df = hist_df
    selected = _hist_df[(_hist_df.reg == reg) & (_hist_df.dbs == dbs)]
    return selected.mass, selected[sys_key]

def sys_func(x, p_dbs, *popt):
    """Model of a systematic as a function of mass and dbs.

    The mass dependence is ``exp_poly(x.mass, *popt)``; it is multiplied by
    ``1 + p_dbs * sqrt(x.dbs)``.

    Parameters
    ----------
    x : object with ``mass`` and ``dbs`` attributes (e.g. a DataFrame)
    p_dbs : float
        Strength of the dbs-dependent scaling.
    *popt : float
        Coefficients forwarded to ``exp_poly``.
    """
    mass_term = exp_poly(x.mass, *popt)
    dbs_scale = 1 + p_dbs * x.dbs ** .5
    return mass_term * dbs_scale

def make_sys_plot(reg, sys_key='sys', dbs_values=(0.04, 0.5, 1.0)):
    """Fit ``sys_func`` to one systematic in one region and plot points and fit.

    The scatter points are the ``sys_key`` values in ``hist_df`` for each value in
    ``dbs_values``. The fit uses every ``hist_df`` row of the region, and the fitted
    curve is drawn for each dbs value on 100 masses between 125 and 500.

    Parameters
    ----------
    reg : str
        Region name matched against ``hist_df.reg``.
    sys_key : str
        Column of ``hist_df`` holding the systematic to model.
    dbs_values : sequence of float
        dbs values to draw; the default reproduces the three original series.

    Returns
    -------
    tuple
        ``(model, fig)`` where ``model(x)`` evaluates the fitted ``sys_func`` on an
        object with ``mass`` and ``dbs`` attributes, and ``fig`` is the figure.
    """
    fig, ax = plt.subplots(1, 1)

    # Simulated points: one scatter series per dbs value.
    for dbs in dbs_values:
        masses, values = get_reg_dbs(reg, dbs, sys_key=sys_key)
        ax.scatter(masses, values, label=str(dbs))

    # Simultaneous fit over all dbs values of this region (1 dbs parameter + 4 coefficients).
    reg_df = hist_df[hist_df.reg == reg]
    popt, _ = curve_fit(sys_func, reg_df, reg_df[sys_key],
                        p0=[1, 1, 1, 1, 1], maxfev=int(1e4))

    def make_df_space(dbs, start, stop, num):
        """Frame of ``num`` evenly spaced masses at a fixed dbs, as sys_func expects."""
        return pd.DataFrame({'mass': np.linspace(start, stop, num), 'dbs': dbs})

    for dbs in dbs_values:
        grid = make_df_space(dbs, 125, 500, 100)
        ax.plot(grid.mass, sys_func(grid, *popt))

    cms_format_fig(era, ax)
    # Raw string: '\e' is not a valid escape sequence in a normal string literal.
    ax.set_xlabel(r'$m_{\ell\ell}$ [GeV]')
    ax.set_ylabel('Systematic/Nominal')
    ax.legend(title="{}: {}".format(reg, sys_key).replace("_", " "))
    print(popt)
    return lambda x: sys_func(x, *popt), fig
    

In [None]:
#visualize difference between min/max dbs points

In [None]:
def make_delta_plot(reg, sys_key):
    """Plot, versus mass, the systematic at dbs=1.0 minus the systematic at dbs=0.04.

    Only masses available at both dbs values are used, and the two sets are matched by
    mass, so a mass point missing from either one no longer causes a shape mismatch.

    Parameters
    ----------
    reg : str
        Region name matched against ``hist_df.reg``.
    sys_key : str
        Column of ``hist_df`` holding the systematic.

    Returns
    -------
    tuple
        ``(masses, delta)``: the masses as a pandas Series and the differences as a
        numpy array in the same order.
    """
    mass_1p0, sys_1p0 = get_reg_dbs(reg, 1.0, sys_key=sys_key)
    mass_0p04, sys_0p04 = get_reg_dbs(reg, 0.04, sys_key=sys_key,
                                      _hist_df=hist_df[hist_df.mass.isin(mass_1p0)])

    # Look-up of the dbs=0.04 value by mass; assumes one row per mass for a given
    # region and dbs.
    low_by_mass = pd.Series(sys_0p04.to_numpy(), index=mass_0p04.to_numpy())
    has_both = mass_1p0.isin(low_by_mass.index)
    masses = mass_1p0[has_both]
    delta = sys_1p0[has_both].to_numpy() - low_by_mass.reindex(masses.to_numpy()).to_numpy()

    plt.plot(masses, delta)
    plt.title('{}: {}'.format(reg, sys_key.replace('_', ' ')))
    plt.show()
    return masses, delta


In [None]:
# Display the whole hist_df table (histogram objects plus systematic columns).
hist_df

In [None]:
# Fit and plot the 'tot' systematic for SR1, then save the figure as PNG.
sr1_sys_func, sr1_fig = make_sys_plot('SR1', sys_key = 'tot')
sr1_fig.savefig('output/sys_total/{}_{}.png'.format(era, 'SR1'))

In [None]:
# Fit and plot the 'tot' systematic for SR2, then save the figure as PNG.
sr2_sys_func, sr2_fig = make_sys_plot('SR2', sys_key = 'tot')
sr2_fig.savefig('output/sys_total/{}_{}.png'.format(era, 'SR2'))

In [None]:
# NOTE: this list is overwritten by a later cell that repeats it without 'Weight_L1Comb',
# so this definition has no effect when the notebook is run top to bottom.
sys_list = ['{}_jet_jesTotalComb_muon_corrected_pt_ele_pt',
       '{}_jet_nom_muon_correctedComb_pt_ele_pt',
       '{}_jet_jerComb_muon_corrected_pt_ele_pt', 'Weight_PuComb',
       'Weight_BTagComb', 'Weight_PUIDComb', 'Weight_PDF_Comb',
       'Weight_ISRFSR_Comb', 'Weight_MuonSFComb', 'Weight_ElectronSFComb',
       'Weight_L1Comb', 'tot']

In [None]:
#plot all the sys

In [None]:
# Systematics to model below. Entries containing '{}' are templates that are filled with
# the region name before use. 'Weight_L1Comb' is not part of this list.
sys_list = ['{}_jet_jesTotalComb_muon_corrected_pt_ele_pt',
       '{}_jet_nom_muon_correctedComb_pt_ele_pt',
       '{}_jet_jerComb_muon_corrected_pt_ele_pt', 'Weight_PuComb',
       'Weight_BTagComb', 'Weight_PUIDComb', 'Weight_PDF_Comb',
       'Weight_ISRFSR_Comb', 'Weight_MuonSFComb', 'Weight_ElectronSFComb',
        'tot']

In [None]:
# Fit and plot every systematic in sys_list for both signal regions. The fitted callables
# are kept in function_dict[<list entry>][<region>] and each figure is saved as PNG.
# NOTE(review): the loop variable `sys` would shadow the standard-library module of that
# name if it is imported anywhere in this notebook.
# NOTE(review): the file name is built from the un-formatted list entry `sys`, so for the
# '{}_...' entries it contains a literal '{}' — confirm whether `_sys` was intended.
function_dict = {}
for sys in sys_list:
    function_dict[sys] = {}
    for reg in ['SR1', 'SR2']:    
        # Fill the region into templated names; other names are unchanged by format().
        _sys = sys.format(reg)
        _sys_func, _sys_fig = make_sys_plot(reg, sys_key = _sys)
        function_dict[sys][reg] = _sys_func
        _sys_fig.savefig('output/sys_total/{}_{}_{}.png'.format(era, reg, sys))

In [None]:
# Single-systematic check for SR1. make_sys_plot returns (callable, figure); unpack it
# into dedicated names so the 'tot' model stored in sr1_sys_func is not replaced by a tuple.
sr1_muon_sf_func, sr1_muon_sf_fig = make_sys_plot('SR1', sys_key='Weight_MuonSFComb')

In [None]:
# make_sys_plot returns (callable, figure); unpack it so sr1_sys_func is the callable
# 'tot' model for SR1 rather than a tuple.
sr1_sys_func, sr1_fig = make_sys_plot('SR1', sys_key='tot')

In [None]:
# Single-systematic check for SR2. The result was previously stored under an SR1 name as
# a (callable, figure) tuple; unpack it into dedicated SR2 names instead.
sr2_isrfsr_func, sr2_isrfsr_fig = make_sys_plot('SR2', sys_key='Weight_ISRFSR_Comb')

In [None]:
# This is the SR2 'tot' fit: store it under the SR2 names, unpacking the
# (callable, figure) tuple that make_sys_plot returns.
sr2_sys_func, sr2_fig = make_sys_plot('SR2', sys_key='tot')

In [None]:
from scipy import interpolate

In [None]:
def make_interpolation(df):
    """Build a bilinear interpolator of the fitted shapes over (standardized bin, mass).

    ``scipy.interpolate.interp2d`` has been removed from current SciPy releases, so the
    surface is built with ``RectBivariateSpline`` (degree 1 in both directions), which
    is the documented equivalent on a rectangular grid. The returned callable keeps the
    ``interp2d`` call convention.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``mass``, ``fit`` (objects exposing ``nominal``) and
        ``standardized_bins`` (objects exposing ``calc_bin_centers()``). The bin centers
        are taken from the first row. If a mass occurs on several rows, only its first
        row is used.

    Returns
    -------
    callable
        ``f(x, y)`` with ``x`` the standardized bin centers and ``y`` the masses. It
        returns an array of shape ``(len(y), len(x))``, or a 1-D array of length
        ``len(x)`` when a single mass is requested. Inputs are sorted before evaluation,
        as ``interp2d`` did by default.
    """
    # One grid row per mass: repeated masses would make the grid non-rectangular.
    unique_df = df.drop_duplicates(subset='mass')
    masses = unique_df.mass.to_numpy()
    print('using masses:', masses)

    bin_centers = np.asarray(unique_df.iloc[0]['standardized_bins'].calc_bin_centers(),
                             dtype=float)
    # z[i, j] is the fitted content of bin j for mass i.
    z = np.stack([np.asarray(fit.nominal, dtype=float) for fit in unique_df['fit']], axis=0)

    # RectBivariateSpline requires strictly increasing coordinates.
    mass_order = np.argsort(masses)
    bin_order = np.argsort(bin_centers)
    spline = interpolate.RectBivariateSpline(
        bin_centers[bin_order],
        masses[mass_order].astype(float),
        z[np.ix_(mass_order, bin_order)].T,
        kx=1, ky=1,
    )

    def interpolator(x, y):
        x_sorted = np.sort(np.atleast_1d(np.asarray(x, dtype=float)))
        y_sorted = np.sort(np.atleast_1d(np.asarray(y, dtype=float)))
        # The spline returns (len(x), len(y)); transpose to interp2d's (len(y), len(x)).
        values = spline(x_sorted, y_sorted).T
        return values[0] if len(values) == 1 else values

    return interpolator

In [None]:
# Closure test of the shape interpolation, per region: build the interpolator from every
# second row of the region's table, then compare its prediction with the histograms of
# rows that were left out. One two-panel (distribution + ratio) plot per left-out mass.
for reg in ['SR1', 'SR2']:
    tdf = hist_df[hist_df.reg==reg]
    
    # Rows 0, 2, 4, ... are used to build the interpolation.
    iterpolator = make_interpolation(tdf.iloc[::2])
    
    # Rows 1, 3, 5, ... are the test points, excluding the last two rows of the table.
    # NOTE(review): this assumes one row per mass, ordered by mass; with an odd number of
    # rows the slice 1:-2:2 also skips a point that lies between two training masses —
    # confirm whether 1:-1:2 was intended.
    masses = tdf.iloc[1:-2:2].mass
    fig, axs = nratio_plot_template(nPlots=[len(masses), 1], figsize=(40,15))
    for mass, ax in zip(masses, axs):
        top, bottom = ax[0]
        bin_centers = tdf.iloc[0]['standardized_bins'].calc_bin_centers()
        hist = tdf[tdf.mass==mass].iloc[0]['hist']
        fit = tdf[tdf.mass==mass].iloc[0]['fit']
        
        # Interpolated shape at this mass, evaluated on the standardized bin centers.
        z_interp = iterpolator(bin_centers, [mass])
    
        #top
        hist.draw(top, label='{} GeV'.format(mass))
        mean, sigma = mean_model(mass), sigma_model(mass)
        # Map the standardized bin centers back to mass units for this mass point.
        scaled_bin_centers = bin_centers*sigma+mean
        top.plot(scaled_bin_centers,  z_interp, ds='steps-mid', label='interpolation', color='red', zorder=3)
        #bottom
        # Ratio of the histogram to the interpolation, with guide lines at 1, 1.1 and 0.9.
        (hist*(1./z_interp)).draw(bottom)
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1) , color='black', zorder=3)
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), 1.1) , color='black', zorder=3, ls=':')
        bottom.plot(scaled_bin_centers, np.full(len(scaled_bin_centers), .9) , color='black', zorder=3, ls=':')
        
        cms_format_fig(era, top)
        top.legend()
        bottom.set_xlabel(mll + ' [GeV]')
        top.set_ylabel('counts')
        bottom.set_ylabel('ratio')
        bottom.set_ylim(0,2)
    fig.savefig('assets_exo_jul_22/sig_interpolation/{}_{}_interp_test.pdf'.format(era, reg))