In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Apply the CMS plotting style shipped with mplhep to every matplotlib figure.
import mplhep as hep
hep.style.use(hep.style.CMS)
# Render all figure text through LaTeX.
# NOTE(review): presumably requires a working LaTeX installation on the machine running the notebook - confirm.
plt.rcParams.update({
    "text.usetex": True,
})

In [None]:
from plotting_meta.plotting_meta import cms_format_fig, color_cycle, lumi_dict, BS_BS_y, trident_y, lighten_color
from bff_processor.plotting_utils import color_fader
from limit_utils.utils import *

In [None]:
# Era tag that selects which cross-section fit CSV the sample data is read from.
era = '16-18'
xsec_df = pd.read_csv('fits/limit_setting/xsec_fits_after_6_{}.csv'.format(era))

In [None]:
####
## Now, produce era specific plots
####
# `lera` starts as an integer so it can be compared with the `era` column and
# is rebound to its string form once the row mask has been built.
lera = 2018
era_sel = xsec_df.era.eq(lera)
lxsec_df = xsec_df.loc[era_sel]
lera = str(lera)

In [None]:
def poln(x, *params):
    """Evaluate the polynomial ``sum_i params[i] * x**i`` element-wise.

    ``x`` must provide ``astype`` (e.g. a NumPy array or pandas Series); it is
    converted to float first. With no coefficients the result is all zeros.
    """
    base = x.astype('float')
    total = base * 0.
    for power, coeff in enumerate(params):
        total = total + coeff * np.power(base, power)
    return total

def logisitic(x, c1, c2):
    """Base-10 logistic curve: 0.5 at ``x == c1``, with ``c2`` setting the width."""
    exponent = -(x - c1) / c2
    return 1 / (1 + 10 ** exponent)

def dbs_func(x, c1, c2):
    """Return ``c1 * (1 + c2 * sqrt(x))``, the dbs-dependent factor of the acceptance fit."""
    root = x ** .5
    return c1 * (1 + c2 * root)

def dbs_acceptance_fit(x, c1, c2, c3, c4, c5, *params):
    """Acceptance model used as the ``curve_fit`` target.

    ``x`` must expose ``mass`` and ``dbs`` (a DataFrame or a row).

    * ``params`` are forwarded to ``logisitic`` to shape the mass dependence.
    * ``c3, c4, c5`` are the coefficients of a quadratic in that logistic value.
    * ``c1, c2`` are passed to ``dbs_func`` for the dbs dependence.

    Returns the product of the mass term and the dbs term.
    """
    mass_term = logisitic(x.mass, *params)
    quadratic = c3 + c4 * mass_term + c5 * mass_term ** 2
    return quadratic * dbs_func(x.dbs, c1, c2)

In [None]:
def make_mass_x_df(dbs, nSamples = 100):
    """Build a frame of ``nSamples`` masses evenly spaced over [125, 500] at a fixed ``dbs``.

    Returns a DataFrame with columns ``mass`` and ``dbs``.
    """
    return pd.DataFrame({
        'mass': np.linspace(125, 500, nSamples),
        'dbs': np.full(nSamples, dbs),
    })

In [None]:
# computes the best fit acceptance for lera
# The fit sample is the same for every region (selected era, with cuts on the
# SR1 and SR2 acceptances), so it is selected once instead of once per region.
tdf = xsec_df[era_sel & (xsec_df.acceptance_SR1 < .2) & (xsec_df.acceptance_SR2 < .1)]
# Starting values in dbs_acceptance_fit order: c1, c2 (dbs term), c3, c4, c5
# (quadratic in the logistic value), then the two logistic parameters.
acceptance_p0 = [.1, .3, 0, 1, 0, 180, 100]
acceptance_dict = {}
for reg in ['SR1','SR2', 'SRX']:
    popt_acc, _ = curve_fit(dbs_acceptance_fit, tdf, tdf['acceptance_{}'.format(reg)],
                            p0=acceptance_p0, maxfev=int(2e5))
    acceptance_dict[reg] = popt_acc

In [None]:
def compute_acceptance(x_df, reg):
    '''Evaluate the fitted acceptance model of region ``reg`` on ``x_df``.

    The fit parameters are looked up in the module-level ``acceptance_dict``;
    ``x_df`` must expose ``mass`` and ``dbs``.
    '''
    return dbs_acceptance_fit(x_df, *acceptance_dict[reg])

In [None]:
# make acceptance plot: one panel per region, fitted curve plus sample points for each dbs value
dbs_values = [0.04, 0.5, 1.0]
# one colour per dbs value, faded from blue towards red
colors = [color_fader('blue', 'red', mix=float(j)/3) for j in range(3)]
fig, axs = plt.subplots(3,1, figsize=[10,10])
for ax, reg in zip(axs, acceptance_dict):
    acceptance_string = 'acceptance_{}'.format(reg)
    acceptance_uc_string = 'acceptance_unc_{}'.format(reg)
    for dbs, c in zip(dbs_values, colors):
        tdf = xsec_df[era_sel & (xsec_df.dbs==dbs)]
        # smooth fitted curve on a fine mass grid
        x_df = make_mass_x_df(dbs)
        ax.plot(x_df.mass, compute_acceptance(x_df, reg), color=c)
        # sample points with their uncertainties
        ax.errorbar(tdf.mass, tdf[acceptance_string], yerr=tdf[acceptance_uc_string],
                    label='$\\delta_{{bs}}={}$'.format(dbs), linestyle='', marker='.', color=c)
    # Per-panel formatting is applied once, not once per dbs value (the region
    # tag used to be drawn three times on top of itself).
    ax.text(290, 1.1e-3, reg, fontsize=15)
    ax.set_ylim(1e-3, 3e-1)
    ax.set_ylabel('acceptance')
    ax.set_yscale('log')
# the x label belongs on the bottom panel of the stack
axs[-1].set_xlabel('DiLepMass [GeV]')
cms_format_fig(lera, axs[0])
plt.legend()
fig.savefig('fits/limit_setting/acceptance_{}.png'.format(lera))

In [None]:
#added computed acceptance to xsec_df
for reg, popt_acc in acceptance_dict.items():
    acc_string = 'comp_acceptance_{}'.format(reg)
    weight_string = 'comp_weight_{}'.format(reg)
    era_df = xsec_df.loc[era_sel]
    # dbs_acceptance_fit works on whole columns, so the selected rows are
    # evaluated in one call instead of a row-by-row apply.
    xsec_df.loc[era_sel, acc_string] = dbs_acceptance_fit(era_df, *popt_acc)
    # weight = acceptance * lumi * comp_xsec, restricted to the selected era
    xsec_df.loc[era_sel, weight_string] = xsec_df.loc[era_sel, acc_string]*era_df.lumi*era_df.comp_xsec

In [None]:
# not needed really, but making a linear fit to mass and gmu values from paper values to show validity of min gmu value for mass
gmu = [0.08, 0.14, 0.20]
mass = np.array([200,350,500])

def linear(x,c1,c2):
    """Straight line with slope ``c1`` and intercept ``c2``."""
    return c2 + c1*x

popt_mass_gmu ,_ = curve_fit(linear, mass, gmu)

def mass_to_gmu(x):
    """gmu at mass ``x`` from the straight-line fit to the reference points above."""
    return linear(x, *popt_mass_gmu)

# reference points first, then the fitted line evaluated at the same masses
plt.plot(mass, gmu)
plt.plot(mass, mass_to_gmu(mass))

In [None]:
def _mass_scaled_constant(row):
    """Shared numerator ``1.3e-5 * (mass / 100)**2`` of calc_gm and calc_gb.

    NOTE(review): curve_of_const_gmu in this notebook uses an "updated" value
    of 6.5e-6 for what looks like the same relation - confirm which constant
    is intended here.
    """
    constant = 1.3*1e-5
    return constant*(row.mass/100)**2

def calc_gm(row):
    """Return ``1.3e-5 * (mass/100)**2 / (gb * dbs)`` for a row with mass, gb and dbs."""
    return _mass_scaled_constant(row)/(row.gb*row.dbs)

def calc_gb(row):
    """Return ``1.3e-5 * (mass/100)**2 / (gmu * dbs)`` for a row with mass, gmu and dbs."""
    return _mass_scaled_constant(row)/(row.gmu*row.dbs)

In [None]:
import pickle

def produce_xsec_model(popt_c1, popt_k):
    '''Return a one-argument callable that evaluates compute_expected_xsec with the given c1 and k fit parameters.'''
    def model(x):
        return compute_expected_xsec(x, popt_c1, popt_k)
    return model

# NOTE: pickle.load can execute arbitrary code from the file; only load fit
# results that were produced locally by the fitting step.
with open('fits/limit_setting/popt_c1.pkl', 'rb') as f:
    popt_c1 = pickle.load(f)
with open('fits/limit_setting/popt_k.pkl', 'rb') as f:
    popt_k = pickle.load(f)

xsec_model = produce_xsec_model(popt_c1, popt_k)

In [None]:
def compute_xsec(df):
    """Add the model cross section, acceptance and event-count columns to ``df``.

    ``df`` is modified in place and also returned. Columns written:
    ``comp_total_xsec``, ``branching_ratio``, ``comp_xsec``, and for each
    region ``comp_acc_<reg>``, ``comp_xsec_<reg>`` and
    ``comp_nevt_<reg>_<year>`` for 2016, 2017 and 2018.
    """
    df['comp_total_xsec'] = xsec_model(df)
    df['branching_ratio'] = branching_ratio(df)
    df['comp_xsec'] = df['branching_ratio']*df['comp_total_xsec']
    years = [2016, 2017, 2018]
    for reg in ['SR1', 'SR2', 'SRX']:
        acc_col = 'comp_acc_'+reg
        xsec_col = 'comp_xsec_'+reg
        df[acc_col] = compute_acceptance(df, reg)
        df[xsec_col] = df['comp_xsec']*df[acc_col]
        for year in years:
            # expected events = region cross section * luminosity of that year
            df['comp_nevt_{}_{}'.format(reg, year)] = df[xsec_col]*lumi_dict[str(year)]
    return df

In [None]:
def inclusive_grid(start, stop, step):
    """Evenly spaced grid from ``start`` to ``stop`` (both included) with spacing ``step``.

    The number of points is rounded, not truncated. The previous
    ``int((stop - start) / step + 1)`` lands one point short whenever the
    floating-point quotient falls just below an integer, which happens for the
    gb grid (2e-4 .. 2e-2 in steps of 2e-4) and stretches its spacing.
    """
    n_points = int(round((stop - start) / step)) + 1
    return np.linspace(start, stop, n_points)

def make_dbs_mass_df():
    """Model grid over (mass, dbs); gmu comes from mass_to_gmu and gb from calc_gb.

    Returns the frame after compute_xsec has added its columns.
    """
    mass = np.array([125,150,175,200,350,500])
    dbs = inclusive_grid(1e-2, 1., 1e-2)
    # gmu depends on the mass only, so it is evaluated once per mass
    df = pd.DataFrame([{"mass": m, "gmu": g, "dbs": d}
                       for m, g in zip(mass, mass_to_gmu(mass)) for d in dbs])
    df['gb'] = df.apply(calc_gb, axis=1)
    return compute_xsec(df)

def make_gb_dbs_mass_df():
    """Model grid over (mass, gb, dbs) with gmu from mass_to_gmu.

    Returns the frame after compute_xsec has added its columns.
    """
    mass = np.array([125,150,175,200,350,500])
    gb = inclusive_grid(2e-4, 2e-2, 2e-4)
    dbs = inclusive_grid(1e-2, 1., 1e-2)
    df = pd.DataFrame([{"mass": m, "gmu": g, "gb": b, "dbs": d}
                       for m, g in zip(mass, mass_to_gmu(mass)) for b in gb for d in dbs])
    return compute_xsec(df)

In [None]:
# Build the two model grids: one row per (mass, dbs) and one row per (mass, gb, dbs).
dbs_mass_df = make_dbs_mass_df()
gb_dbs_mass_df = make_gb_dbs_mass_df()

In [None]:
# plot change from dbs value to dbs value
# The loop variable is not called `mass`, so the module-level `mass` array used
# by the gmu fit is not overwritten when this cell runs.
for z_mass in [125,150,175,200,350,500]:
    tdf = gb_dbs_mass_df[gb_dbs_mass_df.mass==z_mass].sort_values(['dbs'])
    # ratio of expected 2016 event counts in SR1 and SR2 along dbs
    y = (tdf['comp_nevt_SR1_2016']/tdf['comp_nevt_SR2_2016']).to_numpy()
    plt.plot(tdf.dbs, y, label=z_mass)
    # relative change of the ratio between the lowest and highest dbs
    print(y[-1]/y[0])
plt.ylabel('$\\frac{n_{SR1}}{n_{SR2}}$')
# escaped backslash: '\d' is an invalid escape sequence in a normal string
plt.xlabel('$\\delta_{bs}$')
plt.legend()

In [None]:
#draw branching ratio plot
dbss = [.04,.5,1]
fig, axs = plt.subplots(1,len(dbss), figsize=[15,10])
for dbs, ax in zip(dbss, axs):
    for z_mass, c in zip([125,150,175,200,350,500], color_cycle):
        # dbs comes from a linspace grid, so it is matched with a tolerance
        # instead of exact float equality
        selection = (gb_dbs_mass_df.mass==z_mass) & np.isclose(gb_dbs_mass_df.dbs, dbs)
        tdf = gb_dbs_mass_df[selection].sort_values(['gb', 'mass'])
        ax.plot(tdf.gb, tdf.branching_ratio, label=z_mass, color=c)
    # per-panel formatting, applied once per axis
    ax.set_title('$\\delta_{{bs}}={}$'.format(dbs))
    ax.set_ylim(0, .7)
plt.legend()
axs[0].set_ylabel("$\\frac{\\sigma \\left( pp\\rightarrow Z'\\rightarrow\\mu\\mu\\right )}{\\sigma \\left( pp\\rightarrow Z'\\right )}$")
axs[-1].set_xlabel('$g_b$')
fig.savefig('fits/limit_setting/branching_ratios.png')

In [None]:
def dbs_mass_nevents_reg_to_gb(df,nevents, reg, lumi):
    """Convert an event count in region ``reg`` into gb.

    The count is divided by (fitted acceptance * ``lumi``) to get a cross
    section, which is then passed with ``df`` to gb_from_mm_xsec_gmu_dbs.
    """
    mm_xsec = nevents/(compute_acceptance(df, reg)*lumi)
    return gb_from_mm_xsec_gmu_dbs(mm_xsec, df)

In [None]:
def gb_from_mm_xsec_gmu_dbs(mm, df):
    """Return gb for cross section ``mm`` at the mass and dbs values in ``df``.

    c1 and k are evaluated at ``df.mass`` from the module-level fit parameters
    ``popt_c1`` and ``popt_k``. The result is
    ``mm / sqrt(2/3*c1*mm*(1 + k*dbs**2) - 2*dbs**2 - 1)``.

    NOTE(review): the numerator ``mm`` sits outside the square root - confirm
    this matches the intended inversion of the cross-section formula.
    """
    c1 = compute_c1(df.mass, popt_c1)
    k = compute_k(df.mass, popt_k)
    dbs_sq = df.dbs**2
    radicand = 2/3*c1*mm*(1+k*dbs_sq)-2*dbs_sq -1
    return mm/radicand**.5

In [None]:
#def xsec_gmu_dbs_to_gb(x, xsec):
#    c1, k = compute_c1(tdf.mass), compute_k(tdf.mass)
#    return (xsec/(c1*(1 + k * x.dbs**2) * x.gmu**2))**.5
#def total_xsec_gmu_dbs_to_gb(x, xsec):
#    c1, k = compute_c1(tdf.mass), compute_k(tdf.mass)
#    return (xsec/(c1*(1 + k * x.dbs**2) ))**.5

In [None]:
# per-era limits for the era selected above
lim_df = pd.read_csv('limits/limit_{}.csv'.format(lera))
# The combined limits live in a fixed file; the name has no placeholder, so
# nothing is formatted into it.
comb_lim_df = pd.read_csv('limits/limit_201X.csv')
# Item assignment sets the column even when the CSV has no `era` column;
# attribute assignment would only attach a plain attribute in that case.
comb_lim_df['era'] = '16-18'

In [None]:
def make_gb_dbs_df(reg, limit_level, lera, lim_df):
    """Add the gb value of one limit level to ``lim_df`` for one region.

    Rows with ``nJets == reg`` get a new column ``'<limit_level>_gb'``, computed
    from the event-count limit in column ``limit_level`` and the luminosity
    ``lumi_dict[lera]``. ``lim_df`` is modified in place; nothing is returned.
    """
    selection = lim_df.nJets==reg
    # Only the selected rows are converted; the region's acceptance does not
    # apply to rows of the other regions.
    reg_df = lim_df[selection]
    gb = dbs_mass_nevents_reg_to_gb(reg_df, reg_df[limit_level], reg, lumi_dict[lera])
    lim_df.loc[selection, "{}_gb".format(limit_level)] = gb

In [None]:
# compute gb, dbs for the limits
# Every (region, limit level) pair is converted first for the single-era frame
# and then for the combined-era frame.
limit_levels = ['16.0', '2.5', '50.0', '84.0', '97.5']
for reg in ['SR1', 'SR2', 'SRX']:
    for level in limit_levels:
        for era_key, frame in ((lera, lim_df), ('16-18', comb_lim_df)):
            make_gb_dbs_df(reg, level, era_key, frame)

In [None]:
# prepare data frame with correct limits: one row per (region, mass) holding
# the mean of each limit level over the matching rows of lim_df
limit_columns = ['mass','16.0', '2.5', '50.0', '84.0', '97.5']
mean_records = []
for reg in ['SR1', 'SR2', 'SRX']:
    # loop variable is not called `mass`, so the module-level array is left alone
    for z_mass in np.unique(lim_df.mass):
        tdf = lim_df[(lim_df.nJets==reg) & (lim_df.mass==z_mass)]
        record = tdf[limit_columns].mean(axis=0).to_dict()
        record['reg'] = reg
        mean_records.append(record)
# DataFrame.append was removed in pandas 2.0; the records are collected in a
# list and the frame is built once.
lim_count_mean_df = pd.DataFrame(mean_records)

In [None]:
def make_mass_df(mass):
    """Frame of dbs values from 1e-4 to 1 in steps of 1e-4 at a single ``mass``.

    Columns are ``mass``, ``gmu`` (from mass_to_gmu, evaluated once) and ``dbs``.
    """
    # The point count is rounded: truncating (1 - 1e-4) / 1e-4 + 1 can give
    # 9999 instead of 10000 because the float quotient lands just below 9999.
    n_points = int(round((1 - 1e-4) / 1e-4)) + 1
    dbs = np.linspace(1e-4, 1., n_points)
    return pd.DataFrame({"mass": mass, "gmu": mass_to_gmu(mass), "dbs": dbs})

In [None]:
#sigma[%] = exp[2ln(g_mu) + 1.381] + 0.00038

def width_to_gmu(width):
    """Invert the width parametrisation quoted above to get gmu from ``width``."""
    log_term = np.log(width-0.00038)
    return np.exp((log_term-1.381)/2)

def curve_of_const_gmu(zmass, gmu):
    """Return ``(gb, dbs)`` arrays tracing the curve of constant ``gmu`` at mass ``zmass``.

    dbs runs from 1e-4 to 1 and ``gb = c9 * (zmass/100)**2 / (gmu * dbs)``.
    """
    n_points = int((1-1e-4)/(1e-2)+1)
    dbs = np.linspace(1e-4, 1., n_points)
    #updated c9 value (the earlier value was 1.3*1e-5)
    c9 =  6.5e-6
    return c9*(zmass/100)**2/(gmu*dbs), dbs

In [None]:
def make_gb_dbs_plot(ax, reg_df,masses=[125,350,500], duplicate=0, postfix = "", single_color="blue"):
    """Draw expected-limit curves in the (gb, dbs) plane for the rows of ``reg_df``.

    For every row whose mass is in ``masses`` this draws the median ('50.0')
    curve, the '16.0'-'84.0' band and the 'fit' / 'hist' points from the
    module-level ``lim_df``. The module-level ``lera`` selects the luminosity.

    ax           -- matplotlib axes to draw on
    reg_df       -- rows with ``mass``, ``reg`` and the limit-level columns
    masses       -- masses to draw; with exactly one mass ``single_color`` is
                    used and the legend gets a mass title
    duplicate    -- when true, skip the width curve, the other-experiment
                    regions, the legend and the axis labels
    postfix      -- text appended to the curve labels
    single_color -- curve colour when a single mass is drawn
    """
    limit_levels = ['16.0', '2.5', '50.0', '84.0', '97.5']
    single_mass = len(masses)==1
    for k, (j, row) in enumerate(reg_df.iterrows()):
        if row.mass not in masses: continue
        # The region comes from the row being drawn rather than from whatever
        # a notebook-level `reg` variable currently holds.
        reg = row.reg
        dbs_mass_df =  make_mass_df(row.mass)
        gbs = {ll: dbs_mass_nevents_reg_to_gb(dbs_mass_df, row[ll], reg, lumi_dict[lera])
               for ll in limit_levels}

        if single_mass:
            color = single_color
            label = '{} exp. limit'.format(postfix)
        else:
            color = color_fader('blue', 'red', mix=float(k)/reg_df.shape[0])
            label = '{}{}'.format(int(row.mass), postfix)
        ax.plot(gbs['50.0'], dbs_mass_df.dbs, label=label, color=color, zorder=1)
        ax.fill_betweenx( dbs_mass_df.dbs,gbs['16.0'], gbs['84.0'], color=color, alpha=.1, zorder=1)
        #plot data
        def plot_data(dtype, **kwargs):
            tldf = lim_df[(lim_df.mass==row.mass) & (lim_df.nJets==reg)]
            ftldf = tldf[tldf.sig_type==dtype]
            # asymmetric x errors from the 16 / 84 percent levels
            xerror = (ftldf['50.0_gb']-ftldf['16.0_gb'],ftldf['84.0_gb']-ftldf['50.0_gb'])
            ax.errorbar(ftldf['50.0_gb'], ftldf.dbs, xerr = xerror, **kwargs, zorder=1)
        plot_data('fit', color=color,linestyle='', marker='o')
        plot_data('hist', color=color,linestyle='', marker='x')
        if reg=='SR1':
            ax.set_xlim(0,.025)
        else:
            ax.set_xlim(0,0.02)
        ax.set_ylim(0,1.01)
        if not duplicate:
            #draw gmu width curve
            gb, dbs = curve_of_const_gmu(row.mass, width_to_gmu(1))
            # escaped backslash: '\%' is an invalid escape sequence in a normal string
            ax.plot(gb, dbs, color='black',linestyle='-.', zorder=1, label='width $<$ 1\\%')
            #draw BS-BS, neutrino trident
            gb = np.linspace(1e-8, 0.25,1000)
            bsbsy = BS_BS_y(row.mass, gb)
            ty = trident_y(row.mass, gb)
            ax.fill_between(gb,bsbsy,bsbsy+999, color='#c2c2c2', label=r'$B_s-\bar{B_s}$', zorder=0)
            ax.fill_between(gb,ty, color='#ffd7ff', label='$\\nu$ Trident', zorder=0)
    if not duplicate:
        ax.set_xlim(0,0.02)
        legend_opts = {
            "facecolor": 'white',
            "framealpha": 1,
            "frameon": True
        }
        if single_mass:
            legend_opts["title"] = "$m_{{Z'}} = {}$ GeV".format(masses[0])
        ax.legend( **legend_opts)
        ax.set_xlabel('$g_b$')
        ax.set_ylabel('$\\delta_{bs}$')
    cms_format_fig(str(lera), ax,  data=True, label="Work in Progress")

In [None]:
def draw_other_experiments(ax, mass):
    """Shade the regions given by BS_BS_y and trident_y for ``mass`` on ``ax``."""
    #this draws exclusion curves from other experiments
    #draw BS-BS, neutrino trident
    gb = np.linspace(1e-8, 0.25,1000)
    bsbsy = BS_BS_y(mass, gb)
    ty = trident_y(mass, gb)
    # the B_s region is filled upwards from the curve, the trident region below it
    ax.fill_between(gb,bsbsy,bsbsy+999, color='#c2c2c2', label=r'$B_s-\bar{B_s}$', zorder=0)
    ax.fill_between(gb,ty, color='#ffd7ff', label='$\\nu$ Trident', zorder=0)
def draw_gmu_curve(ax, mass):
    """Draw the curve of constant gmu that corresponds to a width argument of 1."""
    #draw gmu width curve
    gb, dbs = curve_of_const_gmu(mass, width_to_gmu(1))
    # escaped backslash: '\%' is an invalid escape sequence in a normal string
    ax.plot(gb, dbs, color='black',linestyle='-.', zorder=1, label='width $<$ 1\\%')
def format_gb_gmu_plot(ax, mass=""):
    """Set the axis ranges, legend, labels and CMS decoration of a (gb, dbs) plot.

    When ``mass`` is given it is shown as the legend title. The era passed to
    cms_format_fig is read from the module-level ``lera``.
    """
    ax.set_ylim(0,1.01)
    ax.set_xlim(0,0.02)
    legend_opts = {
            "facecolor": 'white',
            "framealpha": 1,
            "frameon": True
        }
    if mass!="": legend_opts["title"] = "$m_{{Z'}} = {}$ GeV".format(mass)
    ax.legend( **legend_opts)
    ax.set_xlabel('$g_b$')
    ax.set_ylabel('$\\delta_{bs}$')
    cms_format_fig(str(lera), ax,  data=True, label="Work in Progress")

In [None]:
def make_single_curve(ax, row, lim_df, lera, duplicate=0, postfix = "", color="blue"):
    """Draw one expected-limit curve, its band and its 'fit' points on ``ax``.

    ax        -- matplotlib axes to draw on
    row       -- row with ``mass``, ``reg`` and the limit-level columns
    lim_df    -- limits frame the 'fit' data points are taken from
    lera      -- key into lumi_dict for the luminosity
    duplicate -- accepted for call compatibility; not used here
    postfix   -- text placed before ' exp. limit' in the curve label
    color     -- colour of the curve, band and points
    """
    mass = row.mass
    dbs_mass_df =  make_mass_df(mass)
    def gb_from_lim_level(ll):
        return dbs_mass_nevents_reg_to_gb(dbs_mass_df,row[ll], row.reg, lumi_dict[lera])
    gbs = {ll: gb_from_lim_level(ll) for ll in ['16.0', '2.5', '50.0', '84.0', '97.5']}
    ax.plot(gbs['50.0'], dbs_mass_df.dbs, label='{} exp. limit'.format(postfix), color=color, zorder=1)
    ax.fill_betweenx( dbs_mass_df.dbs,gbs['16.0'], gbs['84.0'], color=color, alpha=.1, zorder=1)
    #def plot data
    def plot_data(dtype, **kwargs):
        # The points are selected with the row's own region, the same one used
        # for the curve above, not with a notebook-level `reg` variable.
        tldf = lim_df[(lim_df.mass==row.mass) & (lim_df.nJets==row.reg)]
        ftldf = tldf[tldf.sig_type==dtype]
        # asymmetric x errors from the 16 / 84 percent levels
        xerror = (ftldf['50.0_gb']-ftldf['16.0_gb'],ftldf['84.0_gb']-ftldf['50.0_gb'])
        ax.errorbar(ftldf['50.0_gb'], ftldf.dbs, xerr = xerror, **kwargs, zorder=1)
    plot_data('fit', color=color,linestyle='', marker='o')

In [None]:
# Relabel the mass point 201 as 200 in every frame that carries a mass column.
for frame in (lim_count_mean_df, dbs_mass_df, lim_df, comb_lim_df):
    frame.loc[frame.mass==201, "mass"] = 200
# Drop incomplete rows of the combined limits and expose the region under the
# `reg` name that the plotting helpers read.
comb_lim_df = comb_lim_df.dropna()
comb_lim_df['reg'] = comb_lim_df.nJets

In [None]:
fig, ax = plt.subplots(1,1, figsize=[10,10], frameon=True)
mass = 200
draw_other_experiments(ax, mass)
draw_gmu_curve(ax, mass)
# label tag and colour of the curve drawn for each region
curve_style = {
    'SR1': ('SR1', "blue"),
    'SR2': ('SR2', "red"),
    'SRX': ("Comb.", "green"),
}
for reg, (tag, curve_color) in curve_style.items():
    reg_df = lim_count_mean_df[lim_count_mean_df.reg==reg]
    tdf = reg_df[reg_df.mass==mass].dropna()
    row = tdf.iloc[-1]
    make_single_curve(ax, row, lim_df, lera, duplicate=1, postfix=" {}".format(tag), color=curve_color)
#combine all years
tdf = comb_lim_df[comb_lim_df.mass==mass]
row = tdf.iloc[-1]
make_single_curve(ax, row, comb_lim_df, '16-18', duplicate=1, postfix=" {}".format("Comb. 16, 17"), color="purple")
format_gb_gmu_plot(ax, mass=mass)
fig.savefig('fits/limit_setting/gb_dbs_lim_200_{}.pdf'.format(lera))