In [None]:
import pandas as pd
import glob 
import matplotlib.pyplot as plt
import re
from src.plotting_tools.cms_format import cms_format_fig, cms_style
cms_style()

from src.assets.lumi import lumi_dict
from src.assets.shape_scaling import scale

In [None]:
# Era whose combine outputs are analysed.
era = '201X'
# Luminosity of the selected era relative to the '201X' entry of lumi_dict.
lumi_fraction = lumi_dict[str(era)]/lumi_dict['201X']
# Always rescale from the value stored in the module: the previous
# `scale = scale*lumi_fraction` multiplied the already-rescaled notebook
# global again every time this cell was re-run.
from src.assets.shape_scaling import scale as base_scale
scale = base_scale*lumi_fraction
from src.assets.output_dir import output_dir
outdir = output_dir
outdir

In [None]:
# Display the luminosity-rescaled `scale` factor that the plotting cells multiply the limits by.
scale

In [None]:
# Glob pattern for every `.out` file under the model-independent output
# directory of the selected era.
filestring = '{}/combine_data/model_ind/out/{}/*.out'.format(output_dir, era)
# Alternative hard-coded AFS location, kept for reference only:
#filestring ='/afs/cern.ch/work/r/rymuelle/public/nanoAODzPrime/CMSSW_12_1_0/src/bff_plotting_code_v3/model_ind/out/{}/*.out'.format( era)
files = glob.glob(filestring)
# Display the matched paths.
files

In [None]:
# Final-state labels that can be encoded in an output file name.
multiplicities = ['0b(1b)', '0b(2b)', '1b', '1s', '1b+1s', '2b', '2s', 'shape']
def getmultiplicity(text):
    """Return the multiplicity label encoded in ``text``.

    Each label is searched for as ``_<label>.`` with parentheses replaced by
    dashes (``'0b(1b)'`` is looked up as ``'_0b-1b-.'``). When several labels
    are present the one latest in ``multiplicities`` wins; when none is
    present the sentinel ``"unkown"`` is returned. The result and the input
    text are printed on every call.
    """
    matches = [
        label for label in multiplicities
        if '_{}.'.format(label.replace('(', '-').replace(')', '-')) in text
    ]
    selected_mul = matches[-1] if matches else "unkown"
    print(selected_mul, text)
    return selected_mul

In [None]:
def get_data_from_file(f):
    """Build one limit record from a combine ``.out`` file.

    Metadata is parsed from the path ``f`` itself; the limits are parsed from
    the file contents.

    Parameters
    ----------
    f : str
        Path of the output file. It must contain a three-digit mass token
        (``_NNN_`` or ``_NNN.N_``), an era token matching ``201.`` and a
        ``txt<digits>.`` run token.

    Returns
    -------
    dict
        Keys ``condor_run``, ``nJets``, ``reg``, ``multiplicity``, ``mass``,
        ``dbs``, ``era`` and ``obs``, plus one float key per
        ``Expected <q>%: r < <limit>`` line found in the file (``q`` mapped
        to the limit).

    Raises
    ------
    IndexError
        If the mass, era or run token is missing from ``f``.
    """
    def _first(pattern, default):
        # First regex match in the path, or `default` when nothing matches.
        found = re.findall(pattern, f)
        return found[0] if found else default

    # Optional tokens fall back to the same sentinels as before.
    nJets = _first(r'SR[0-9]', -1)
    # The decimal point is escaped so only a literal '.' is accepted in the mass.
    mass = float(re.findall(r'_(\d{3}|\d{3}\.\d)_', f)[0])
    # dbs is written as '<digit>p<digits>' in the name, e.g. '_0p04_' -> 0.04.
    dbs = float('{}.{}'.format(*_first(r'_(\d)p(\d+)_', ['0', '0'])))
    reg = _first(r'_(SR[0-9])_', 'Comb.')
    # The trailing '.' is deliberately a wildcard so that '201X' matches too.
    era = re.findall(r'(201.)', f)[0]
    condor_run = int(re.findall(r'txt([0-9]+)\.', f)[0])
    multiplicity = getmultiplicity(f)

    with open(f, 'r') as _f:
        r = _f.read()
    expected = re.findall(r'Expected +(\d+\.\d+)%: r < (\d+\.\d+)', r)
    limits = {float(quantile): float(limit) for (quantile, limit) in expected}
    observed = re.findall(r'Observed Limit: r < (\d+\.\d+)', r)
    # Anything other than exactly one observed line is recorded as 0.
    obs = float(observed[0]) if len(observed) == 1 else 0

    return { "condor_run": condor_run, "nJets":nJets, "reg":reg, "multiplicity":multiplicity,
            "mass":mass, "dbs":dbs, "era": era, "obs": obs, **limits}

In [None]:
# One row per output file; columns are the keys returned by get_data_from_file.
df = pd.DataFrame([get_data_from_file(path) for path in files])

In [None]:
# Keep only the rows of the most recent condor run (largest run number).
max_run = df.condor_run.unique().max()
df = df.loc[df.condor_run.eq(max_run)]
# Show the retained run number and the eras present in it.
(max_run, df.era.unique())

In [None]:
import matplotlib.pyplot as plt
# Sanity check (meaningful for synthetic data only): histogram of the observed
# limit divided by the median expected limit.
obs_over_expected = df.obs/df[50.0]
plt.hist(obs_over_expected)

In [None]:
# Relabel the '201X' era as 'Run 2'. Besides the figure label, `era` is also
# used below as the lumi_dict key and inside the output file names.
if era == '201X':
    era = 'Run 2'

In [None]:
# Mass points removed from every plot and from the CSV export below.
excluded_masses = [125, 150,175,200, 250, 300, 350]
df = df[~df.mass.isin(excluded_masses)]

In [None]:
# Drop rows labelled 'Comb.', the fallback region that get_data_from_file
# assigns when the file name contains no `_SR<digit>_` token.
df = df[df.reg!='Comb.']

In [None]:
# Drop rows with any missing value, e.g. files that lack one of the
# 'Expected <q>%' lines and therefore have NaN in that quantile column.
df = df.dropna()

In [None]:
# One figure per signal region: for every counting multiplicity draw the
# median expected limit, the 16%-84% expected band and the observed limit
# versus mass, then save the figure as a PDF.
for reg in ['SR1', 'SR2']:
    fig, ax = plt.subplots()
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    cms_format_fig(era, ax, r"\emph{Preliminary}")
    ax.set_ylim(1,10000)
    ax.set_yscale('log')
    ax.set_ylabel(r' $\sigma$ [fb]')
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
    era_lumi = lumi_dict[era]
    for fs_type in multiplicities:
        # '2s' and 'shape' are not drawn on this figure.
        if fs_type in ['2s', 'shape']: continue
        tdf = df[(df.reg==reg) & (df.multiplicity==fs_type)].sort_values("mass")
        # Median expected limit; no error bars are passed, the band below
        # carries the uncertainty.
        ax.errorbar(tdf.mass, tdf[50.0]*scale/era_lumi, label=fs_type)

        obs = tdf.obs*scale/era_lumi
        ax.plot(tdf.mass, obs, color='black', ls='-', marker='o')

        ax.fill_between(tdf.mass, tdf[16.0]*scale/era_lumi, tdf[84.0]*scale/era_lumi, alpha=.2)
    ax.legend()
    fig.savefig('{}/gen_b_s/{}_limit_{}.pdf'.format(outdir, reg, era))
    

In [None]:
# Path of the last per-multiplicity figure written above; it includes the era
# suffix so that it matches the name passed to savefig.
'{}/gen_b_s/{}_limit_{}.pdf'.format(outdir, reg, era)

In [None]:
# One figure per signal region for the 'shape' limits: median expected
# (dotted), observed (solid with markers), and the 2.5%-97.5% and 16%-84%
# expected bands, all multiplied by scale/lumi_dict[era].
for reg in [ 'SR1', 'SR2']:
    fig, ax = plt.subplots()
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    cms_format_fig(era, ax, r"\emph{Preliminary}")
    ax.set_ylim(1e-2,1e3)
    ax.set_yscale('log')
    ax.set_ylabel(r"$\sigma \cdot Br(Z' \rightarrow  \mu\mu)  \cdot A$ [fb]")
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
    tdf = df[(df.reg==reg) & (df.multiplicity=='shape')].sort_values("mass")
    values = tdf[[2.5, 16.0, 50.0, 84.0, 97.5]]*scale/lumi_dict[era]
    obs = tdf.obs*scale/lumi_dict[era]
    ax.plot(tdf.mass, values[50.0], color='black', ls=':')
    ax.plot(tdf.mass, obs, color='black', ls='-', marker='o', label='Obs.')
    # Wide band first so the narrower band is painted on top of it.
    ax.fill_between(tdf.mass, values[2.5], values[97.5], color='#fffe59')
    ax.fill_between(tdf.mass, values[16.0], values[84.0], color='#a3fb56')
    ax.legend()
    fig.savefig('{}/gen_b_s/{}_shape_limit_{}.pdf'.format(outdir, reg, era))
    

In [None]:
# Same layout as the shape-limit figure, but the limits are multiplied by
# `scale` only (no division by luminosity) and drawn on a linear y-axis.
for reg in [ 'SR1', 'SR2']:
    fig, ax = plt.subplots()
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    cms_format_fig(era, ax, r"\emph{Preliminary}")
    ax.set_ylim(0,300)
    ax.set_ylabel(r"95\% CL upper limit on number of events?")
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
    tdf = df[(df.reg==reg) & (df.multiplicity=='shape')].sort_values("mass")
    values = tdf[[2.5, 16.0, 50.0, 84.0, 97.5]]*scale
    obs = tdf.obs*scale
    ax.plot(tdf.mass, values[50.0], color='black', ls=':')
    ax.plot(tdf.mass, obs, color='black', ls='-', marker='o', label='Obs.')
    # Wide band first so the narrower band is painted on top of it.
    ax.fill_between(tdf.mass, values[2.5], values[97.5], color='#fffe59')
    ax.fill_between(tdf.mass, values[16.0], values[84.0], color='#a3fb56')
    ax.legend()
    fig.savefig('{}/gen_b_s/{}_shape_limit_counts_{}.pdf'.format(outdir, reg, era))
    

In [None]:
# Path of the last shape-limit figure written above; it includes the era
# suffix so that it matches the name passed to savefig.
'{}/gen_b_s/{}_shape_limit_{}.pdf'.format(outdir, reg, era)

In [None]:
# `break` outside a loop is a SyntaxError, so this cell always fails when run.
# NOTE(review): presumably a deliberate barrier to halt a top-to-bottom run
# here, like the later "don't run past" cell — confirm.
break

In [None]:
# Combined-region figure: median expected limit with a (50% - 16%) error bar
# for each final state.
# NOTE(review): an earlier cell removes every 'Comb.' row from `df`, and the
# label '0b' is not one of the entries in `multiplicities`, so some or all of
# these selections may be empty — confirm this cell is still wanted.
for reg in ['Comb.']:
    fig, ax = plt.subplots()
    cms_format_fig('Run 2', ax)
    ax.set_ylim(0,100)
    ax.set_ylabel('x-section [fb]')
    # Raw string keeps the LaTeX backslash without an invalid-escape warning.
    ax.set_xlabel(r'$m_{Z^{\prime}}$ [GeV]')
    for fs_type in ['0b', '1b', '1s', '1b+1s', '2b']:
        # Column is `multiplicity`; the former `multiplicty` raised AttributeError.
        tdf = df[(df.reg==reg) & (df.multiplicity==fs_type)]
        tdf = tdf.sort_values("mass")
        err = (tdf[50.0]-tdf[16.0])*1000/lumi_dict['Run 2']

        print(fs_type, tdf[50.0]*1000/lumi_dict['Run 2'])
        ax.errorbar(tdf.mass, tdf[50.0]*1000/lumi_dict['Run 2'], yerr=err, label=fs_type)
    ax.legend()
    # `outdir` is the configured output directory; `output` was never defined.
    fig.savefig('{}/gen_b_s/{}_limit.pdf'.format(outdir, reg))
    

In [None]:
# Select the rows used for the mass / dbs grid below.
# NOTE(review): get_data_from_file sets nJets from the regex `SR[0-9]` (or -1
# when absent), so no row can equal 'SRX' and this selection looks like it is
# always empty — confirm the intended value.
tdf = df[df.nJets=='SRX']

In [None]:
import numpy as np

In [None]:
def array_to_indicies(arr):
    """Replace every element of ``arr`` by the rank of its value.

    The rank is the position of the value among the sorted unique values of
    ``arr`` as returned by ``np.unique``. The result is a list with one
    integer per input element, in input order.
    """
    ordered = np.unique(arr)
    rank_of = dict(zip(ordered, range(len(ordered))))
    return list(map(rank_of.__getitem__, arr))

In [None]:
# Arrange the median expected limits on a (dbs, mass) grid for imshow.
x_val, y_val, z  = tdf.mass, tdf.dbs, tdf[50.0].values
# Column / row index of every point: rank of its mass / dbs value.
x, y = array_to_indicies(x_val), array_to_indicies(y_val)
# Start from real zeros. `0*np.empty(...)` multiplied uninitialised memory by
# zero, which gives NaN wherever that memory happened to hold inf or NaN.
image = np.zeros( (np.unique(y).shape[0], np.unique(x).shape[0]) )
# Grid cells without a matching (mass, dbs) point stay at 0.
image[y,x] = z

In [None]:
# Heat map of the median expected limit over the (mass, dbs) grid.
fig, ax = plt.subplots()
# Row 0 of `image` holds the smallest dbs value, so the origin must be at the
# bottom for the rows to agree with the y-range given in `extent`; with the
# default origin='upper' the map was drawn upside down relative to the axis.
im_shows = ax.imshow(image, aspect='auto', origin='lower',
                     extent=[x_val.min(),x_val.max(),y_val.min(),y_val.max()])
fig.colorbar(im_shows, ax=ax)
ax.set_xlabel("Z' Mass [GeV]")
# Raw string keeps the LaTeX backslash without an invalid-escape warning.
ax.set_ylabel(r"$\delta_{bs}$")
ax.set_title('50% expected limits: counts')

In [None]:
# Median expected limit versus mass for the selected rows, ordered by mass.
pairs = sorted(zip(tdf.mass, tdf[50.0].values), key=lambda pair: pair[0])
plt.plot(*zip(*pairs))

In [None]:
# Write the filtered limits table next to the combine inputs.
csv_path = '{}/combine_data/{}_count_limits.csv'.format(output_dir, era)
df.to_csv(csv_path)

In [None]:
# Don't run past this cell: `break` outside a loop is a SyntaxError, so the
# cell always fails when executed.
break

In [None]:
import matplotlib.pyplot as plt

In [None]:
import uproot as upr

In [None]:
# ROOT file with the 2017 shape inputs, read through uproot (absolute AFS path).
shapes_path = '/afs/cern.ch/work/r/rymuelle/public/nanoAODzPrime/CMSSW_12_1_0/src/bff_plotting_code_v3/assets_nov_2022/combine_data/2017/2017_shapes_df_input.root'
upf = upr.open(shapes_path)

In [None]:
# List the object names stored in the opened ROOT file.
upf.keys()

In [None]:
# Overlay the nominal SR1 background with the `jer_2017_Up` variation of the
# `sig202` template.
plt.plot(upf['SR1-sys_0_nominal-background'].values())
# Signal is multiplied by 100 — presumably for visibility next to the background; confirm.
plt.plot(upf['SR1-sys_0_jer_2017_Up-sig202'].values()*100)


In [None]:
# Overlay the nominal SR1 background with several signal templates, each
# multiplied by 100.
# NOTE(review): the file opened above is the 2017 input, yet the systematic
# requested here is `jer_2016_Up` — confirm this key is intended.
plt.plot(upf['SR1-sys_0_nominal-background'].values())
plt.plot(upf['SR1-sys_0_jer_2016_Up-sig202'].values()*100)
plt.plot(upf['SR1-sys_0_nominal-sig202'].values()*100)

plt.plot(upf['SR1-sys_0_jer_2016_Up-sig195'].values()*100)
# NOTE(review): this repeats the nominal `sig202` curve already drawn above;
# possibly the nominal `sig195` template was meant — confirm.
plt.plot(upf['SR1-sys_0_nominal-sig202'].values()*100)