In [None]:
# Smoke-test cell: a bare string literal that the kernel echoes back as output.
"hi"

In [None]:
# Stretch the notebook's `.container` element to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import pickle as pkl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from src.plotting_tools.Bins import bins
from ROOT import TH1F
import array
import subprocess
import uncertainties as unc
from uncertainties import unumpy

In [None]:
from src.plotting_tools.utils import calc_bin_widths, calc_bin_centers
from src.plotting_tools.SysHist import SysHist
from src.data_tools.StackPlotter import get_stack_plotter
from src.plotting_tools.utils import rebin_np
from src.plotting_tools.cms_format import cms_format_fig, cms_style
from src.plotting_tools.Bins import Bins
from src.assets.lumi import lumi_dict

In [None]:
# Apply the project's plot styling once, before any figure is created
# (presumably CMS-flavoured matplotlib defaults — confirm in src.plotting_tools.cms_format).
cms_style()

In [None]:
def make_hist(values, errors, bin_edges, *args):
    """Build a variable-width ROOT ``TH1F`` and fill it entry by entry.

    Args:
        values: per-bin contents; element ``k`` is written to ROOT bin ``k``.
        errors: per-bin errors, paired element-wise with ``values``.
        bin_edges: bin boundaries; the histogram gets ``len(bin_edges) - 1`` bins.
        *args: leading ``TH1F`` constructor arguments, forwarded ahead of the
            bin count and the edge array.

    Returns:
        The filled ``TH1F`` instance.
    """
    n_bins = len(bin_edges) - 1
    edges = array.array('d', bin_edges)
    hist = TH1F(*args, n_bins, edges)
    # Filling starts at bin index 0, which ROOT reserves for underflow.
    # NOTE(review): this presumably expects `values`/`errors` that already carry
    # under/overflow entries (see add_overflow) — confirm against callers.
    bin_index = 0
    for content, error in zip(values, errors):
        hist.SetBinContent(bin_index, content)
        hist.SetBinError(bin_index, error)
        bin_index += 1
    return hist


In [None]:
def add_overflow(arr, top=0, bottom=0):
    """Return ``arr`` padded with one extra entry at each end.

    Args:
        arr: 1-D sequence of values.
        top: value appended after the last element (default 0).
        bottom: value inserted before the first element (default 0).

    Returns:
        A new NumPy array laid out as ``[bottom, *arr, top]``.
    """
    leading = [bottom]
    trailing = [top]
    return np.concatenate((leading, arr, trailing))

In [None]:
# http://cms-analysis.github.io/HiggsAnalysis-CombinedLimit/part2/settinguptheanalysis/
# https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/blob/102x/data/tutorials/shapes/simple-shapes-df_input.csv

In [None]:
# Run configuration for this notebook.
from src.assets.output_dir import output_dir
# Base directory under which the input pickles are read and the datacards are written.
outdir = output_dir
# Era processed by this run. Kept as a string: it is compared against string
# literals and interpolated into file names further down.
era = '2016'
# Ratio of this era's entry in lumi_dict to the '201X' entry.
lumi_fraction = lumi_dict[str(era)]/lumi_dict['201X']
#rootfname = '{outdir}/combine_data/{era}/{era}_shapes_df_input.root'.format(outdir=outdir, era=era)

In [None]:
# Luminosity uncertainties per era, written as lnN values.
#   lumi   : stored under "lumi" in the per-bin value dict
#   uncorr : emitted on the lumi_uncorr_<era> line
#   corr_1 : emitted on the lumi_corr1 line
#   corr_2 : emitted on the lumi_corr2 line (skipped when it is exactly 1.0)
_LUMI_BY_ERA = {
    '2016': {'lumi': 1.025, 'uncorr': 1.01, 'corr_1': 1.006, 'corr_2': 1.0},
    '2017': {'lumi': 1.023, 'uncorr': 1.02, 'corr_1': 1.009, 'corr_2': 1.006},
    '2018': {'lumi': 1.025, 'uncorr': 1.015, 'corr_1': 1.02, 'corr_2': 1.002},
}

# Fail loudly on an unsupported era instead of leaving lumi/lumi_str undefined
# (or silently stale from an earlier run of this cell).
_era_key = str(era)
if _era_key not in _LUMI_BY_ERA:
    raise ValueError(
        "no luminosity uncertainties defined for era {!r}; expected one of {}".format(
            era, sorted(_LUMI_BY_ERA)))

_lumi_settings = _LUMI_BY_ERA[_era_key]
lumi = _lumi_settings['lumi']
uncorr = _lumi_settings['uncorr']
corr_1 = _lumi_settings['corr_1']
corr_2 = _lumi_settings['corr_2']

# Rendered immediately (f-string), so the text contains no braces and can be
# appended to the datacard template without interfering with its str.format call.
lumi_str = f'''
lumi_uncorr_{era} lnN -      {uncorr}
lumi_corr1 lnN -      {corr_1}
'''
if corr_2 != 1.0:
    # A value of exactly 1.0 has no effect, so the line is only written when needed.
    lumi_str += f'lumi_corr2  lnN -      {corr_2}\n'
print(lumi_str)

In [None]:
# Unblinded DiLepMass data histograms, one per signal region, keyed by region name.
sp = get_stack_plotter(outdir, era)
data_dict = {
    region: sp.make_data_hist('DiLepMass', region, blinded=False)
    for region in ('SR1', 'SR2')
}

In [None]:
# Load this era's pickled signal table into `data`; the helper functions below
# filter it on its reg / mass / dbs / sys columns.
# NOTE: pickle.load can execute arbitrary code — only open trusted files.
signal_pkl_path = '{}/data/{}_bff_interp_dbs_norm.pkl'.format(outdir, era)
with open(signal_pkl_path, 'rb') as signal_pkl:
    data = pkl.load(signal_pkl)

In [None]:
# ABCD prediction for this era, indexed by region; the entries read further
# down are 'bins', 'nom' and 'std'.
outname = "{}/abcd/abcd_dict_data_{}_ismc0.pkl".format(outdir, era)
with open(outname, 'rb') as abcd_file:
    abcd = pkl.load(abcd_file)

# Closure uncertainties for every era, indexed first by integer era.
outname = "{}/abcd/ABCD_closure_unc.pkl".format(outdir, era)
with open(outname, 'rb') as closure_file:
    closure_unc_all_eras = pkl.load(closure_file)
print(pd.DataFrame(closure_unc_all_eras))

# Keep only this era and add 1 to each value; the result is indexed by region
# and written to the datacard as an lnN entry.
uncertainty_dict = {
    region: rel_unc + 1
    for region, rel_unc in closure_unc_all_eras[int(era)].items()
}

In [None]:
# Acceptance summary table, one row per (reg, mass, type) signal point.
# NOTE(review): the location is a hard-coded absolute /eos path.
accpt_dir = '/eos/cms/store/group/phys_exotica/bffZprime/assets_june_23'
accpt_df = pd.read_csv(accpt_dir + "/data_gen_b_s/summary_df.csv")

# Half the |Up - Down| spread relative to the acceptance, per row.
twice_acceptance = accpt_df['acceptance'] * 2
isrfsr = (accpt_df['Weight_ISRFSR_Up'] - accpt_df['Weight_ISRFSR_Down']).abs() / twice_acceptance
pdf = (accpt_df['Weight_PDF_Up'] - accpt_df['Weight_PDF_Down']).abs() / twice_acceptance

# Cell output: (min, max, mean) of the ISR/FSR spread followed by the same for PDF.
(min(isrfsr), max(isrfsr), np.mean(isrfsr),
 min(pdf), max(pdf), np.mean(pdf))


In [None]:
# Text of one per-bin datacard: a single channel with one ABCD background
# process and one signal process. The {placeholders} are filled by
# make_bin_card through str.format. The two background nuisances put their
# value in the first (ABCD) column, every other nuisance in the second (signal)
# column. The era's luminosity lines (lumi_str, already rendered and free of
# braces) are appended after the systematics block.
template = '''#higgs combine tool shape analysis card for z'to mumu 1 jet
#https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit/blob/102x/data/tutorials/shapes/simple-shapes-df.txt
-------------------------

imax 1  number of channels                                      #1 Jet
jmax 1  number of backgrounds -1                                    #following AN2015_207_v5, not sure why the -1 is there?
kmax *  number of nuisance parameters (sources of systematic uncertainties)

-------------------------

bin       {reg}_{era}_{binCount}
observation   {obs}

-------------------------

bin       {reg}_{era}_{binCount}      {reg}_{era}_{binCount}
process     ABCD_{reg}_{era}    sig_{reg}_{era}_{mass}_{dbs}
process     1     -1
rate      {abcd_count}   {sig_count}

-------------------------
back_fit_{era}_{binCount} lnN {back_fit}    -    
Closure_{era}_{binCount} lnN  {back_closure}   -  
jer_{era}   lnN -      {jer}
jes_{era}   lnN -      {jes}
roch_{era}   lnN -      {roch}
HEM_{era}   lnN -      {HEM}
btagCorr   lnN -      {btagCorr}
btagUncorr_{era}   lnN -      {btagUncorr}
elSF_{era}   lnN -      {el}
ISRFSR_{era}   lnN -      {ISRFSR}
Muon_{era}   lnN -      {Muon}
trigger_{era}   lnN -      {trigger}
pdf_{era}   lnN -      {pdf}
puid_{era}   lnN -      {puid}
pu   lnN -      {pu}'''
# lumi_str begins with a newline, so it starts on a fresh line after the `pu` entry.
template += lumi_str
print(template)

In [None]:
def get_sig_bin(reg, mass, dbs, sys, nBin):
    """Return the rebinned signal content of one bin for one systematic variation.

    Reads the notebook globals ``data`` (signal table) and ``bin_edges``
    (target binning handed to ``rebin_np``).

    Args:
        reg, mass, dbs, sys: values matched against the ``reg``, ``mass``,
            ``dbs`` and ``sys`` columns of ``data``.
        nBin: index into the rebinned array.

    Returns:
        Element ``nBin`` of the rebinned ``y`` values of the matching row.

    Raises:
        AssertionError: if the selection does not match exactly one row.
            make_bin_card relies on an exception here to fall back to a
            neutral value when a variation is absent.
    """
    selection = (
        (data.reg == reg) & (data.mass == mass)
        & (data.dbs == dbs) & (data.sys == sys)
    )
    tdf = data[selection]
    # The message covers both failure modes (no match and duplicate matches).
    assert tdf.shape[0] == 1, (
        "expected exactly one row for reg={!r}, mass={!r}, dbs={!r}, sys={!r}; "
        "found {}".format(reg, mass, dbs, sys, tdf.shape[0])
    )
    row = tdf.iloc[0]
    # NOTE(review): `bin_edges` is a notebook-level global, not abcd[reg]['bins']
    # for the requested region — confirm that all regions share one binning.
    y_prime = rebin_np(row.x, bin_edges, row.y)
    return y_prime[nBin]

def get_sig_bins(reg, mass, dbs, sys, nBin, nom):
    """Format the Down/Up variation of one bin as a ratio string.

    Args:
        reg, mass, dbs: signal point, forwarded to get_sig_bin.
        sys: name pattern containing one ``{}``, filled with "Down" and "Up".
        nBin: bin index, forwarded to get_sig_bin.
        nom: nominal content of the bin; 0.001 is added before dividing.

    Returns:
        "d.dd" when both ratios are equal (or both are zero, which is reported
        as "1.00"), otherwise "down/up" with two decimals each; "1.00/1.00"
        collapses to "1.00".
    """
    denominator = nom + .001
    down_ratio = get_sig_bin(reg, mass, dbs, sys.format("Down"), nBin) / denominator
    up_ratio = get_sig_bin(reg, mass, dbs, sys.format("Up"), nBin) / denominator

    # Both variations empty: report no effect rather than a ratio of zero.
    if down_ratio == 0 and up_ratio == 0:
        return "1.00"
    if down_ratio == up_ratio:
        return "{:.2f}".format(down_ratio)

    text = "{:.2f}/{:.2f}".format(down_ratio, up_ratio)
    return "1.00" if text == "1.00/1.00" else text

def get_norm_bin(reg, mass, dbs, sys, norm):
    """Return the factor that scales the selected template's total to ``norm``.

    Reads the notebook global ``data``.

    Args:
        reg, mass, dbs, sys: values matched against the ``reg``, ``mass``,
            ``dbs`` and ``sys`` columns of ``data``.
        norm: target total yield.

    Returns:
        ``norm`` divided by the sum of the matching row's ``y`` values.

    Raises:
        AssertionError: if the selection does not match exactly one row.
    """
    selection = (
        (data.reg == reg) & (data.mass == mass)
        & (data.dbs == dbs) & (data.sys == sys)
    )
    tdf = data[selection]
    # The message covers both failure modes (no match and duplicate matches).
    assert tdf.shape[0] == 1, (
        "expected exactly one row for reg={!r}, mass={!r}, dbs={!r}, sys={!r}; "
        "found {}".format(reg, mass, dbs, sys, tdf.shape[0])
    )
    row = tdf.iloc[0]
    return norm / row.y.sum()

In [None]:
def make_bin_card(reg, mass, dbs, nBin, norm, stat_err, ISRFSR="0.97/1.03", pdf="1.0", verbose=False):
    """Render the datacard text for one bin of one signal point.

    Reads the notebook globals ``abcd``, ``data_dict``, ``uncertainty_dict``,
    ``era``, ``lumi`` and ``template``.

    Args:
        reg: region name; key into ``abcd``, ``data_dict`` and ``uncertainty_dict``.
        mass, dbs: signal point, forwarded to the signal lookups.
        nBin: index of the bin to write.
        norm: total yield the nominal signal template is scaled to.
        stat_err: statistical error, combined with ``norm`` into the "stat" entry.
        ISRFSR: ready-made string for the ISRFSR line.
        pdf: ready-made string for the pdf line.
        verbose: when true, print the observed and predicted counts.

    Returns:
        ``(card_text, sig_nom)``: the filled template and the signal yield of
        this bin after scaling.
    """
    bin_edges = abcd[reg]['bins']
    nom = abcd[reg]['nom'][nBin]
    std = abcd[reg]['std'][nBin]
    # Look the observation up by bin centre rather than by index, so that the
    # data histogram value matches this bin.
    target = Bins(bin_edges).calc_bin_centers()[nBin]
    obs = data_dict[reg].get_value_at(target)[0]
    if verbose:
        print(obs, nom)

    # Signal: the systematic ratios are taken against the unscaled nominal bin.
    sig_raw = get_sig_bin(reg, mass, dbs, 'nom', nBin)

    def shape_sys(pattern):
        # Down/Up ratio string for one systematic name pattern.
        return get_sig_bins(reg, mass, dbs, pattern, nBin, sig_raw)

    jes = shape_sys('Reg_jet_jesTotal{}_muon_corrected_pt_ele_pt')
    roch = shape_sys('Reg_jet_nom_muon_corrected{}_pt_ele_pt')
    jer = shape_sys('Reg_jet_jer{}_muon_corrected_pt_ele_pt')
    pu = shape_sys('Weight_Pu{}')
    btagCorr = shape_sys('Weight_BTagCorr{}')
    btagUncorr = shape_sys('Weight_BTagUncorr{}')
    puid = shape_sys('Weight_PUID{}')
    muon = shape_sys('Weight_MuonSF{}')
    el = shape_sys('Weight_ElectronSF{}')
    trigger = shape_sys('Weight_MuonTrigger{}')
    try:
        HEM = shape_sys("Reg_jet_jesHEMIssue{}_muon_corrected_pt_ele_pt")
    except Exception:
        # Best effort: when the HEM variation cannot be looked up, write a
        # neutral value. `Exception` (not a bare except) keeps Ctrl-C working.
        HEM = "1.00"

    # Scale the nominal bin by norm / (sum of the nominal template's y values).
    sig_nom = sig_raw * get_norm_bin(reg, mass, dbs, 'nom', norm)

    value_dict = {
        "era": era,
        "reg": reg,
        "binCount": nBin,
        "lumi": lumi,
        "mass": mass,
        "dbs": dbs,
        "obs": "{:.2f}".format(obs),

        "abcd_count": "{:.2f}".format(nom),
        "back_fit": "{:.2f}".format((nom+std)/nom),
        "back_closure": "{:.2f}".format(uncertainty_dict[reg]),

        "sig_count": "{:.2f}".format(sig_nom),
        "jer": jer,
        "jes": jes,
        "roch": roch,
        "HEM": HEM,
        "btagCorr": btagCorr,
        "btagUncorr": btagUncorr,
        "el": el,
        # ISRFSR and pdf are supplied by the caller, not derived per bin.
        "ISRFSR": ISRFSR,
        "Muon": muon,
        "trigger": trigger,
        "pdf": pdf,
        "puid": puid,
        "pu": pu,
        "stat": "{:.2f}".format(1+stat_err/norm),
    }
    return template.format(**value_dict), sig_nom

In [None]:
#path = '/afs/cern.ch/work/r/rymuelle/public/nanoAODzPrime/CMSSW_12_1_0/src/bff_plotting_code_v3/exo-datacards/EXO-22-006/combine_data/model_ind'
# Directory under `outdir` that receives the datacards and the combine helper files.
path = f'{outdir}/combine_data/model_ind'
path

In [None]:
import os

In [None]:
# Show the distinct values of the `mass` column in the signal table.
data.mass.unique()

In [None]:
# Notebook-level defaults. `bin_edges` is read as a global by get_sig_bin and
# `bin_centers` is iterated by make_card; both come from the SR1 binning.
reg = 'SR1'
mass = 250
dbs = 0.04
fs_type = '1b'
bin_edges = abcd[reg]['bins']
bin_centers = calc_bin_centers(bin_edges)

def make_card(reg, mass, dbs, fs_type):
    """Write the per-bin cards of one signal point and return its combine command.

    Reads the notebook globals ``path``, ``era``, ``accpt_df``,
    ``lumi_fraction`` and ``bin_centers``.

    Args:
        reg: region name.
        mass: signal mass.
        dbs: value matched against the ``dbs`` column of the signal table.
        fs_type: value matched against the ``type`` column of ``accpt_df``.

    Returns:
        One shell line: a ``combineCards.py`` call listing every bin card
        written, redirected into ``datacard_<signal>.txt``.

    Raises:
        AssertionError: if ``accpt_df`` does not hold exactly one matching row.
    """
    dbs_tag = str(dbs).replace('.', 'p')
    fs_tag = fs_type.replace('(','-').replace(')','-')
    signal = '{}_{}_{}_{}'.format(reg, mass, dbs_tag, fs_tag)
    path_prime = '{}/{}/{}'.format(path, era, signal)

    matches = accpt_df[(accpt_df.reg==reg) & (accpt_df.mass==mass) & (accpt_df.type==fs_type)]
    assert matches.shape[0] == 1, matches
    summary = matches.iloc[0]
    acceptance = summary['acceptance']
    stat_error = summary['statistical']
    norm = acceptance*1000*lumi_fraction
    isrfsr = get_sys(summary, "Weight_ISRFSR")
    pdf = get_sys(summary, "Weight_PDF")
    print(isrfsr, pdf)

    os.makedirs(path_prime, exist_ok=True)
    card_args = []
    for i in range(len(bin_centers)):
        # Bins without any nominal signal are skipped for speed.
        if get_sig_bin(reg, mass, dbs, 'nom', i) == 0:
            continue
        template_filled, _ = make_bin_card(reg, mass, dbs, i, norm, stat_error, ISRFSR=isrfsr, pdf=pdf)
        with open('{}/bin_{}.txt'.format(path_prime, i), 'w') as card_file:
            card_file.write(template_filled)
        card_args.append(' Name{}={}/bin_{}.txt'.format(i, signal, i))

    return 'combineCards.py ' + ''.join(card_args) + ' > datacard_{}.txt\n'.format(signal)

In [None]:


def get_sys(row, string):
    """Format the Up/Down shifts of one weight as a ratio string.

    Args:
        row: mapping with the keys ``string + "_Up"``, ``string + "_Down"``
            and ``'acceptance'``.
        string: prefix of the two shift keys.

    Returns:
        "1.00" when both ``1 + shift / acceptance`` values round to 1.00,
        otherwise "low/high" with the two values in ascending order and two
        decimals each.
    """
    shifted = [
        1 + row[string + "_Up"] / row['acceptance'],
        1 + row[string + "_Down"] / row['acceptance'],
    ]
    # Sort numerically before formatting so the smaller value always comes first.
    shifted.sort()
    low, high = ("{:.2f}".format(value) for value in shifted)
    if low == "1.00" and high == "1.00":
        return "1.00"
    return "{}/{}".format(low, high)

In [None]:
# Write every per-bin card for this era and collect one combineCards.py line
# per (fs_type, mass, region) into the body of a shell script.
# The shebang needs an absolute interpreter path ('#!/bin/bash', not '#!bin/bash').
combine_sh = '#!/bin/bash\n'
dbs = 0.5
for fs_type in accpt_df.type.unique():
    for mass in [125, 150, 175, 200, 250, 300, 350]:
        print(fs_type, mass)
        for reg in ['SR1', 'SR2']:
            combine_sh += make_card(reg, mass, dbs, fs_type)



In [None]:
# Save the collected combineCards.py commands into this era's sub-directory.
make_combine_path = '{}/{}/make_combine.sh'.format(path, era)
with open(make_combine_path, 'w') as script_file:
    script_file.write(combine_sh)


In [None]:
def _print_all_years_commands(regions, out_tag=''):
    """Print one combineCards.py command per (fs_type, mass) merging all years.

    Runs inside a function so the loop variables do not overwrite the
    notebook-level ``era`` string, which make_card and make_bin_card read as
    a global.

    Args:
        regions: region names whose per-year datacards are merged.
        out_tag: text inserted after ``datacard_`` in the output file name
            ('' for the all-regions card, e.g. 'SR1_' for a single region).
    """
    for fs_type in accpt_df.type.unique():
        fs_string = fs_type.replace('(','-').replace(')','-')
        for mass in [125, 150, 175, 200, 250, 300, 350]:
            command = 'combineCards.py '
            # Year is the outer loop and region the inner one; the running
            # index numbers the Name<i> labels.
            inputs = [(year, reg) for year in [2016, 2017, 2018] for reg in regions]
            for i, (year, reg) in enumerate(inputs):
                command += " Name{i}={era}/datacard_{reg}_{mass}_0p5_{fs_type}.txt".format(
                    i=i, era=year, reg=reg, mass=mass, fs_type=fs_string)
            command += ' > 201X/datacard_{}{}_0p5_{}.txt'.format(out_tag, mass, fs_string)
            print(command)


# Both regions together, then each region on its own.
_print_all_years_commands(['SR1', 'SR2'])
_print_all_years_commands(['SR1'], out_tag='SR1_')
_print_all_years_commands(['SR2'], out_tag='SR2_')

In [None]:
# era combine regions
# Print one combineCards.py command per (fs_type, mass, year) that merges the
# SR1 and SR2 datacards of that year. The loop variables are named so they do
# not overwrite the notebook-level `era`, `dbs` and `combine_sh`, which other
# cells read.
for fs_type in accpt_df.type.unique():
    fs_string = fs_type.replace('(','-').replace(')','-')
    for dbs_value in [0.5]:
        # Same tag make_card puts into the file names ('0p5' for 0.5).
        dbsstring = str(dbs_value).replace('.', 'p')
        for mass in [125, 150, 175, 200, 250, 300, 350]:
            for year in [2016, 2017, 2018]:
                region_cards = [
                    f" Name{i}={year}/datacard_{reg}_{mass}_{dbsstring}_{fs_string}.txt"
                    for i, reg in enumerate(['SR1', 'SR2'])
                ]
                command = (
                    'combineCards.py '
                    + ''.join(region_cards)
                    + f' > {year}/datacard_{mass}_{dbsstring}_{fs_string}.txt'
                )
                print(command)

In [None]:
# Body of run_combine.sh: enters a CMSSW area, sets up its runtime environment,
# changes to `path`, and runs combine's AsymptoticLimits method on the file
# given as the script's first argument.
# NOTE(review): the CMSSW area is a hard-coded absolute AFS path.
run_combine = '''#!/bin/sh
#ulimit -s unlimited
#set -e
cd /afs/cern.ch/work/r/rymuelle/public/nanoAODzPrime/higgscombine/CMSSW_10_2_13/src
export SCRAM_ARCH=slc7_amd64_gcc700
source /cvmfs/cms.cern.ch/cmsset_default.sh
eval `scramv1 runtime -sh`
cd {path} 

combine -M AsymptoticLimits "$1"

'''.format(path=path)

In [None]:
# Copy every file from src/combine_scripts into `path`
# (presumably this includes the submit_jobs_*.sub files used in the last cell — confirm).
!cp src/combine_scripts/* {path}

In [None]:
# Save the combine wrapper script at the top of `path`.
run_combine_path = f'{path}/run_combine.sh'
with open(run_combine_path, 'w') as script_file:
    script_file.write(run_combine)

In [None]:
# Create the out/err/log directories, each with one sub-directory per era tag
# (presumably for the batch jobs' stdout, stderr and log files — confirm
# against the submit files). os.makedirs with exist_ok=True also creates the
# parent directories and makes re-running this cell a no-op, where plain
# `mkdir` reports "File exists".
for _stream in ('out', 'err', 'log'):
    for _tag in ('2016', '2017', '2018', '201X'):
        os.makedirs(os.path.join(path, _stream, _tag), exist_ok=True)

In [None]:
# Manual step: run the commands below from a terminal, not from the notebook
# kernel. As bare shell text they are not valid Python, and `watch` is
# interactive and never returns.
# NOTE(review): presumably run from inside `path`, next to the copied
# submit_jobs_*.sub files — confirm.
#
#   condor_submit submit_jobs_201X.sub
#   condor_submit submit_jobs_2016.sub
#   condor_submit submit_jobs_2017.sub
#   condor_submit submit_jobs_2018.sub
#   watch condor_q