In [None]:
from __future__ import print_function
import glob
import re
import subprocess
import pandas as pd
import numpy as np

In [None]:
def make_file_df(pattern='201[0-9].*txt'):
    '''Build a dataframe describing the datacard files that combine can be run over.

    Every path found by ``glob.glob('combine_data/*')`` that contains a match of the
    regular expression ``pattern`` is parsed for the fields encoded in its name.

    Parameters
    ----------
    pattern : str
        Regular expression searched for in each path; paths without a match are dropped.

    Returns
    -------
    pandas.DataFrame
        One row per selected file with the string columns ``file``, ``nJets``,
        ``mass``, ``dbs``, ``era`` and ``sig_type``. The columns are present even
        when no file is selected, so column look-ups on the result keep working.

    Raises
    ------
    IndexError
        If a selected path does not contain one of the encoded fields.
    '''
    columns = ["file", "nJets", "mass", "dbs", "era", "sig_type"]
    # glob returns paths in arbitrary order; sort them so the frame is reproducible.
    file_list = sorted(f for f in glob.glob('combine_data/*') if re.search(pattern, f))
    cfg_list = []
    for f in file_list:
        nJets = re.findall(r'SR.', f)[0]
        mass = re.findall(r'M_(\d+)_', f)[0]
        sig_type = re.findall(r'Mu_([a-z]+)_M_', f)[0]
        # dbs is written with 'p' in place of the decimal point, e.g. dbs0p04 -> '0.04'.
        dbs = '{}.{}'.format(*re.findall(r'dbs(\d)p(\d+)', f)[0])
        era = re.findall(r'(201.)', f)[0]
        cfg_list.append({"file": f, "nJets": nJets, "mass": mass, "dbs": dbs, "era": era, "sig_type": sig_type})
    # Passing the column list explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(cfg_list, columns=columns)
   
def make_combine_df(_df, _name):
    '''Merge the datacards listed in a config dataframe into a single datacard.

    Runs ``combineCards.py name0=<file0> name1=<file1> ...`` and writes the
    standard output of that command to ``_name``.

    Parameters
    ----------
    _df : pandas.DataFrame
        Frame with a ``file`` column holding the datacard paths to merge.
    _name : str
        Path of the combined datacard to write.

    Returns
    -------
    None
        Nothing is written when ``_df`` lists no files.

    Raises
    ------
    RuntimeError
        If ``combineCards.py`` exits with a non-zero status; the message carries
        the captured standard error.
    '''
    files = _df.file.to_numpy()
    # Nothing to merge: do not call combineCards.py without inputs and do not leave
    # an empty datacard behind for later cells to pick up.
    if len(files) == 0:
        return
    # Build the argument list directly so paths containing spaces stay intact.
    command = ["combineCards.py"]
    command += ["name{}={}".format(count, f) for count, f in enumerate(files)]
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate()
    # Refuse to write whatever ended up on stdout if the merge itself failed.
    if p.returncode != 0:
        raise RuntimeError("combineCards.py failed with exit code {} while writing {}:\n{}".format(
            p.returncode, _name, err.decode('utf-8', 'replace')))
    with open(_name, 'wb') as f:
        f.write(out)

# Index the datacards whose path carries a four-digit era (201 followed by a digit).
df = make_file_df(pattern='201[0-9].*txt')

In [None]:
# Run settings: the era whose cards are processed, and whether signals of
# type 'hist' are kept when the limits are computed.
era = "2016"
run_hist_test = False

In [None]:
# Datacard path layout; the slots are era, region, signal type, mass and dbs
# (dbs written with 'p' instead of the decimal point).
filename_template = (
    'combine_data/'
    '{}_{}_BFFZprimeToMuMu_{}_M_{}_dbs{}.txt'
)

In [None]:
#For this year, produce SR1 and SR2 combined cards. New region is called SRX
mass_dbs_nJets = df[['mass', 'dbs', 'nJets', 'sig_type']].to_numpy().astype(str)
# De-duplicate on (mass, dbs, sig_type) only: the loop does not use nJets, and keeping
# it in the key would rebuild the same combined card once per region.
signal_points = np.unique(mass_dbs_nJets[:, [0, 1, 3]], axis=0)
for (mass, dbs, sig_type) in signal_points:
    #select for era, dbs, mass, sig type (fit or hist)
    df_temp = df[(df.era==era) & (df.dbs==dbs) & (df.mass==mass)  & (df.sig_type==sig_type) & (df.nJets!="SRX")]
    # The signal points are collected over all eras; skip those without a card in this era
    # instead of running the merge on an empty file list.
    if df_temp.empty: continue
    #make combined config
    fname = filename_template.format(era, 'SRX', sig_type, mass, str(dbs).replace('.', 'p'))
    make_combine_df(df_temp, fname)

In [None]:
#Produce combined limits for a mass, dbs point for all eras.
mass_dbs_nJets = df[['mass', 'dbs', 'nJets', 'sig_type']].to_numpy().astype(str)
# De-duplicate on (mass, dbs, sig_type) only: the loop does not use nJets, and keeping
# it in the key would rebuild the same combined card once per region.
signal_points = np.unique(mass_dbs_nJets[:, [0, 1, 3]], axis=0)
for (mass, dbs, sig_type) in signal_points:
    df_temp = df[(df.dbs==dbs) & (df.mass==mass)  & (df.sig_type==sig_type) & (df.nJets!="SRX")]
    # three years, two regions. If something is wrong, do not run to avoid confusion
    if df_temp.shape[0] != 3*2: continue
    # "201X" stands in for the era in the name of the all-era card.
    fname = filename_template.format("201X", 'SRX', sig_type, mass, str(dbs).replace('.', 'p'))
    make_combine_df(df_temp, fname)

In [None]:
# Scan the datacards again with the looser pattern so that newly written
# cards are part of the frame as well.
df = make_file_df(pattern='201.*txt')

# Keep only the cards of the selected era.
df = df.loc[df['era'] == era]
# Signals marked 'hist' are for testing purposes only; keep them only on request.
if not run_hist_test:
    df = df.loc[df['sig_type'] == "fit"]

In [None]:
#compute limits
# We do this using subprocess to run a command line command. Is there a python api?
#This is the command to run, change it as needed
combine_command_template = "combine -M AsymptoticLimits {} --run blind"
#This will save the results
limit_list = []
# Lines of the form "Expected <quantile>%: r < <limit>"; the decimal points are escaped
# so that only a literal '.' is accepted inside the two numbers.
limit_pattern = re.compile(r'Expected +(\d+\.\d+)%: r < (\d+\.\d+)')
limit_columns = ['file', 'mass', 'dbs', 'nJets', 'era', 'sig_type']
n_cards = df.shape[0]
# The era of each card is unpacked into card_era so that the notebook-level `era`
# setting is not overwritten by the loop.
for i, (f, mass, dbs, nJets, card_era, sig_type) in enumerate(df[limit_columns].to_numpy()):
    print(f, mass, dbs, nJets, card_era)
    command = combine_command_template.format(f)
    print(command)
    print('file: {}, {} out of {}'.format(f, i + 1, n_cards))
    # Fill in the template token by token so a path containing spaces stays one argument.
    args = [token.format(f) for token in combine_command_template.split()]
    p = subprocess.Popen(args, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate()
    output = out.decode('utf-8')
    print(output)
    # Surface failures instead of dropping them; the row is still recorded and simply
    # carries no limit values.
    if p.returncode != 0:
        print('combine failed for {} (exit code {}):'.format(f, p.returncode))
        print(err.decode('utf-8', 'replace'))
    #find and save results
    lim_dict = {quantile: float(limit) for quantile, limit in limit_pattern.findall(output)}
    lim_dict['mass'] = int(mass)
    lim_dict['nJets'] = nJets
    lim_dict['dbs'] = float(dbs)
    lim_dict['era'] = card_era
    lim_dict['sig_type'] = sig_type
    limit_list.append(lim_dict)

In [None]:
# Collect the per-card results into one frame and drop every row that has a missing value.
lim_df = pd.DataFrame(limit_list).dropna()

In [None]:
# LaTeX table of the '16.0', '50.0' and '84.0' limit columns for 'fit' signals in SR2.
# Select the rows first and sort afterwards: applying a mask built from the unsorted
# frame to the sorted one makes pandas re-align the mask and emit a UserWarning.
sr2_fit = lim_df[(lim_df.sig_type=='fit') & (lim_df.nJets=='SR2')]
print(sr2_fit.sort_values(['mass', 'dbs'])[['mass','dbs', '16.0', '50.0', '84.0']].round(3).to_latex())

In [None]:
# LaTeX table of the '16.0', '50.0' and '84.0' limit columns for 'hist' signals in SR2.
# Select the rows first and sort afterwards: applying a mask built from the unsorted
# frame to the sorted one makes pandas re-align the mask and emit a UserWarning.
sr2_hist = lim_df[(lim_df.sig_type=='hist') & (lim_df.nJets=='SR2')]
print(sr2_hist.sort_values(['mass', 'dbs'])[['mass','dbs', '16.0', '50.0', '84.0']].round(3).to_latex())

In [None]:
# Write the limit table to a CSV file whose name carries the era.
limit_csv_path = 'limits/limit_{}.csv'.format(era)
lim_df.to_csv(limit_csv_path)