In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# display and HTML are imported from the public IPython.display module;
# importing display from IPython.core.display is deprecated.
from IPython.display import display, HTML
# inject a style rule that stretches the notebook's .container element to full width
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import pandas as pd
import os
from run_nano.sample_list import sample_df

In [None]:
# b-tagging working point, looked up by btag_type and then by era
era_dict = {
    "deepcsv": {2016: .6321, 2017: .4941, 2018: .4184},
    "deepflavour": {2016: .3093, 2017: .3033, 2018: .2770},
}

# trigger names attached to every condition of a given era
trigger_dict = {
    2016: [
        'HLT_Mu50',
        'HLT_TkMu50',
        'HLT_DoubleEle33_CaloIdL_MW',
        'HLT_DoubleEle33_CaloIdL_GsfTrkIdVL',
    ],
    2017: [
        'HLT_Mu50',
        'HLT_OldMu100',
        'HLT_TkMu100',
        'HLT_DoubleEle33_CaloIdL_MW',
        'HLT_DoubleEle25_CaloIdL_MW',
    ],
    2018: [
        'HLT_Mu50',
        'HLT_OldMu100',
        'HLT_TkMu100',
        'HLT_DoubleEle25_CaloIdL_MW',
    ],
}

# json file name stored on every condition of a given era
json_by_era = {
    2016: 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt',
    2017: 'Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt',
    2018: 'Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
}

# (selector, isMC) pairs, listed in the order their conditions are generated
condition_groups = [
    ('bff', False),     # data conditions
    ('bff', True),      # MC conditions
    ('bff_eff', True),  # beff, MC conditions
]

# one condition per group and era: all eras of the first group, then the next group, ...
conditions = [
    {
        'era': year,
        'runPeriod': '',
        'btag_type': 'deepflavour',
        'selector': group_selector,
        'isMC': group_is_mc,
        'infile': '$1',
        'json': json_by_era[year],
    }
    for group_selector, group_is_mc in condition_groups
    for year in (2016, 2017, 2018)
]

# Attach the derived settings to each condition.
# NOTE: the loop variables stay defined at notebook level after the loop, so
# keep_and_drop ends up holding the value computed for the last condition.
for cond in conditions:
    era, btag_type, selector = cond['era'], cond['btag_type'], cond['selector']
    # working point for this tagger and era
    btagWP = era_dict[btag_type][era]
    # the same list object is shared by every condition of one era
    triggers = trigger_dict[era]
    # the efficiency selector uses its own keep-and-drop file
    keep_and_drop = (
        'keep_and_drop_bff_eff.txt' if 'bff_eff' in selector
        else 'keep_and_drop_bff.txt'
    )
    cond['btagWP'] = btagWP
    cond['triggers'] = triggers
    cond['keep_and_drop'] = keep_and_drop


In [None]:
# Shell wrapper template that is rendered once per condition and written to disk.
# It is filled with str.format and {command} is its only placeholder, so the
# shell text itself must not contain literal curly braces.
# Changes relative to the previous version of the template:
#   * the diagnostic echo prints $PYTHONPATH (it used to print $PYTHON_PATH)
#   * the script stops before the rm -rf lines when CMSSW_BASE is empty, because
#     the paths would otherwise expand to /lib/, /src/, /module/ and /python/
bash_script = '''
#this is not mean to be run locally
#
echo Check if TTY
if [ "`tty`" != "not a tty" ]; then
  echo "YOU SHOULD NOT RUN THIS IN INTERACTIVE, IT DELETES YOUR LOCAL FILES"
else

echo "ENV..................................."
env
echo "VOMS"
voms-proxy-info -all
echo "CMSSW BASE, python path, pwd"
echo $CMSSW_BASE
echo $PYTHONPATH
echo $PWD
if [ -z "$CMSSW_BASE" ]; then
  echo "CMSSW_BASE is not set, refusing to delete and replace its directories"
  exit 1
fi
rm -rf $CMSSW_BASE/lib/
rm -rf $CMSSW_BASE/src/
rm -rf $CMSSW_BASE/module/
rm -rf $CMSSW_BASE/python/
mv lib $CMSSW_BASE/lib
mv src $CMSSW_BASE/src
mv module $CMSSW_BASE/module
mv python $CMSSW_BASE/python

echo Found Proxy in: $X509_USER_PROXY
{command}
fi
'''

In [None]:
# Command-line template for run_processor.py, one list entry per argument.
# Each {placeholder} is filled from the keys of a condition dict via str.format.
processor_run_string = ' '.join([
    'python run_processor.py',
    '{infile}',
    '{era}',
    "'{runPeriod}'",   # single-quoted so that an empty run period is still passed as an argument
    '"{triggers}"',    # double-quoted so that the formatted trigger list stays one argument
    '{btagWP}',
    '{btag_type}',
    '{selector}',
    '{keep_and_drop}',
    '--isMC={isMC}',
    '--crab',
])

In [None]:
#produce a crab_script for each condition group
for cond in conditions:
    # render the run_processor.py command line for this condition
    command = processor_run_string.format(**cond)
    name = 'bash_scripts/bash_script_{era}_{selector}_{btag_type}_isMC_{isMC}.sh'.format(**cond)
    # open() does not create missing directories, so make sure the target exists
    script_dir = os.path.dirname(name)
    if not os.path.isdir(script_dir):
        os.makedirs(script_dir)
    # remember where the script lives; write_crab_cfg reads this key back
    cond['bash_script'] = name
    with open(name, 'w') as f:
        f.write(bash_script.format(command=command))

In [None]:
# Template for the generated CRAB configuration files, filled with str.format.
# Placeholders: {requestName}, {bash_script}, {keep_and_drop}, {das}, {json},
# {selector}, {era} and {datasetTag}.
# The generated file sets config.Data.lumiMask only when {json} renders to
# something other than the string '0'.
crab_cfg = '''
from WMCore.Configuration import Configuration

config = Configuration()

config.section_('General')
config.General.requestName = '{requestName}'
config.General.transferLogs = False
config.General.workArea ='work_areas'
config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'PSet.py'
config.JobType.scriptExe = '{bash_script}'
# hadd nano will not be needed once nano tools are in cmssw
config.JobType.inputFiles = ['run_processor.py', '../scripts/haddnano.py', '{keep_and_drop}']
config.JobType.sendPythonFolder = True
config.section_('Data')
config.Data.inputDataset = '{das}'
#config.Data.inputDBS = 'phys03'
config.Data.inputDBS = 'global'
config.Data.splitting = 'FileBased'
#config.Data.splitting = 'EventAwareLumiBased'
config.Data.unitsPerJob = 1
if '{json}'!='0':
    config.Data.lumiMask = '{json}'

config.Data.outLFNDirBase = '/store/group/phys_exotica/bffZprime/nanoAODskimmed/crab_{selector}/{era}'
config.Data.publication = False
config.Data.outputDatasetTag = '{datasetTag}'
config.section_('Site')
config.Site.storageSite = 'T2_CH_CERN'
# config.section_('User')
#config.User.voGroup = 'dcms'
config.JobType.allowUndistributedCMSSW = True'''

In [None]:
def write_crab_cfg(condition, row):
    """Write the CRAB config file for one sample under one condition.

    Fills the ``crab_cfg`` template, saves it as
    ``crab_cfg/crab_<requestName>.py`` (creating the directory if needed) and
    prints the matching ``crab submit ... --dryrun`` command.

    Parameters
    ----------
    condition : dict
        Reads the keys 'btag_type', 'selector', 'isMC', 'json', 'bash_script'
        and 'keep_and_drop'.
    row : mapping
        Reads the keys 'das', 'era', 'type' and 'name'. The 'das' value must
        split on '/' into exactly four pieces.

    Raises
    ------
    ValueError
        If ``row['das']`` does not split into exactly four '/'-separated pieces.
    """
    #make request name
    # unpacking also checks that the das path has the expected number of pieces
    _, samplename, _metaname, _datatier = row['das'].split('/')

    # only the first rn_length characters of the sample name go into the request name
    # (presumably to respect a CRAB request-name length limit -- TODO confirm)
    rn_length = 50
    requestName = "{}_{}_{}_{}_{}_{}".format(row['era'], row['type'], row['name'], samplename[:rn_length],
                                             condition['btag_type'], condition['selector'])
    outname = '{}_{}_{}'.format(row['name'], condition['btag_type'], condition['selector'])

    #set json if not mc
    # 0 renders as '0' in the template, which makes the generated file skip the lumi mask
    lumi_json = 0 if condition['isMC'] else condition['json']
    cfg_key = {
        'requestName': requestName,
        'bash_script': condition['bash_script'],
        'das': row['das'],
        'era': row['era'],
        'datasetTag': outname,
        'json': lumi_json,
        # taken from the condition itself: the notebook-level keep_and_drop variable
        # only holds the value computed for the last condition that was processed
        'keep_and_drop': condition['keep_and_drop'],
        'selector': condition['selector']
    }
    crab_cfg_name = 'crab_cfg/crab_{}.py'.format(requestName)
    # open() does not create missing directories, so make sure the target exists
    cfg_dir = os.path.dirname(crab_cfg_name)
    if not os.path.isdir(cfg_dir):
        os.makedirs(cfg_dir)
    with open(crab_cfg_name, 'w') as f:
        f.write(crab_cfg.format(**cfg_key))
    print('crab submit -c {} --dryrun'.format(crab_cfg_name))

In [None]:
# For every condition, write one crab config per sample whose era and ismc
# columns match that condition.
for cond in conditions:
    print(cond)
    same_era = sample_df.era == cond['era']
    same_kind = sample_df.ismc == cond['isMC']
    for _, sample in sample_df[same_era & same_kind].iterrows():
        print(sample['name'])
        write_crab_cfg(cond, sample)