In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
import pandas as pd
import os
from run_nano.sample_list import sample_df
import numpy as np

In [4]:
era_dict = {
            "deepcsv": {
                2016:.6321,
                2017:.4941,
                2018:.4184,
            },
           "deepflavour": {
                2016:.3093,
                2017:.3033,
                2018:.2770,
            },
        }
trigger_dict = {
    2016: ['HLT_Mu50','HLT_TkMu50', 'HLT_DoubleEle33_CaloIdL_MW', 'HLT_DoubleEle33_CaloIdL_GsfTrkIdVL'],
    2017: ['HLT_Mu50','HLT_OldMu100','HLT_TkMu100', 'HLT_DoubleEle33_CaloIdL_MW', 'HLT_DoubleEle25_CaloIdL_MW'],
    2018: ['HLT_Mu50','HLT_OldMu100','HLT_TkMu100', 'HLT_DoubleEle25_CaloIdL_MW'],
}

conditions = [
#MC conditions
{
    'era':2016,
    'runPeriod':'',
    'btag_type':'deepflavour',
    'selector':'bff',
    'isMC':True,
    'infile':'$1',
    'json': 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt',
},
{
    'era':2017,
    'runPeriod':'',
    'btag_type':'deepflavour',
    'selector':'bff',
    'isMC':True,
    'infile':'$1',
    'json': 'Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt',
},
{
    'era':2018,
    'runPeriod':'',
    'btag_type':'deepflavour',
    'selector':'bff',
    'isMC':True,
    'infile':'$1',
    'json': 'Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
},
#beff
#MC conditions
{
    'era':2016,
    'runPeriod':'',
    'btag_type':'deepflavour',
    'selector':'bff_eff',
    'isMC':True,
    'infile':'$1',
    'json': 'Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt',
},
{
    'era':2017,
    'runPeriod':'',
    'btag_type':'deepflavour',
    'selector':'bff_eff',
    'isMC':True,
    'infile':'$1',
    'json': 'Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt',
},
{
    'era':2018,
    'runPeriod':'',
    'btag_type':'deepflavour',
    'selector':'bff_eff',
    'isMC':True,
    'infile':'$1',
    'json': 'Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
},




]

#set btaggingWP
for i, cond in enumerate(conditions):
    era = cond['era']
    btag_type = cond['btag_type']
    selector = cond['selector']
    #get correct wp
    btagWP = era_dict[btag_type][era]
    #get correct triggers
    triggers = trigger_dict[era]
    #set up keep and drop for eff:
    if 'bff_eff' in selector:
        keep_and_drop = 'keep_and_drop_bff_eff.txt'
    else:
        keep_and_drop = 'keep_and_drop_bff.txt'
    #set correct values
    conditions[i]['btagWP'] = btagWP
    conditions[i]['triggers'] = triggers
    conditions[i]['keep_and_drop'] = keep_and_drop


In [5]:
bash_script = '''
#this is not mean to be run locally
#
echo Check if TTY
if [ "`tty`" != "not a tty" ]; then
  echo "YOU SHOULD NOT RUN THIS IN INTERACTIVE, IT DELETES YOUR LOCAL FILES"
else

echo "ENV..................................."
env 
echo "VOMS"
voms-proxy-info -all
echo "CMSSW BASE, python path, pwd"
echo $CMSSW_BASE 
echo $PYTHON_PATH
echo $PWD 
rm -rf $CMSSW_BASE/lib/
rm -rf $CMSSW_BASE/src/
rm -rf $CMSSW_BASE/module/
rm -rf $CMSSW_BASE/python/
mv lib $CMSSW_BASE/lib
mv src $CMSSW_BASE/src
mv module $CMSSW_BASE/module
mv python $CMSSW_BASE/python

echo Found Proxy in: $X509_USER_PROXY
{command}
fi
'''

In [6]:
processor_run_string = """python run_processor.py {infile} {era} '{runPeriod}' "{triggers}" {btagWP} {btag_type} {selector} {keep_and_drop} --isMC={isMC} --crab"""

In [7]:
#produce a crab_script for each condition group
for i, cond in enumerate(conditions):
    bash_script_keys = {"command": processor_run_string.format(**cond)}
    name = 'bash_scripts/bash_script_{era}_{selector}_{btag_type}_isMC_{isMC}.sh'.format(**cond)
    script_text = bash_script.format(**bash_script_keys)
    conditions[i]['bash_script'] = name
    with open(name, 'w') as f:
        f.write(script_text)

In [8]:
with open('run_nano/files.list', 'r') as f:
    files = [line.replace('\n', '') for line in f.readlines()]
with open('run_nano/y3_m1000_private.list', 'r') as f:
    y3_m1000_private = [line.replace('\n', '') for line in f.readlines()]
with open('run_nano/y3_m400_private.list', 'r') as f:
    y3_m400_private = [line.replace('\n', '') for line in f.readlines()]
with open('run_nano/y3_m700_private.list', 'r') as f:
    y3_m700_private = [line.replace('\n', '') for line in f.readlines()]
    
with open('run_nano/y3_m250_private.list', 'r') as f:
    y3_m700_private = [line.replace('\n', '') for line in f.readlines()]
files+=y3_m1000_private
files+=y3_m700_private
files+=y3_m400_private

In [9]:
categories = list(map(lambda x: x.split('/')[-2], files))
df = pd.DataFrame([{'name': cat, 'file': f} for cat, f in zip(categories, files)])

In [10]:
y3_m1000_private[0]

'/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal_y3/2016/y3_m1000'

In [11]:
crab_cfg = '''
from WMCore.Configuration import Configuration

config = Configuration()

config.section_('General')
config.General.requestName = '{requestName}'
config.General.transferLogs = False
config.General.workArea ='work_areas'
config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'PSet.py'
config.JobType.scriptExe = '{bash_script}'
# hadd nano will not be needed once nano tools are in cmssw
config.JobType.inputFiles = ['run_processor.py', '../scripts/haddnano.py', '{keep_and_drop}']
config.JobType.sendPythonFolder = True
config.section_('Data')
config.Data.userInputFiles = {inputFile}
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 1
if '{json}'!='0':
    config.Data.lumiMask = '{json}'

config.Data.outLFNDirBase = '/store/group/phys_exotica/bffZprime/nanoAODskimmed/crab_{selector}/{era}'
config.Data.publication = False
config.Data.outputDatasetTag = '{datasetTag}'
config.section_('Site')
config.Site.storageSite = 'T2_CH_CERN'
# config.section_('User')
#config.User.voGroup = 'dcms'
config.JobType.allowUndistributedCMSSW = True'''

In [12]:
import re

In [13]:
def write_crab_cfg(condition, file_list, name):
    #get mass
    mass = re.findall('.*[M|u]([0-9]{3}).*', name)
    if len(mass)==1: 
        mass = mass[0]
    #get dbs
    dbs = re.findall('.*dbs_{0,1}([0-9]+p[0-9]+).*', name)
    if len(dbs)==1: dbs = dbs[0]
    else: dbs = '0p04'
        
    #get gmu
    gmu = re.findall('.*gmu_([0-9]+p[0-9]+).*', name)
    if len(gmu)==1: gmu = gmu[0]
    else: gmu=0
        
    #get dbs
    gb = re.findall('.*gb_([0-9]+p[0-9]+).*', name)
    if len(gb)==1: gb = gb[0]
    else: gb=0
        
    name = 'BFF_{}_dbs{}'.format(mass, dbs)
    if gmu: name = 'BFF_{}_dbs{}_gmu{}_gb{}'.format(mass, dbs, gmu, gb)


    rn_length = 25 
    sampleType = 'BFF'
    requestName = "{}_{}_{}_{}_{}_{}".format(condition['era'], sampleType, name, name[:rn_length], 
                                             condition['btag_type'], condition['selector'])
    outname = '{}_{}_{}'.format(name, condition['btag_type'], condition['selector'])
 
    #set json if not mc
    if condition['isMC']:
        json = 0
    else:
        json = condition['json']
    cfg_key = {
        'requestName': requestName,
        'bash_script': condition['bash_script'],
        'era': condition['era'],
        'datasetTag': outname,
        'json': json,
        'keep_and_drop': condition['keep_and_drop'],
        'selector': condition['selector'],
        'inputFile': str(file_list)
    }
    crab_cfg_name = 'crab_cfg/crab_{}.py'.format(requestName)
    with open(crab_cfg_name, 'w') as f:
        f.write(crab_cfg.format(**cfg_key))
    print('crab submit -c {} --dryrun'.format(crab_cfg_name))

In [14]:
for cond in conditions:
    era = cond['era']
    df_filtered = df[df.name.apply(lambda x: str(era) in x)]
    for unique_cat in df_filtered.name.unique():
        file_list = df[df.name==unique_cat].file.to_list()
        write_crab_cfg(cond, file_list, unique_cat)

crab submit -c crab_cfg/crab_2016_BFF_BFF_125_dbs0p04_BFF_125_dbs0p04_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_125_dbs0p5_BFF_125_dbs0p5_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_125_dbs1p0_BFF_125_dbs1p0_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_150_dbs0p04_BFF_150_dbs0p04_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_150_dbs0p5_BFF_150_dbs0p5_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_150_dbs1p0_BFF_150_dbs1p0_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_175_dbs0p04_BFF_175_dbs0p04_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_175_dbs0p5_BFF_175_dbs0p5_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_175_dbs1p0_BFF_175_dbs1p0_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_2016_BFF_BFF_200_dbs0p04_BFF_200_dbs0p04_deepflavour_bff.py --dryrun
crab submit -c crab_cfg/crab_201

In [15]:
!ls crab_cfg/crab_2018_BFF_BFF_175_dbs1p0_BFF_175_dbs1p0_deepflavour_bff_eff.py 

crab_cfg/crab_2018_BFF_BFF_175_dbs1p0_BFF_175_dbs1p0_deepflavour_bff_eff.py


In [20]:
crab_cfg_y3 = '''
from WMCore.Configuration import Configuration

config = Configuration()

config.section_('General')
config.General.requestName = '{requestName}'
config.General.transferLogs = False
config.General.workArea ='work_areas'
config.section_('JobType')
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'PSet.py'
config.JobType.scriptExe = '{bash_script}'
# hadd nano will not be needed once nano tools are in cmssw
config.JobType.inputFiles = ['run_processor.py', '../scripts/haddnano.py', '{keep_and_drop}']
config.JobType.sendPythonFolder = True
config.section_('Data')
config.Data.userInputFiles = {inputFile}
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 20
if '{json}'!='0':
    config.Data.lumiMask = '{json}'

config.Data.outLFNDirBase = '/store/group/phys_exotica/bffZprime/nanoAODskimmed/crab_{selector}/{era}'
config.Data.publication = False
config.Data.outputDatasetTag = '{datasetTag}'
config.section_('Site')
config.Site.storageSite = 'T2_CH_CERN'
# config.section_('User')
#config.User.voGroup = 'dcms'
config.JobType.allowUndistributedCMSSW = True'''

In [21]:
def write_crab_cfg_y3(condition, file_list, name):
    #get mass
    mass = re.findall('y3_m([0-9]*).*', name)[0]
    print(mass)
        
    name = 'y3_{}'.format(mass)

    rn_length = 25 
    sampleType = 'BFF'
    requestName = "{}_{}_{}_{}".format(condition['era'], name, 
                                             condition['btag_type'], condition['selector'])
    outname = '{}_{}_{}'.format(name, condition['btag_type'], condition['selector'])
    #set json if not mc
    if condition['isMC']:
        json = 0
    else:
        json = condition['json']
    cfg_key = {
        'requestName': requestName,
        'bash_script': condition['bash_script'],
        'era': condition['era'],
        'datasetTag': outname,
        'json': json,
        'keep_and_drop': condition['keep_and_drop'],
        'selector': condition['selector'],
        'inputFile': str(file_list)
    }
    crab_cfg_name = 'crab_cfg/crab_{}.py'.format(requestName)
    with open(crab_cfg_name, 'w') as f:
        f.write(crab_cfg_y3.format(**cfg_key))
    print('crab submit -c {} --dryrun'.format(crab_cfg_name))

In [22]:
#make jobs for y3 sampels
for cond in conditions:
    era = cond['era']
    if era != 2016: continue
    df_filtered  = df[df.name.str.contains('y3_')]
    #print(df_filtered)
    df_filtered = df_filtered[df_filtered.file.str.contains(str(era))]
    for name in df_filtered.name.unique():
        print(name)
        file_list = df[df.name==name].file.to_list()
        write_crab_cfg_y3(cond, file_list, name)

y3_m1000
1000
crab submit -c crab_cfg/crab_2016_y3_1000_deepflavour_bff.py --dryrun
y3_m250
250
crab submit -c crab_cfg/crab_2016_y3_250_deepflavour_bff.py --dryrun
y3_m400
400
crab submit -c crab_cfg/crab_2016_y3_400_deepflavour_bff.py --dryrun
y3_m1000
1000
crab submit -c crab_cfg/crab_2016_y3_1000_deepflavour_bff_eff.py --dryrun
y3_m250
250
crab submit -c crab_cfg/crab_2016_y3_250_deepflavour_bff_eff.py --dryrun
y3_m400
400
crab submit -c crab_cfg/crab_2016_y3_400_deepflavour_bff_eff.py --dryrun
