# Batch Run analysis script

2017-07-17 M.J.Rose

Heavily borrowed from https://github.com/tritemio/nbrun

<p class=lead>This notebook executes a [template notebook](C3030_modeling_template.ipynb) passing different arguments.
<p>

The module `nbrun.py` needs to be importable (i.e. placed in the current folder or in the python path):

In [2]:
from nbrun import run_notebook
import os, sys
import pandas as pd
from reduction_funcs import recursive_glob

In [3]:
# template name
nb_name = 'C3030_modeling_template.ipynb'
# where you keep the notebooks you generate with this script
notebook_dir = '/Users/mmcintosh/Dropbox/ASTRON2017/C3030/scripts/model_nbs'
# where you want to save key pngs for quick reference
image_dir = "/Users/mmcintosh/Dropbox/ASTRON2017/C3030/model_plots/"
# where you want to save files with evidence values for quick ref
model_ev_dir = "/Users/mmcintosh/Dropbox/ASTRON2017/C3030/model_ev/"

for path in [notebook_dir, image_dir, model_ev_dir]:
    if not os.path.exists(path):
        os.makedirs(path)

# print out version information
print('version information')
print(os.uname())
print(sys.version)
modules = list(set(sys.modules) & set(globals()))
for module_name in modules:
    module = sys.modules[module_name]
    print (module_name, getattr(module, '__version__', 'unknown'))
    
    
# image directories  
print('scripts saved in: {0}'.format(notebook_dir))
print('images saved in: {0}'.format(image_dir))
print('evidence values saved in: {0}'.format(model_ev_dir))

version information
posix.uname_result(sysname='Darwin', nodename='Missys-MacBook-Air.local', release='16.6.0', version='Darwin Kernel Version 16.6.0: Fri Apr 14 16:21:16 PDT 2017; root:xnu-3789.60.24~6/RELEASE_X86_64', machine='x86_64')
3.4.5 |Anaconda 2.3.0 (x86_64)| (default, Jul  2 2016, 17:47:57) 
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]
path 0.0.0
os unknown
sys unknown
scripts saved in: /Users/mmcintosh/Dropbox/ASTRON2017/C3030/scripts/model_nbs
images saved in: /Users/mmcintosh/Dropbox/ASTRON2017/C3030/model_plots/
evidence values saved in: /Users/mmcintosh/Dropbox/ASTRON2017/C3030/model_ev/


# Running a single notebook

In [6]:
# read in list of source names and set up a new dataframe to save results to 
if not os.path.exists(model_ev_dir+'modeled_df.pkl'):
    # read in data
    sedfile = 'sed_df.pkl'
    sed_loc = os.getcwd()+"/"+sedfile
    sed_df = pd.read_pickle(sed_loc)
    sed_df.to_pickle(model_ev_dir+'modeled_df.pkl')
sed_df = pd.read_pickle(model_ev_dir+'modeled_df.pkl')
    
    
# default args
interactive = False
nwalkers = 150 
nsteps = 2000 
burnin = 500

In [7]:

# by default, the template does analysis for pks1740-649
# here, we pick out one source to reduce
source_index, name = [(i, name) for i,name in enumerate(sed_df.name.values) if '1744' in name][0]

nb_kwargs = {'source_index': source_index, 'interactive': interactive, 'nwalkers':nwalkers, 'nsteps':nsteps, 'burnin':burnin}
nb_suffix='-out_{0}'.format(name)

run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False,insert_pos=3, out_path='model_nbs/')

Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j174425-5144.ipynb" for the traceback.


KeyboardInterrupt: 

# Looping over notebooks

In [15]:
# set up to loop over sources
iterlist_all = sed_df.index.tolist()

# select those with ATCA data
uvfmeas_paths = []
for p in ["/Volumes/mjrose/C3030/"]:
    path = p
    regex = 'uvfmeaslog*'
    uvfmeas_paths.extend(recursive_glob(path, regex))

missing_uv = []
for i in sed_df.index.tolist():
    name = sed_df.loc[i]['name']
    if not isinstance(name, str):
        name = name[0]

    path = [s for s in uvfmeas_paths if name in s]
    if path == []:
        path = [s for s in uvfmeas_paths if name.split('pks')[-1] in s]
    if path == []:
        path = [s for s in uvfmeas_paths if name.split('mwacsj')[-1] in s]
    if path == []:
        path = [s for s in uvfmeas_paths if name.split('mwacsj')[-1].split('-')[0] in s]
    if path == []:
        missing_uv.append(i)
        
iterlist_ATCAonly = [i for i in iterlist_all if i not in missing_uv]

In [16]:
# loop
overwrite = True
# default args
interactive = False
nwalkers = 50 
nsteps = 2000 
burnin = 1000

err_arr = []
done_arr = []
for i in iterlist_all: #iterlist_ATCAonly:
   
    if not sed_df.loc[i].best_model == '' and overwrite == False:
        continue 
    name = sed_df.loc[i]['name']
    nb_kwargs = {'source_index': i, 'interactive': interactive, 'nwalkers':nwalkers, 'nsteps':nsteps, 'burnin':burnin}
    nb_suffix='-out_{0}'.format(name)
    
    try:
        run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False,insert_pos=3, out_path='model_nbs/')
        done_arr.append(name)
    except:
        err_arr.append(name)

Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j212155-6404.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_2311-452.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_mwacsj2352.4-3316.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_4c-06.76.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_0743-673.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j001052-4153.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j021622-3009.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j044737-2203.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j014922+0555.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j033626+1302.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_pksb1042-269.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_j083639-2016.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_4c-00.45.ipynb" for the traceback.


In [17]:
print('reduced: ', len(done_arr))
print('errors on: ', err_arr)

reduced:  ['j231326-5343', 'j213801-4646', 'j221438-3835', 'pks2254-367', 'j221942-2756', 'j212921-1821', 'j225053-1204', 'pks2121-01', 'j031610-6820', 'j040820-6545', '0022-423', 'j011006-3651', 'pks0405-395', '0023-263', 'pks0439-337', 'pks0359-294', 'j011651-2052', 'j024008-2309', 'j040906-1757', 'pks0454-088', 'j012528-0005', 'j033931-0146', 'j042408+0204', '1245-197', '1015-314', 'pks1347-218', '0834-196', 'j103152-222823', 'pks1503-091', 'j124025-1717', '0859-140', 'pks0941-080', 'pks1200+045', '4c+12.50', 'pks0910+151', '1549-790', 'j194025-6907', 'j202100-6124', 'j185841-6313', 'pks1814-637', 'j172650-5529', 'pks1953-42', 'j145706-3604', 'pks1444-339', 'j145402-3400', 'pks1601-222', 'pks1545-120', 'pks2127+04', 'pks1543+005', '4c+02.43', 'j210700-6547', 'j233612-5236', 'j231255-4126', 'mwacsj2153.5-3445', '2135-209', 'j231117-1038', 'j015955-7430', 'j000253-5621', 'j041022-5232', 'j044033-4229', 'j010837-2851', 'j003830-2119', 'pks0207-224', 'pks0105-122', '4c+00.02', 'pks0500+

In [29]:
for err in err_arr: #iterlist_ATCAonly:
    i = [ind for ind,s in temp if err in s][0]
    name = sed_df.loc[i]['name']
    nb_kwargs = {'source_index': i, 'interactive': interactive, 'nwalkers':nwalkers, 'nsteps':nsteps, 'burnin':burnin}
    nb_suffix='-out_{0}'.format(name)
    
    try:
        run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False,insert_pos=3, out_path='model_nbs/')
        done_arr.append(name)
    except:
        err_arr.append(name)

Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_4c-06.76.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_0743-673.ipynb" for the traceback.


Error executing the notebook "C3030_modeling_template.ipynb".

See notebook "model_nbs/C3030_modeling_template-out_4c-00.45.ipynb" for the traceback.


TypeError: 'in <string>' requires string as left operand, not Series

# Playing with error bars

In [14]:
# model with 3,5, and 10% errors on ATCA data to see if models differ significantly
for ATCA_err in [0.03,0.05,0.1]:
    errname = str(ATCA_err).replace('.','-')
    nb_kwargs = {'ATCA_err': ATCA_err}
    nb_suffix='-out_{0}'.format(errname)

    nb_name = 'combine_surveys.ipynb'
    run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False,insert_pos=3, out_path='model_nbs/')

    sedfile = 'sed_df.pkl'
    sed_loc = os.getcwd()+"/"+sedfile
    sed_df = pd.read_pickle(sed_loc)
    sed_df.to_pickle(model_ev_dir+'modeled_df.pkl')

    source_index, name = [(i, name) for i,name in enumerate(sed_df.name.values) if '1744' in name][0]
    # default args
    interactive = False
    nwalkers = 50 
    nsteps = 1000 
    burnin = 500

    nb_name = 'C3030_modeling_template.ipynb'
    nb_kwargs = {'source_index': source_index, 'interactive': interactive, 'nwalkers':nwalkers, 'nsteps':nsteps, 'burnin':burnin}
    nb_suffix='-out_{0}_at{1}'.format(name, errname)

    run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False,insert_pos=3, out_path='model_nbs/')

In [None]:
# we concluded that 0.05 was good