# Batch Run reduction script

2017-07-17 M.J.Rose

Heavily borrowed from https://github.com/tritemio/nbrun

# Master Notebook

<p class="lead">This notebook executes a <a href="mirpy_reduction_template.ipynb">template notebook</a> once per source, passing different arguments each time.</p>

The modules `nbrun` and `reduction_funcs` need to be importable (i.e. placed in the current folder or on the Python path):

In [1]:
from reduction_funcs import *
from nbrun import run_notebook

This notebook expects your data and blocks to be organized like so: 
* directory named after the date of observation
    * directory named 'blocks' which stores text files (.mos in image below) that list the observed sources in order of observation (including your periodic band/flux cal observations)
    * directory of raw data named 'raw'
![caption](notebook_images/data_tree.png "How this script expects your files to be organized")

Everything else is generated by this notebook.

In [14]:
# Notebook-wide configuration, followed by a short environment report.
# NOTE(review): `os` and `sys` are not imported by name in this notebook;
# presumably they arrive through `from reduction_funcs import *` -- confirm.

# template name
nb_name = 'mirpy_reduction_template.ipynb'
# where you keep the scripts
notebook_dir = '/Users/mmcintosh/Dropbox/ASTRON2017/C3030/scripts/reduction_nbs'
# where you want to save key pngs for quick reference
image_dir = "/Users/mmcintosh/Dropbox/ASTRON2017/C3030/reduction_plots/"


# create whichever of the two directories does not exist yet
for path in [notebook_dir, image_dir]:
    if not os.path.exists(path):
        os.makedirs(path)

# print out version information
print('version information')
print(os.uname())
print(sys.version)
# Names that are both a loaded module and a global of this notebook. Any global
# whose name matches a loaded module is reported, including plain variables
# such as the loop variable `path` above, so renaming a global in this cell
# can change what gets listed. The set intersection has no defined order.
modules = list(set(sys.modules) & set(globals()))
for module_name in modules:
    module = sys.modules[module_name]
    # modules without a __version__ attribute are reported as 'unknown'
    print (module_name, getattr(module, '__version__', 'unknown'))
    
    
# report the configured output directories
print('scripts saved in: {0}'.format(notebook_dir))
print('images saved in: {0}'.format(image_dir))

version information
posix.uname_result(sysname='Darwin', nodename='Missys-MacBook-Air.local', release='16.6.0', version='Darwin Kernel Version 16.6.0: Fri Apr 14 16:21:16 PDT 2017; root:xnu-3789.60.24~6/RELEASE_X86_64', machine='x86_64')
3.4.5 |Anaconda 2.3.0 (x86_64)| (default, Jul  2 2016, 17:47:57) 
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]
glob unknown
sys unknown
psutil 2.2.1
os unknown
subprocess unknown
shutil unknown
fnmatch unknown
path 0.0.0
re 2.2.1
time unknown
scripts saved in: /Users/mmcintosh/Dropbox/ASTRON2017/C3030/scripts/reduction_nbs
images saved in: /Users/mmcintosh/Dropbox/ASTRON2017/C3030/reduction_plots/


# reduce a single source

In [15]:
# List every entry under the project data root. glob returns its matches in
# arbitrary order, so sort them: the next cell selects `datadirs[0]` and that
# choice must not depend on directory-listing order.
datadirs = sorted(glob.glob('/Volumes/mjrose/C3030/*'))
print(datadirs)

['/Volumes/mjrose/C3030/2015-04-11', '/Volumes/mjrose/C3030/2015-04-12', '/Volumes/mjrose/C3030/2016-04-05', '/Volumes/mjrose/C3030/2016-04-09']


In [16]:
# Work on the first data directory found above ...
datadir = datadirs[0]
# ... and one band: 'L', 'C' or 'X'
band = 'L'


# where your raw data is saved
raw_data_dir = datadir + '/raw'
# where you want to save the visibilities, other pngs, logs
processed_data_dir = datadir + '/{0}/reduced_{0}'.format(band)
print('raw data sourced from: {0}'.format(raw_data_dir))
print('reduced data saved in: {0}'.format(processed_data_dir))


# Per-band settings as (suffix, rawfiles, ifsel). `suffix` is appended to the
# source names further down; `rawfiles` and `ifsel` are only defined here.
band_settings = {
    'L': ('.2100', 'L', 1),
    'C': ('.5500', 'CX', 1),
    'X': ('.9000', 'CX', 2),
}
# Fail loudly on a mistyped band instead of silently keeping stale (or
# undefined) values from an earlier run.
if band not in band_settings:
    raise ValueError("unknown band {0!r}; expected one of {1}".format(band, sorted(band_settings)))
suffix, rawfiles, ifsel = band_settings[band]
    
    

raw data sourced from: /Volumes/mjrose/C3030/2015-04-11/raw
reduced data saved in: /Volumes/mjrose/C3030/2015-04-11/L/reduced_L


In [17]:
# dataframe for the source being processed to find its best flux calibrator
df_path = processed_data_dir + '/calibration_order.csv'

# Build the table once; later runs reuse the CSV already on disk.
if not os.path.exists(df_path):

    # the block files list the sources in mosaic order
    block_dir = raw_data_dir.split('/raw')[0] + "/blocks"
    block_files = sorted_nicely(glob.glob(block_dir + "/*"))

    # the bandflux calibrator lives in its own file; separate it from the blocks
    bfcal_files = [a for a in block_files if 'bandfluxcal' in a]
    if not bfcal_files:
        raise IOError("no 'bandfluxcal' file found in {0}".format(block_dir))
    bfcal_path = bfcal_files[0]
    block_files = [a for a in block_files if 'bandfluxcal' not in a]
    if not block_files:
        raise IOError("no block files found in {0}".format(block_dir))

    # Source names per block: the last column of each row, with its first
    # character dropped, lower-cased. ndmin=2 keeps a one-row file
    # two-dimensional so that iterating still yields rows.
    temp_arr = []
    for block in block_files:
        temp_txt = np.loadtxt(block, dtype=bytes, ndmin=2)
        temp_arr.append([t[-1].decode('UTF-8')[1:].lower() for t in temp_txt])

    # Drop repeated blocks, keeping the first occurrence of each so the
    # observation order is preserved. Whole blocks are compared as tuples;
    # np.unique would flatten a rectangular list-of-lists and return indices
    # into the flattened array instead of block indices.
    seen_blocks = set()
    sources = []
    for block_names in temp_arr:
        block_key = tuple(block_names)
        if block_key not in seen_blocks:
            seen_blocks.add(block_key)
            sources.extend(block_names)

    sources = [s + suffix for s in sources]

    # the separate bandflux calibrator goes first
    bandflux_cal = str(np.genfromtxt(bfcal_path, dtype=str)) + suffix
    sources = np.append(bandflux_cal, sources)
    print('working with these sources: {0}'.format(sources))

    # some are repeated but they are in the order we need for phase calibration

    # init a dataframe to keep track of phasecals
    df_init = pd.DataFrame({'name': sources, 'percent_flagged': '', 'flux': '', 'phasecal': ''})
    df_init.to_csv(df_path)


# DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
# index_col=0 restores the integer index written by to_csv above.
phasecal_df = pd.read_csv(df_path, index_col=0)
phasecal_df = phasecal_df.fillna('')
check_if_data_unpacked(phasecal_df, processed_data_dir, df_path)
sources = phasecal_df["name"].values.tolist()


0 / 218
source = 1934-638.2100
phasecal = 1934-638.2100


In [37]:
# guess where you should start iterating next; assign phasecal
# DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
# index_col=0 restores the integer index stored in the CSV.
phasecal_df = pd.read_csv(df_path, index_col=0)
phasecal_df = phasecal_df.fillna('')
# guess where to start
h = 0
# if you never got flux from your last reduction, go redo that one
while True:
    h = check_ifreduced(processed_data_dir, sources, h, phasecal_df, suffix)
    if h is None:
        print("all reduced")
        break
    p = find_phasecal(phasecal_df, h)
    if isinstance(p, bool):
        if p:
            # index h is the one to reduce next
            break
        # then no flux for h
        h -= 1
    else:
        # any non-bool result is used as the next index to examine
        h = p

# Stop with a clear message when nothing is left; indexing `sources[None]`
# below would otherwise fail with an unrelated-looking TypeError.
if h is None:
    raise RuntimeError("every source is already reduced; there is no next source to run")

print(h, '/', len(sources))
source = sources[h]
print('source = ' + source)
print('phasecal = ' + phasecal_df.loc[h]["phasecal"])

2 / 218
source = pks1740-649.2100
phasecal = pks1934-638.2100


In [38]:
# Execute the template notebook for the source chosen in the previous cell.
manual_flagging = False
# reference antenna; use 4 instead if you need to change it, which I needed for '2016'
refant = 3

# arguments handed to the template notebook
nb_kwargs = dict(
    source=source,
    manual_flagging=manual_flagging,
    refant=refant,
    processed_data_dir=processed_data_dir,
    image_dir=image_dir,
)
# the output notebook's name carries the source name
nb_suffix = '-out_{0}'.format(source)

run_notebook(
    nb_name,
    nb_suffix=nb_suffix,
    nb_kwargs=nb_kwargs,
    hide_input=False,
    insert_pos=3,
    out_path='reduction_nbs/',
)

# looping over all sources

In [None]:
# where your raw data is saved
raw_data_dir = datadir + '/raw'
# where you want to save the visibilities, other pngs, logs
processed_data_dir = datadir + '/{0}/reduced_{0}_quick'.format(band)
print('raw data sourced from: {0}'.format(raw_data_dir))
print('reduced data saved in: {0}'.format(processed_data_dir))


# Per-band settings as (suffix, rawfiles, ifsel). `suffix` is appended to the
# source names below; `rawfiles` and `ifsel` are only defined here.
band_settings = {
    'L': ('.2100', 'L', 1),
    'C': ('.5500', 'CX', 1),
    'X': ('.9000', 'CX', 2),
}
# Fail loudly on a mistyped band instead of silently keeping stale (or
# undefined) values from an earlier cell.
if band not in band_settings:
    raise ValueError("unknown band {0!r}; expected one of {1}".format(band, sorted(band_settings)))
suffix, rawfiles, ifsel = band_settings[band]

# dataframe for the source being processed to find its best flux calibrator
df_path = processed_data_dir + '/calibration_order.csv'

# Build the table once; later runs reuse the CSV already on disk.
if not os.path.exists(df_path):

    # the block files list the sources in mosaic order
    block_dir = raw_data_dir.split('/raw')[0] + "/blocks"
    block_files = sorted_nicely(glob.glob(block_dir + "/*"))

    # the bandflux calibrator lives in its own file; separate it from the blocks
    bfcal_files = [a for a in block_files if 'bandfluxcal' in a]
    if not bfcal_files:
        raise IOError("no 'bandfluxcal' file found in {0}".format(block_dir))
    bfcal_path = bfcal_files[0]
    block_files = [a for a in block_files if 'bandfluxcal' not in a]
    if not block_files:
        raise IOError("no block files found in {0}".format(block_dir))

    # Source names per block: the last column of each row, with its first
    # character dropped, lower-cased. ndmin=2 keeps a one-row file
    # two-dimensional so that iterating still yields rows.
    temp_arr = []
    for block in block_files:
        temp_txt = np.loadtxt(block, dtype=bytes, ndmin=2)
        temp_arr.append([t[-1].decode('UTF-8')[1:].lower() for t in temp_txt])

    # Drop repeated blocks, keeping the first occurrence of each so the
    # observation order is preserved. Whole blocks are compared as tuples;
    # np.unique would flatten a rectangular list-of-lists and return indices
    # into the flattened array instead of block indices.
    seen_blocks = set()
    sources = []
    for block_names in temp_arr:
        block_key = tuple(block_names)
        if block_key not in seen_blocks:
            seen_blocks.add(block_key)
            sources.extend(block_names)

    sources = [s + suffix for s in sources]

    # the separate bandflux calibrator goes first
    bandflux_cal = str(np.genfromtxt(bfcal_path, dtype=str)) + suffix
    sources = np.append(bandflux_cal, sources)
    print('working with these sources: {0}'.format(sources))

    # some are repeated but they are in the order we need for phase calibration

    # init a dataframe to keep track of phasecals
    df_init = pd.DataFrame({'name': sources, 'percent_flagged': '', 'flux': '', 'phasecal': ''})
    df_init.to_csv(df_path)


# DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
# index_col=0 restores the integer index written by to_csv above.
phasecal_df = pd.read_csv(df_path, index_col=0)
phasecal_df = phasecal_df.fillna('')
check_if_data_unpacked(phasecal_df, processed_data_dir, df_path)
sources = phasecal_df["name"].values.tolist()

In [39]:
# settings shared by every run of the template notebook
manual_flagging = False
# if you need to change the reference antenna, which I needed for '2016'
#refant = 4
refant = 3

# Make at most one pass per listed source. Which source a pass reduces is
# decided by check_ifreduced / find_phasecal from the CSV on disk, not by the
# position in the list, so the loop only counts passes.
for attempt in range(len(sources)):
    # guess where you should start iterating next; assign phasecal
    # DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
    # index_col=0 restores the integer index stored in the CSV.
    phasecal_df = pd.read_csv(df_path, index_col=0)
    phasecal_df = phasecal_df.fillna('')
    # guess where to start
    h = 0
    # if you never got flux from your last reduction, go redo that one
    while True:
        h = check_ifreduced(processed_data_dir, sources, h, phasecal_df, suffix)
        if h is None:
            print("all reduced")
            break
        p = find_phasecal(phasecal_df, h)
        if isinstance(p, bool):
            if p:
                # index h is the one to reduce next
                break
            # then no flux for h
            h -= 1
        else:
            # any non-bool result is used as the next index to examine
            h = p

    # Nothing left to reduce: stop here rather than indexing sources[None],
    # which would raise a TypeError.
    if h is None:
        break

    print(h, '/', len(sources))
    source = sources[h]
    print('source = ' + source)
    print('phasecal = ' + phasecal_df.loc[h]["phasecal"])

    # for this source, run the .ipynb
    nb_kwargs = {'source': source, 'manual_flagging': manual_flagging, 'refant': refant, 'processed_data_dir': processed_data_dir, 'image_dir': image_dir}
    nb_suffix = '-out_{0}'.format(source)

    run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False, insert_pos=3, out_path='reduction_nbs/')


3 / 218
source = pmnj1726-6427.2100
phasecal = pks1740-649.2100


4 / 218
source = pks1814-637.2100
phasecal = pmnj1726-6427.2100


5 / 218
source = mwacsj2153.5.2100
phasecal = pks1814-637.2100


6 / 218
source = pks2149-28.2100
phasecal = pks1814-637.2100


7 / 218
source = pks2135-209.2100
phasecal = pks2149-28.2100


8 / 218
source = mwacsj2157.5.2100
phasecal = pks2135-209.2100


9 / 218
source = pks2254-367.2100
phasecal = mwacsj2157.5.2100


10 / 218
source = 2311-452.2100
phasecal = pks2254-367.2100


11 / 218
source = pks2333-528.2100
phasecal = 2311-452.2100
Error executing the notebook "mirpy_reduction_template.ipynb".

See notebook "reduction_nbs/mirpy_reduction_template-out_pks2333-528.2100.ipynb" for the traceback.


KeyboardInterrupt: 

# list of specific sources

In [None]:
# Hand-picked source lists, keyed by (observation date, band). Previously these
# were successive reassignments of datadir / band / sources in which only the
# last group took effect; the choice is now explicit in `selected`.
manual_source_lists = {
    ('2016-04-05', 'C'): ['j084205+1835.5500'],
    ('2016-04-05', 'X'): ['j053354-2344.9000'],
    ('2016-04-09', 'X'): ['1934-638.9000', 'j002549-2602.9000'],
    # NOTE(review): listed without a frequency suffix, unlike the other entries -- confirm
    ('2016-04-09', 'C'): ['j002549-2602'],
    # NOTE(review): 'pks1934-638' and 'pks1306-09' carry no frequency suffix here -- confirm
    # last 5 or so didn’t image–
    ('2015-04-12', 'X'): ['mwacsj2153.5.9000', 'pks1934-638', 'pks2149-28.9000',
                          '0023-263.9000', 'pks0208+040.9000', '1323-611.9000', '3c283.9000',
                          'pks1306-09', 'pks1120-274.9000', 'pks0941-080.9000'],
    # last 5 or so didn’t image–
    ('2015-04-12', 'C'): ['0023-263.5500', '0741-063.5500', '0823-500.5500', 'pks1934-638.5500',
                          'pks0208+040.5500', '1323-611.5500', '3c283.5500', 'pks1120-274.5500',
                          'pks1306-09.5500', 'pks0941-080.5500', '0743-673.5500'],
    ('2015-04-11', 'L'): ['pks0105-122.2100', 'mwacsj0408.0.2100', 'pks0252-71.2100', '1245-197.2100',
                          '1015-314.2100', 'pks1120-274.2100', 'pmnj1726-6427.2100', '0823-500.2100',
                          '0743-673.2100', '1549-790.2100', 'pks0208+040.2100', 'pks1306-09.2100',
                          'pks0941-080.2100'],
}

# set a data dir, band, and source list
selected = ('2015-04-11', 'L')
obs_date, band = selected
datadir = '/Volumes/mjrose/C3030/' + obs_date
# NOTE(review): the next cell reassigns `sources` from calibration_order.csv,
# so this list is not what the final loop iterates over -- confirm the intent.
sources = manual_source_lists[selected]

In [None]:
# where your raw data is saved
raw_data_dir = datadir + '/raw'
# where you want to save the visibilities, other pngs, logs
processed_data_dir = datadir + '/{0}/reduced_{0}'.format(band)
print('raw data sourced from: {0}'.format(raw_data_dir))
print('reduced data saved in: {0}'.format(processed_data_dir))


# Per-band settings as (suffix, rawfiles, ifsel). `suffix` is appended to the
# source names below; `rawfiles` and `ifsel` are only defined here.
band_settings = {
    'L': ('.2100', 'L', 1),
    'C': ('.5500', 'CX', 1),
    'X': ('.9000', 'CX', 2),
}
# Fail loudly on a mistyped band instead of silently keeping stale (or
# undefined) values from an earlier cell.
if band not in band_settings:
    raise ValueError("unknown band {0!r}; expected one of {1}".format(band, sorted(band_settings)))
suffix, rawfiles, ifsel = band_settings[band]

# dataframe for the source being processed to find its best flux calibrator
df_path = processed_data_dir + '/calibration_order.csv'

# Build the table once; later runs reuse the CSV already on disk.
if not os.path.exists(df_path):

    # the block files list the sources in mosaic order
    block_dir = raw_data_dir.split('/raw')[0] + "/blocks"
    block_files = sorted_nicely(glob.glob(block_dir + "/*"))

    # the bandflux calibrator lives in its own file; separate it from the blocks
    bfcal_files = [a for a in block_files if 'bandfluxcal' in a]
    if not bfcal_files:
        raise IOError("no 'bandfluxcal' file found in {0}".format(block_dir))
    bfcal_path = bfcal_files[0]
    block_files = [a for a in block_files if 'bandfluxcal' not in a]
    if not block_files:
        raise IOError("no block files found in {0}".format(block_dir))

    # Source names per block: the last column of each row, with its first
    # character dropped, lower-cased. ndmin=2 keeps a one-row file
    # two-dimensional so that iterating still yields rows.
    temp_arr = []
    for block in block_files:
        temp_txt = np.loadtxt(block, dtype=bytes, ndmin=2)
        temp_arr.append([t[-1].decode('UTF-8')[1:].lower() for t in temp_txt])

    # Drop repeated blocks, keeping the first occurrence of each so the
    # observation order is preserved. Whole blocks are compared as tuples;
    # np.unique would flatten a rectangular list-of-lists and return indices
    # into the flattened array instead of block indices.
    seen_blocks = set()
    sources = []
    for block_names in temp_arr:
        block_key = tuple(block_names)
        if block_key not in seen_blocks:
            seen_blocks.add(block_key)
            sources.extend(block_names)

    sources = [s + suffix for s in sources]

    # the separate bandflux calibrator goes first
    bandflux_cal = str(np.genfromtxt(bfcal_path, dtype=str)) + suffix
    sources = np.append(bandflux_cal, sources)
    print('working with these sources: {0}'.format(sources))

    # some are repeated but they are in the order we need for phase calibration

    # init a dataframe to keep track of phasecals
    df_init = pd.DataFrame({'name': sources, 'percent_flagged': '', 'flux': '', 'phasecal': ''})
    df_init.to_csv(df_path)


# DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
# index_col=0 restores the integer index written by to_csv above.
phasecal_df = pd.read_csv(df_path, index_col=0)
phasecal_df = phasecal_df.fillna('')
check_if_data_unpacked(phasecal_df, processed_data_dir, df_path)
sources = phasecal_df["name"].values.tolist()

In [None]:
# settings shared by every run of the template notebook
manual_flagging = False
# if you need to change the reference antenna, which I needed for '2016'
#refant = 4
refant = 3

# Make at most one pass per listed source. Which source a pass reduces is
# decided by check_ifreduced / find_phasecal from the CSV on disk, not by the
# position in the list, so the loop only counts passes.
for attempt in range(len(sources)):
    # guess where you should start iterating next; assign phasecal
    # DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
    # index_col=0 restores the integer index stored in the CSV.
    phasecal_df = pd.read_csv(df_path, index_col=0)
    phasecal_df = phasecal_df.fillna('')
    # guess where to start
    h = 0
    # if you never got flux from your last reduction, go redo that one
    while True:
        h = check_ifreduced(processed_data_dir, sources, h, phasecal_df, suffix)
        if h is None:
            print("all reduced")
            break
        p = find_phasecal(phasecal_df, h)
        if isinstance(p, bool):
            if p:
                # index h is the one to reduce next
                break
            # then no flux for h
            h -= 1
        else:
            # any non-bool result is used as the next index to examine
            h = p

    # Nothing left to reduce: stop here rather than indexing sources[None],
    # which would raise a TypeError.
    if h is None:
        break

    print(h, '/', len(sources))
    source = sources[h]
    print('source = ' + source)
    print('phasecal = ' + phasecal_df.loc[h]["phasecal"])

    # for this source, run the .ipynb
    nb_kwargs = {'source': source, 'manual_flagging': manual_flagging, 'refant': refant, 'processed_data_dir': processed_data_dir, 'image_dir': image_dir}
    nb_suffix = '-out_{0}'.format(source)

    run_notebook(nb_name, nb_suffix=nb_suffix, nb_kwargs=nb_kwargs, hide_input=False, insert_pos=3, out_path='reduction_nbs/')



# Evernote

In [None]:
# add the html to evernote; a project for another day
# The entire cell body is one triple-quoted string, so none of the code in it
# is executed; evaluating the cell only produces that string.
# NOTE(review): the disabled code is not runnable as written. `temp` is
# formatted into the note content but is never assigned inside it, and
# `note_store.findNotesMetadata` / `note_store.getNote` are called although the
# variable created is named `noteStore`. `auth_token` is a "???" placeholder.
'''import subprocess
# save as HTML
## double check this renders the last cells
notebook_dir = os.getcwd()
filename = '/'.join([notebook_dir,nb_name.split('.ipynb')[0]+ nb_suffix+'.ipynb'])
fn = filename.replace(".ipynb", ".html")
rename_cmd = fn + ' ' + filename
print(rename_cmd)

# converts ipynb to html
cmd = 'jupyter nbconvert --to html_embed --template toc2 {0}'.format(filename)
subprocess.call(cmd, shell=True)

# uses javascript to convert html to enml, evernote's format
# I didn't have javascript, so
# I needed to 'brew install node' for this bit
# then 'npm install html2enml
# cmd = 'html2enml {0}'.format(fn)
# subprocess.call(cmd, shell=True)

#settings.configure()
HtmlFile = open(fn, 'r', encoding='utf-8')
html_code = HtmlFile.read()
#html_code = HttpResponse(html_code, content_type="html")


# save to evernote 

import evernote.edam.type.ttypes as Types
import evernote.edam.notestore.ttypes as NoteStore
from evernote.api.client import EvernoteClient

auth_token = "???"
client = EvernoteClient(token=auth_token, sandbox=False)
userStore = client.get_user_store()
user = userStore.getUser()
userId = user.id
shardId = user.shardId


# write a new note
noteStore = client.get_note_store()
note = Types.Note()
notebooks = noteStore.listNotebooks()
notebook_guid = [n.guid for n in notebooks if "C3030 Reduction" in n.name]
note.notebookGuid = notebook_guid[0]
note.title = "I'm a test note!"
note.tagNames = ['test', 'fun', 'procrastinating']
note.content = '<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">'
note.content += '<en-note> {0} <br/></en-note>'.format(temp)

note = noteStore.createNote(note)
noteGuid = note.guid

# find note you want to link the new note you created to 
note_filter = NoteStore.NoteFilter()
note_filter.words = 'intitle:"Overview reduction notes"'
notes_metadata_result_spec = NoteStore.NotesMetadataResultSpec()
notes_metadata_list = note_store.findNotesMetadata(note_filter, 0, 1, notes_metadata_result_spec)
note_guid = notes_metadata_list.notes[0].guid

overview_note = note_store.getNote(note_guid, True, False, False, False)
# don't erase what is already there 
previous_content = overview_note.content.split('</en-note>')[0]
overview_note.content = previous_content
# new content
overview_note.content += '\n<div><a style="color:#69aa35;" href="evernote:///view/{0}/{1}/{2}/{2}/">{3}</a><br/></div>'.format(userId,shardId,noteGuid, 'source')
# end it
overview_note.content += '</en-note>\n'
# push
overview_note = noteStore.updateNote(overview_note)'''