In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
from nbdev.export import notebook2script
# from nbdev.imports import create_config
# create_config("nbx", user='mirkoklukas',
#               git_url="https://github.com/mirkoklukas/nbx/tree/master/",
#               lib_path = 'nbx',
#               path='.',
#               nbs_path = '.' , tst_flags='tst', cfg_name='settings.ini')

In [None]:
#default_exp om
notebook2script()

In [None]:
import os
print(os.environ['omx'])
print(os.environ['om'])
print(os.environ['om'])

/om2/user/mklukas/nbx-experiments
mklukas@openmind7.mit.edu
mklukas@openmind7.mit.edu


# Parsing the notebook

## Finding tags, filter cells

In [None]:
#export
import re
# Matches a line whose first non-blank character is `#`, immediately followed
# by a tag name made of letters/underscores (no space after `#`).
# Group 1 captures the tag name; the rest of the line is ignored.
_re_tag = re.compile(r"^\s*#([a-zA-Z_]+).*$")

In [None]:
#export
def extract_tag(line):
    """Return the tag name if `line` starts with a `#name` tag, else None.

    Leading whitespace before the `#` is allowed, but the name has to follow
    the `#` directly (so `"# nbx"` carries no tag).
    """
    found = _re_tag.match(line)
    return found.group(1) if found else None

In [None]:
assert extract_tag("#nbx ") == "nbx"
assert extract_tag("#nbx  something else ") == "nbx"
assert extract_tag("# nbx something else ") == None
assert extract_tag("#xarg ") == "xarg"

In [None]:
#export
def contains_tag(name):
    """Build a predicate that tells whether a line carries the tag `name`."""
    def has_tag(line):
        # A line without any tag yields None, which never equals `name`.
        return extract_tag(line) == name
    return has_tag

# Predicate for lines tagged with `#nbx`.
is_nbx = contains_tag("nbx")

In [None]:
assert is_nbx("#nbx") 
assert is_nbx("# nbx") == False
assert is_nbx(" #nbx") 
assert is_nbx("  #nbx") 

In [None]:
#export 
def is_nbx_cell(cell):
    """Tell whether `cell` is a non-empty code cell whose first line is `#nbx`.

    `cell` is a notebook cell dict; only its `cell_type` and `source` entries
    are read, and `source` is only looked at for code cells.
    """
    if cell['cell_type'] == 'code' and cell['source']:
        first_line = cell['source'][0]
        return is_nbx(first_line)
    return False

When we create our Python script, we need to exclude Jupyter's *magic* functions and shell commands that can be used in a code cell.

In [None]:
#export
# Matches lines whose first non-blank character is `%` (line magic `%...`,
# cell magic `%%...`) or `!` (shell command).
_re_magic =  re.compile(r"^\s*%{1,2}|^\s*!")

In [None]:
#export
def is_magic_or_shell(line):
    """Return True when `line` starts (after optional whitespace) with `%`, `%%` or `!`."""
    return _re_magic.match(line) is not None

In [None]:
assert is_magic_or_shell("%pwd ")
assert is_magic_or_shell("%%capture ")
assert is_magic_or_shell("!ls")

## Parsing "xargs"

We need to parse the line below `#xarg`, and decompose it into a variable declaration and the parameter range for the sweep.

In [None]:
# export                
# Splits an xarg declaration of the form `name = value ; sweep` into three
# groups: everything before the first `=`, everything up to the first `;`
# (or the end of the line), and whatever follows the optional `;`.
_re_xarg = re.compile(r"""
# parses the line below an `xarg` tag:
^
([^=]+)
=
([^;]+)
;?
(.*)
$""", re.VERBOSE)

In [None]:
#export
def strip(s):
    """Return `s` without leading and trailing whitespace."""
    return s.strip()

def parse_xarg(line):
    """Decompose the line below an `#xarg` tag into `(name, value, sweep)`.

    The line is expected to look like `name = value ; sweep`, where the
    `; sweep` part is optional. All three parts are returned as stripped
    strings; `sweep` is the empty string when no sweep range is given.

    Raises:
        ValueError: if `line` does not contain a `name = value` assignment.
    """
    m = _re_xarg.match(line)
    if m is None:
        # Without this guard the failure surfaces as an opaque
        # AttributeError on `None.groups()`.
        raise ValueError(
            f"Cannot parse xarg line (expected `name = value [; sweep]`): {line!r}")
    name, val, sweep = map(strip, m.groups())
    return name, val, sweep

In [None]:
parse_xarg("x = 0 ; [0,1,2,3]")

('x', '0', '[0,1,2,3]')

## Parsing "nbx" cells

First let's load the notebook

In [None]:
#export
import json
from argparse import Namespace

class Bunch(object):
    """Minimal attribute container: exposes the entries of a dict as attributes."""

    def __init__(self, adict=None):
        """Copy the items of `adict` (if any) into the instance attributes.

        `None` is used as the default instead of a shared mutable `{}`.
        """
        if adict is not None:
            self.__dict__.update(adict)

    def __repr__(self):
        """Show only the attribute names, not their (possibly large) values."""
        return str(self.__dict__.keys())

def load_nb(fname):
    """Read the notebook JSON file `fname` (UTF-8) and return it as a `Bunch`.

    The top-level keys of the JSON document become attributes of the result.
    """
    # The context manager guarantees the file handle is closed, even when
    # JSON decoding fails.
    with open(fname, 'r', encoding="utf-8") as f:
        nbdict = json.load(f)
    return Bunch(nbdict)

In [None]:
nb = load_nb("om.ipynb")
nb

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [None]:
#export
def parse_src(a, src):
    """Sort the source lines of a cell into xarg declarations and body lines.

    Args:
        a: accumulator dict with list entries `'xarg'` and `'xbody'`; it is
            extended in place.
        src: list of source lines of a cell.

    Returns:
        The tuple `(a, [])` - the accumulator and the (empty) list of
        unprocessed lines.

    Raises:
        ValueError: if an `#xarg` tag is the last line, i.e. no declaration
            follows it.

    Line handling:
        * `#xarg` - the tag line is dropped and the line that follows it is
          stored in `a['xarg']`.
        * magic (`%`, `%%`) and shell (`!`) lines are dropped.
        * everything else, including `#nbx` and any other `#tag`-like
          comment, is stored in `a['xbody']`.
    """
    # Iterative on purpose: one recursion level per line would hit the
    # interpreter's recursion limit on long cells.
    i, n = 0, len(src)
    while i < n:
        line = src[i]
        tag = extract_tag(line)

        if tag == 'xarg':
            if i + 1 >= n:
                raise ValueError(
                    "`#xarg` tag must be followed by a line declaring the argument")
            a['xarg'].append(src[i + 1])
            i += 2
            continue

        # Tagged lines are comments and always belong to the body; untagged
        # lines belong to the body unless they are magics / shell commands.
        if tag is not None or not is_magic_or_shell(line):
            a['xbody'].append(line)
        i += 1

    return a, []
    

def parse_nbx_cell(cell):
    """Parse the source of `cell` and return `(xarg_lines, body_lines)`."""
    collected, _ = parse_src({'xbody': [], 'xarg': []}, cell['source'])
    return collected['xarg'], collected['xbody']

Let's see how it works

In [None]:
#nbx

#xarg 
x = 0 ; [0,1,2,3,4]

#xarg 
y = 0 ;

#xarg
task_id = 0
#xarg
results_dir = "./"

# some comment
z = 1

In [None]:
#nbx

print("some result")

some result


In [None]:
nb = load_nb("om.ipynb")
for cell in list(filter(is_nbx_cell, nb.cells)):
    a = {'xbody': [], 'xarg': [] }
    a,_ = parse_src(a, cell['source'])
#     print("-----------\n-- CELL --\n-----------")
#     print("arg:", "".join(a['xarg']))
#     print("body:", "".join(a['xbody']))
    
    print("\n*****************\n** Parsed Cell **\n*****************\n")
    xarg, xbody = parse_nbx_cell(cell)
    print(xarg)
    print(xbody)
    



*****************
** Parsed Cell **
*****************

['x = 0 ; [0,1,2,3,4]\n', 'y = 0 ;\n', 'task_id = 0\n', 'results_dir = "./"\n']
['#nbx\n', '\n', '\n', '\n', '\n', '# some comment\n', 'z = 1']

*****************
** Parsed Cell **
*****************

[]
['#nbx\n', '\n', 'print("some result")']


## Parsing the whole thing

In [None]:
#export
from functools import reduce

def concat(list1, list2):
    """Return `list1` followed by `list2`, combined with `+`."""
    joined = list1 + list2
    return joined

def unzip(zipped):
    """Inverse of `zip`: turn an iterable of tuples into an iterator of columns."""
    columns = zip(*zipped)
    return columns

def negate(func):
    """Return a predicate that yields the boolean negation of `func`."""
    def negated(x):
        return not func(x)
    return negated

def is_constarg(a):
    """Tell whether the xarg triple `a` has an empty third (sweep) entry."""
    sweep = a[2]
    return not len(sweep)

# Complement of `is_constarg`: the triple has a non-empty sweep entry.
not_constarg = negate(is_constarg)

def get_item(i):
    """Return a function that looks up index/key `i` in its argument."""
    def getter(x):
        return x[i]
    return getter

def get_items(*I):
    """Return a function that picks the entries at positions `I` as a tuple."""
    def picker(x):
        return tuple(x[i] for i in I)
    return picker

In [None]:
#export
def parse_nb(nb):
    """Collect the xargs and body lines of all `#nbx` cells of a notebook.

    Args:
        nb: loaded notebook exposing its cells as `nb.cells`.

    Returns:
        A `Bunch` with the attributes
        * `func_body`  - body lines of all nbx cells, in notebook order,
        * `args`       - `(name, value)` for every xarg,
        * `const_args` - `(name, value)` for xargs without a sweep range,
        * `sweep_args` - `(name, sweep)` for xargs with a sweep range.
    """
    xargs, xbody = [], []
    for cell in nb.cells:
        if not is_nbx_cell(cell):
            continue
        arg_lines, body_lines = parse_nbx_cell(cell)
        xargs.extend([parse_xarg(line) for line in arg_lines])
        xbody.extend(body_lines)

    pnb = Bunch()

    # Echo the parsed (name, value, sweep) triples.
    print(xargs)

    pnb.func_body = xbody
    pnb.args = [(name, val) for name, val, _ in xargs]
    pnb.const_args = [(name, val) for name, val, sweep in xargs
                      if len(sweep) == 0]
    pnb.sweep_args = [(name, sweep) for name, _, sweep in xargs
                      if len(sweep) != 0]

    return pnb
        

In [None]:
nb = load_nb("om.ipynb")
pnb = parse_nb(nb)
print(pnb.args)
print(pnb.const_args)
print(pnb.sweep_args)
print(pnb.func_body)

[('x', '0', '[0,1,2,3,4]'), ('y', '0', ''), ('task_id', '0', ''), ('results_dir', '"./"', '')]
[('x', '0'), ('y', '0'), ('task_id', '0'), ('results_dir', '"./"')]
[('y', '0'), ('task_id', '0'), ('results_dir', '"./"')]
[('x', '[0,1,2,3,4]')]
['#nbx\n', '\n', '\n', '\n', '\n', '# some comment\n', 'z = 1', '#nbx\n', '\n', 'print("some result")']


# Creating the file bundle 

In [None]:
#export
from pathlib import Path
import pkg_resources
from nbx.templ import *
import os

def check_parsed_nb(pnb):
    """Ensure the parsed notebook declares the `task_id` and `results_dir` xargs.

    Raises:
        KeyError: naming the first of the two arguments that is missing.
    """
    declared = [entry[0] for entry in pnb.args]
    for required in ("task_id", "results_dir"):
        if required not in declared:
            raise KeyError(f"You didn't specify `{required}`!!")
        
class NbxBundle():
    """File bundle that packages the `#nbx` cells of a notebook for a remote run.

    Creating an instance parses the notebook, creates the folder
    `<name>_nbx` and renders `experiment.py`, `wrapper.py` and `run.sh` into
    it from the package templates. The remaining methods shell out to
    `scp` / `ssh` and rely on the environment variables `$om`, `$omx` and
    `$omid`; the constructor additionally reads `omx` and `omsimg` from
    `os.environ`.
    """

    def __init__(self,
                 nbname,
                 name=None,
                 linting=True,
                 time=(1, 0),
                 ntasks=10,
                 step=5,
                 simg="mirko-datascience.simg"):
        """Parse `nbname` and write the bundle to disk.

        Args:
            nbname: path of the notebook to bundle.
            name: bundle name; defaults to the stem of `nbname`.
            linting: if true, run `pylint -E` on the generated scripts.
            time: `(hours, mins)` pair passed to the run template.
            ntasks: passed to the run template as `ntasks`.
            step: passed to the run template as `array_step`.
            simg: file name that is joined onto the `omsimg` directory.

        Raises:
            KeyError: if the notebook lacks the `task_id` / `results_dir`
                xargs, or if the `omx` / `omsimg` environment variables
                are not set.
            RuntimeError: if `linting` is true and pylint reports errors.
        """
        if name is None:
            name = Path(nbname).stem

        self.nbname = nbname
        self.name = name
        self.path = Path(f"{name}_nbx")

        nb = load_nb(nbname)
        nb = parse_nb(nb)
        self.nb = nb

        check_parsed_nb(nb)

        # The number of configurations is the product of the sweep lengths.
        # NOTE(review): `eval` executes the sweep expression taken verbatim
        # from the notebook - only bundle notebooks you trust.
        self.num_configs = 1
        for k, vs in nb.sweep_args:
            self.num_configs *= len(eval(vs))

        self.create_folders()
        self.create_script("experiment.tpl", "experiment.py", vars(nb))
        self.create_script("wrapper.tpl", "wrapper.py", {
            'experiment_module': "experiment"})
        self.create_script("run.tpl", "run.sh", {
            'job_name': name,
            'nbx_folder': os.environ['omx'],
            'script_to_run': "wrapper.py",
            'results_dir': "./results",
            'array_start': 1,
            'array_end':  self.num_configs,
            'array_step': step,
            'hours': time[0],
            'mins': time[1],
            'ntasks': ntasks,
            'script': 'wrapper.py',
            'simg': Path(os.environ['omsimg'])/simg,
            'mail_user': "mirko.klukas@gmail.com",
            'mem_per_cpu': 2000
        })

        print(self)
        if linting: self.check_scripts()

    def create_script(self, tname, fname, vars):
        """Render the package template `tname` with `vars` into `<bundle>/<fname>`."""
        tpath = Path(pkg_resources.resource_filename(
                     __name__, f"/templates/{tname}"))

        print(tpath)
        create_file_from_template(tpath,
            self.path/fname, vars)

    def create_folders(self):
        """Create the bundle folder layout and copy a local `./src` into it.

        Safe to call repeatedly: existing folders are kept and `io/` is
        created even if the bundle folder already exists.
        """
        # Creating `io` also creates the bundle folder itself.
        os.makedirs(self.path/'io', exist_ok=True)

        if os.path.exists('./src'):
            os.makedirs(self.path/'src', exist_ok=True)
            os.system(f"cp -r src/* {self.path/'src'}")

        # Make sure an `__init__.py` exists without truncating an existing one.
        open(self.path/'__init__.py', 'a').close()

    def run_experiment(self):
        """Submit the job via `run` and print hints for follow-up commands."""
        self.run()
        print("check status with `ssh $om squeue -u $omid` or `bundle.status()`")
        print("pull results with `bundle.pull_results()`")

    def __str__(self):
        """Render `BUNDLE_SUMMARY` with the instance attributes."""
        return render_template_from_string(BUNDLE_SUMMARY,
                                           vars(self))

    def _run_command(self, cmd):
        """Run `cmd` in a shell and return its stripped standard output."""
        # The context manager closes the pipe once the output has been read.
        with os.popen(cmd) as stream:
            output = stream.read()
        return output.strip()

    def push(self):
        """Copy the bundle folder to `$om:$omx` with `scp -r`."""
        cmd = f"scp -r {self.path} $om:$omx"
        output = self._run_command(cmd)
        if len(output) > 0:
            print(output)

    def run(self):
        """Submit `run.sh` on the remote via `ssh $om sbatch` and print the reply."""
        cmd = f"ssh $om sbatch -D $omx/{self.path} $omx/{self.path}/run.sh"
        print(self._run_command(cmd))

    def status(self):
        """Print the output of `squeue -u $omid` executed on the remote."""
        cmd = "ssh $om squeue -u $omid"
        print(self._run_command(cmd))

    def pull_results(self):
        """Copy the remote `results` folder into the local bundle folder."""
        cmd = f"scp -r $om:$omx/{self.path}/results ./{self.path}"
        output = self._run_command(cmd)
        if len(output) > 0:
            print(output)

        print(f"copied to `{self.path}/results/`")

    def check_scripts(self):
        """Run `pylint -E` on the generated wrapper and experiment scripts.

        Raises:
            RuntimeError: if pylint prints anything for one of the scripts;
                the output is printed before raising.
        """
        for script, label in (("wrapper.py", "wrapper"),
                              ("experiment.py", "experiment")):
            output = self._run_command(f"pylint -E {self.path/script}")
            if len(output) > 0:
                print(output)
                # Raising a plain string is itself a TypeError in Python 3.
                raise RuntimeError(f"Check {label} script")

        print("(pylinting went ok)")



# Template for the summary returned by `NbxBundle.__str__`; it is rendered with
# `vars(bundle)` and reads `path`, `nbname`, `num_configs` and `nb`.
# NOTE(review): the last instruction copies the results to `./results`, whereas
# `NbxBundle.pull_results` copies them into the bundle folder - confirm which
# destination is intended.
BUNDLE_SUMMARY = """
** nbx bundle created **
Path:
    {{path}}
    
Source nb:
    {{nbname}}

Parameters (#configs {{num_configs}}):
    {% for k,v in nb.sweep_args %}* {{k}} = {{v}}{% if not loop.last %}
    {% endif %}{% endfor %}
    {% for k,v in nb.const_args %}  {{k}} = {{v}}{% if not loop.last %}
    {% endif %}{% endfor %}

Instructions:
    Copy to remote, run the bash script, and pull the results
    - `scp -r {{path}} $om:$omx`
    - `ssh $om sbatch -D $omx/{{path}} $omx/{{path}}/run.sh`
    - `scp -r $om:$omx/{{path}}/results ./results`

"""
    

In [None]:
NbxBundle(nbname="om.ipynb", 
          name="test", 
          linting=False,
          time=[2,0], 
          ntasks=4, 
          step=5, 
          simg="mirko-datascience.simg")

[('x', '0', '[0,1,2,3,4]'), ('y', '0', ''), ('task_id', '0', ''), ('results_dir', '"./"', '')]
nbx/templates/experiment.tpl
nbx/templates/wrapper.tpl
nbx/templates/run.tpl

** nbx bundle created **
Path:
    test_nbx
    
Source nb:
    om.ipynb

Parameters (#configs 5):
    * x = [0,1,2,3,4]
      y = 0
      task_id = 0
      results_dir = "./"

Instructions:
    Copy to remote, run the bash script, and pull the results
    - `scp -r test_nbx $om:$omx`
    - `ssh $om sbatch -D $omx/test_nbx $omx/test_nbx/run.sh`
    - `scp -r $om:$omx/test_nbx/results ./results`



<__main__.NbxBundle at 0x113c4f0f0>

'/om2/user/mklukas/simg'