In [23]:
# Auto notebook (.ipynb) to Python file converter
# The following code, when run from a notebook, starts a process on the notebook's VM
# that converts all *.ipynb --> ./notepy/*.py
# The script does that automatically.
# USAGE
# 1. Run this paragraph
# 2. Create a new notebook - for example create test1.ipynb with print("Hello world") in it
# 3. import notepy.test1
# You will see "Hello world" as a result of the import
# _APACHE_SPARK_ 
# If you are using Apache Spark:
#     you can add the module file to spark with the code:
#     sc.addPyFile(notepy.test1.__file__)
# _GOOGLE_COLAB_
# If you are using Google Colab
#     Script will try to mount your drive and ask for login
#     Script assumes notebooks are under "/content/drive/My Drive/Colab Notebooks"
#     It adds this to sys.path which allows same usage (as above):
#     import notepy.test1


import os
import re
import sys
import time


def paragraph_to_py_module(py_filename, spark_context=None, src_text=None, paragraph_index=-2):
    """Save a notebook paragraph's source to a Python file, optionally shipping it to Spark.

    Args:
        py_filename: path of the file to (over)write.
        spark_context: when truthy, its ``addPyFile`` is called with ``py_filename``
            after the file has been written.
        src_text: source text to write; when ``None`` the text is read from the
            notebook input history ``In`` at ``paragraph_index``.
        paragraph_index: index into ``In`` (the default -2 is the last paragraph).
    """
    text = In[paragraph_index] if src_text is None else src_text
    with open(py_filename, "w+t") as module_file:
        module_file.write(text)
    if not spark_context:
        return
    spark_context.addPyFile(py_filename)


def relative_path(root_dir, dirpath, f):
    """Return the path of file ``f`` in ``dirpath``, relative to ``root_dir``.

    Args:
        root_dir: directory the result should be relative to; when empty the
            joined path is returned unchanged.
        dirpath: directory that contains the file.
        f: file name inside ``dirpath``.

    Returns:
        The relative path, or ``None`` (after printing an error) when the file
        does not live under ``root_dir``.
    """
    full = os.path.join(dirpath, f)
    if not root_dir:
        return full
    if full == root_dir:
        return ""
    # Compare against the root *plus a separator* so that a sibling directory
    # sharing the same prefix (root "/a/b" vs "/a/bc/x") is not accepted.
    prefix = root_dir if root_dir.endswith(os.sep) else root_dir + os.sep
    if not full.startswith(prefix):
        print("ERROR - bad path for root", full)
        return None
    return full[len(prefix):]


def is_ipython():
    """Report whether ``get_ipython`` is defined among this module's globals."""
    module_namespace = globals()
    return "get_ipython" in module_namespace


def ipython_kind():
    """Classify the notebook environment this code is running in.

    Returns ``None`` outside a notebook, otherwise one of ``"google_colab"``,
    ``"spark_databricks"``, ``"spark"`` or ``"unknown_ipython"``.
    """
    if not is_ipython():
        return None
    shell_repr = str(get_ipython())
    if "google.colab._shell.Shell" in shell_repr:
        return "google_colab"
    module_names = globals()
    if "spark" not in module_names:
        return "unknown_ipython"
    # A "spark" global is present; "dbutils" alongside it is treated as Databricks.
    return "spark_databricks" if "dbutils" in module_names else "spark"


def google_colab_login_if_needed():
    """Mount Google Drive at /content/drive unless that directory already exists."""
    mount_point = '/content/drive'
    if not os.path.isdir(mount_point):
        # Imported lazily: the module is only needed when a mount is required.
        from google.colab import drive
        drive.mount('/content/drive')


def do_system(cmd):
    """Print ``cmd`` and run it: via the notebook shell when in a notebook, else ``os.system``."""
    print("Executing: ", cmd)
    runner = get_ipython().system_raw if is_ipython() else os.system
    runner(cmd)
    
    
def iter_relative_path_recursive(root_dir):
    """Yield every file found by walking ``root_dir``, as a path relative to ``root_dir``."""
    for dirpath, _subdirs, filenames in os.walk(root_dir):
        yield from (relative_path(root_dir, dirpath, name) for name in filenames)

            
def iter_relative_path(root_dir):
    """Yield the names of the non-directory entries directly inside ``root_dir``."""
    for entry in os.listdir(root_dir):
        if not os.path.isdir(os.path.join(root_dir, entry)):
            yield entry

        
def iter_merge_infinite_loop(iter_builder1, iter_builder2):
    """Endlessly alternate between two restartable iterators.

    Each round takes at most one item from the first iterator and then at most
    one from the second.  When an iterator is exhausted (``StopIteration``) or
    fails with ``RuntimeError``, it is rebuilt by calling its builder again and
    contributes nothing for that round.  The generator never terminates; if
    both builders keep producing empty iterators it loops without yielding.
    """
    builders = (iter_builder1, iter_builder2)
    active = [build() for build in builders]
    while True:
        for slot, build in enumerate(builders):
            try:
                yield next(active[slot])
            except (StopIteration, RuntimeError):
                # Source ran dry (or broke): start it over for the next round.
                active[slot] = build()

            
def copy_note_to_py(note_full_path, note_name, dst_dir):
    """Convert one notebook to a Python file and move the result into ``dst_dir``.

    Runs ``jupyter nbconvert --to python`` on the notebook, which writes the
    ``.py`` file next to it, then moves that file into ``dst_dir``.

    Args:
        note_full_path: path of the ``.ipynb`` file to convert.
        note_name: unused; kept so existing callers keep working.
        dst_dir: existing directory that receives the generated ``.py`` file.
    """
    import shlex  # local import: only needed to quote the shell argument

    # Derive "<name>.py" from "<name>.ipynb" with plain string handling.
    # re.sub(r"(\.ipynb)?$", ".py", path) also replaces the empty match at the
    # end of the string on Python 3.7+, which produced "<name>.py.py".
    suffix = ".ipynb"
    if note_full_path.endswith(suffix):
        stem = note_full_path[:-len(suffix)]
    else:
        stem = note_full_path
    dst_py = stem + ".py"  # file created by nbconvert

    # Quote the path so notebook names containing spaces or shell
    # metacharacters reach nbconvert as a single argument.
    cmd = "jupyter nbconvert --to python {}".format(shlex.quote(note_full_path))
    do_system(cmd)

    if not os.path.exists(dst_py):
        # Conversion failed; report it instead of crashing the caller.
        print("ERROR - nbconvert did not produce", dst_py)
        return
    # os.replace overwrites a previous conversion on every platform.
    os.replace(dst_py, os.path.join(dst_dir, os.path.basename(dst_py)))
    
    
def copy_local_notes_to_py(src_dir=".", dst_dir=None, exclude_notes=()):
    """Watch ``src_dir`` forever, converting new or modified notebooks to ``.py``.

    The directory is scanned in an endless loop, interleaved with re-checks of
    notebooks that changed recently.  A notebook is converted the first time it
    is seen and again whenever its modification time increases.  Polling speeds
    up after a change and slowly returns to one check per 0.1 s.

    Args:
        src_dir: directory whose ``*.ipynb`` files are watched (not recursive).
        dst_dir: directory receiving the ``.py`` files; defaults to
            ``<src_dir>/notepy``.  It is created when missing.
        exclude_notes: notebook file names to ignore.

    This function does not return.
    """
    if not dst_dir:
        dst_dir = os.path.join(src_dir, "notepy")
    # Also create a caller-supplied destination, otherwise the first move fails.
    os.makedirs(dst_dir, exist_ok=True)

    last_converted = {}  # filename -> mtime at the time of the last conversion

    def handle_file(filename):
        """Convert ``filename`` if it changed; return ``(updated, mtime)``.

        ``mtime`` is ``None`` when the file no longer exists (deletes are not
        propagated to the destination directory).
        """
        full = os.path.join(src_dir, filename)
        try:
            mtime = os.path.getmtime(full)
        except OSError:
            # File may have been deleted between listing and stat.
            return False, None
        if filename in last_converted and mtime <= last_converted[filename]:
            return False, mtime
        last_converted[filename] = mtime
        copy_note_to_py(full, filename, dst_dir)
        return True, mtime

    def scan_src_dir():
        """Start a new pass over ``src_dir``."""
        # Pause once per pass so a directory without notebooks does not turn
        # the endless loop into a 100% CPU busy-spin.
        time.sleep(0.1)
        return iter_relative_path(src_dir)

    log_count = 0
    speed = 1.0
    recently_changed = {}
    # Iterate a snapshot of recently_changed: entries are added and removed
    # while the merged iterator is live.
    for filename in iter_merge_infinite_loop(scan_src_dir,
                                             lambda: iter(list(recently_changed))):
        if not filename.endswith(".ipynb"):
            continue
        if filename in exclude_notes:
            continue
        time.sleep(0.1 * speed)
        speed = min(speed * 1.05, 1.0)  # slow down
        log_count += 1
        if log_count >= 50:
            # Log only every 50th check to prevent too much output.
            print("Checking file ", filename)
            log_count = 0
        updated, mtime = handle_file(filename)
        if updated:
            recently_changed[filename] = mtime
            # A change was seen - poll faster for a while.
            speed /= 2.0
        elif filename in recently_changed:
            # Stop fast-tracking files that were deleted or have been quiet
            # for more than five minutes.
            if mtime is None or time.time() - mtime > 5 * 60:
                del recently_changed[filename]


def kill_prev_script(ps_pattern):
    """Kill other running processes whose ``ps ax`` line matches ``ps_pattern``.

    The current process is excluded by comparing the PID column exactly, so
    unrelated PIDs that merely contain the same digits are still matched.

    Args:
        ps_pattern: grep pattern identifying the script's command line.
    """
    import shlex  # local import: only needed to quote the grep pattern

    pid = os.getpid()
    do_system(
        "ps ax | grep -e %s | grep -v grep | awk '$1 != %d {print $1}' | xargs kill "
        % (shlex.quote(ps_pattern), pid)
    )


def main():
    """Entry point for both the notebook paragraph and the background script.

    Inside a notebook: write this paragraph to ``note2mod.py`` and launch it as
    a background process (on Google Colab, from the Colab notebooks folder on
    the mounted drive, which is also added to ``sys.path``).  The working
    directory is restored afterwards, even if a step fails.

    Outside a notebook (i.e. as the launched script): kill any previous
    instance and start the endless notebook-to-py conversion loop.
    """
    if not is_ipython():
        # in main but not in notebook - in a script
        kill_prev_script("note2mod.py")
        copy_local_notes_to_py(exclude_notes=["note2mod.ipynb"])
        return

    save_dir = os.getcwd()
    try:
        if ipython_kind() == "google_colab":
            google_colab_login_if_needed()
            colab_root = "/content/drive/My Drive/Colab Notebooks"
            os.chdir(colab_root)
            if colab_root not in sys.path:
                # The script will create "notepy" under colab_root;
                # this allows importing from notepy.MYNOTEBOOK.
                sys.path.append(colab_root)
        # running in main notebook - save this paragraph to a py file
        paragraph_to_py_module("note2mod.py", paragraph_index=-1)
        # now run myself on the notebook machine
        do_system("python3 note2mod.py &")
    finally:
        os.chdir(save_dir)  # restore, even when a step above raised


if __name__ == "__main__":
    main()


Executing:  python3 note2mod.py &
