In [None]:
# default_exp export

# Export

> Code that allows you to export a notebook (.ipynb) as a Python script (.py) to a target folder.

`nb2py` will allow you to convert the notebook (.ipynb) where the function is executed to a Python script.

The conversion applies these rules: 

* The notebook will be automatically saved when the function is executed.
* Only code cells will be converted (not markdown cells).
* A header will be added to indicate the script has been automatically generated. It also indicates where the original ipynb is.
* Cells with a #hide flag won't be converted. Flag variants like # hide, #Hide, #HIDE, ... are also acceptable.
* Empty cells and unnecessary empty lines within cells will be removed.
* By default the script will be created with the same name as the original notebook, in the current working directory (usually the notebook's own folder). But you can pass a target folder and a different file name if you wish.
* If a script with the same name already exists, it will be overwritten.

In [None]:
#export
"""
Code copied from the great nbdev library: https://github.com/fastai/nbdev/blob/master/nbdev/export.py
"""

def _mk_flag_re(body, n_params, comment):
    "Compiles a regex for finding nbdev flags"
    import re
    assert body!=True, 'magics no longer supported'
    prefix = r"\s*\#\s*"
    param_group = ""
    if n_params == -1: param_group = r"[ \t]+(.+)"
    if n_params == 1: param_group = r"[ \t]+(\S+)"
    if n_params == (0,1): param_group = r"(?:[ \t]+(\S+))?"
    return re.compile(rf"""
# {comment}:
^            # beginning of line (since re.MULTILINE is passed)
{prefix}
{body}
{param_group}
[ \t]*       # any number of spaces and/or tabs
$            # end of line (since re.MULTILINE is passed)
""", re.MULTILINE | re.VERBOSE)

# Flag matcher for hidden cells: matches a line made up solely of a `#hide` comment
# (optional whitespace before/after the `#`, no parameters). The trailing `?` in the
# body makes the final "e" optional, so a `#hid` line matches as well.
_re_hide = _mk_flag_re("hide?", 0,
    "Matches any line with #hide without any module name")

In [None]:
# export
def _get_unhidden_cells(cells):
    result = []
    for i,cell in enumerate(cells):
        if cell['cell_type'] == 'code': 
            if not _re_hide.findall(cell['source'].lower()) and cell['source'] != '': result.append(i)
    return result
    
def _read_nb(fname):
    "Read the notebook in `fname`."
    from pathlib import Path
    import nbformat
    with open(Path(fname),'r', encoding='utf8') as f: return nbformat.reads(f.read(), as_version=4)

In [None]:
# export
# MIT License

# Copyright (c) 2020 Mark McPherson

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


import json
import urllib.error
import urllib.request
from itertools import chain
from pathlib import Path, PurePath
from typing import Generator, Tuple, Union

import ipykernel
from jupyter_core.paths import jupyter_runtime_dir
from traitlets.config import MultipleInstanceError


# Message templates used when the notebook or its server cannot be resolved.
FILE_ERROR = "Can't identify the notebook {}."
CONN_ERROR = (
    "Unable to access server;\n"
    "ipynbname requires either no security or token based security."
)


def _list_maybe_running_servers(runtime_dir=None) -> Generator[dict, None, None]:
    """ Iterate over the server info files of running notebook servers.
    """
    if runtime_dir is None:
        runtime_dir = jupyter_runtime_dir()
    runtime_dir = Path(runtime_dir)

    if runtime_dir.is_dir():
        for file_name in chain(
            runtime_dir.glob('nbserver-*.json'),  # jupyter notebook (or lab 2)
            runtime_dir.glob('jpserver-*.json'),  # jupyterlab 3
        ):
            yield json.loads(file_name.read_bytes())


def _get_kernel_id() -> str:
    """ Returns the kernel ID of the ipykernel.
    """
    connection_file = Path(ipykernel.get_connection_file()).stem
    kernel_id = connection_file.split('-', 1)[1]
    return kernel_id


def _get_sessions(srv):
    """ Given a server, returns sessions, or HTTPError if access is denied.
        NOTE: Works only when either there is no security or there is token
        based security. An HTTPError is raised if unable to connect to a 
        server.
    """
    try:
        qry_str = ""
        token = srv['token']
        if token:
            qry_str = f"?token={token}"
        url = f"{srv['url']}api/sessions{qry_str}"
        with urllib.request.urlopen(url) as req:
            return json.load(req)
    except Exception:
        raise urllib.error.HTTPError(CONN_ERROR)


def _find_nb_path() -> Union[Tuple[dict, PurePath], Tuple[None, None]]:
    try:
        kernel_id = _get_kernel_id()
    except (MultipleInstanceError, RuntimeError):
        return None, None  # Could not determine
    for srv in _list_maybe_running_servers():
        try:
            sessions = _get_sessions(srv)
            for sess in sessions:
                if sess['kernel']['id'] == kernel_id:
                    return srv, PurePath(sess['notebook']['path'])
        except Exception:
            pass  # There may be stale entries in the runtime directory
    return None, None


def get_nb_name() -> Union[str, None]:
    """ Returns the short name of the notebook w/o the .ipynb extension,
        or None if it cannot be determined (no exception is raised).
    """
    _, path = _find_nb_path()
    return path.stem if path else None


def get_nb_path() -> Union[Path, None]:
    """ Returns the path of the notebook (the server's root directory joined
        with the notebook's path relative to it), or None if it cannot be
        determined (no exception is raised).

        The root directory is read from the server info's `root_dir` entry,
        falling back to `notebook_dir`; if neither is available the result is None.
    """
    srv, path = _find_nb_path()
    if not (srv and path):
        return None
    root_dir = srv.get('root_dir') or srv.get('notebook_dir')
    if not root_dir:
        return None
    return Path(root_dir) / path

In [None]:
# export
def is_lab():
    """Return True when the parent process' command line mentions `jupyter-lab`.

    Returns False when the current process has no parent process to inspect.
    """
    import psutil
    parent = psutil.Process().parent()
    if parent is None:
        # psutil returns None when the parent cannot be determined.
        return False
    return any('jupyter-lab' in arg for arg in parent.cmdline())

def is_colab():
    "Return True when the active IPython shell's string form mentions `google.colab`."
    from IPython.core import getipython
    shell_repr = str(getipython.get_ipython())
    return 'google.colab' in shell_repr

def _save_nb(wait=2, verbose=True):
    """
    Save and checkpoint the current jupyter notebook.

    Args:
        wait    : seconds to sleep after triggering the save, so the front end has
                  time to write the file.
        verbose : if True, print a notice when the notebook cannot be saved
                  automatically (Google Colab).
    """
    # `IPython.display` is the public import location; importing `display` from
    # `IPython.core.display` is deprecated.
    from IPython.display import Javascript, display, HTML
    import time
    if is_colab():
        if verbose: print('cannot automatically save the notebook. Save it manually if needed.')
        return  # nothing was triggered, so there is nothing to wait for
    if is_lab():
        # JupyterLab: dispatch a synthetic "meta+s" keydown from a hidden element.
        script = """
        this.nextElementSibling.focus();
        this.dispatchEvent(new KeyboardEvent('keydown', {key:'s', keyCode: 83, metaKey: true}));
        """
        display(HTML(('<img src onerror="{}" style="display:none">' 
                      '<input style="width:0;height:0;border:0">').format(script)))
    else:
        # Classic notebook front end.
        display(Javascript('IPython.notebook.save_checkpoint();'))
    time.sleep(wait)

In [None]:
# export
def _py_script_name(name):
    "Return the file name of `name` with all of its suffixes replaced by a single `.py`."
    from pathlib import Path
    file_name = Path(name).name
    suffixes = ''.join(Path(file_name).suffixes)
    # Slice the suffixes off: `str.rstrip` strips a *set of characters*, which would
    # turn e.g. 'nb_copy.ipynb' into 'nb_co'.
    stem = file_name[:-len(suffixes)] if suffixes else file_name
    return f'{stem}.py'


def _clean_cell_source(source):
    "Drop leading/trailing blank lines of `source` and collapse runs of blank lines into one."
    cleaned = []
    for line in source.replace('\r', '').split('\n'):
        if not line.strip(): line = ''
        # Skip blank lines at the start of the cell or directly after another blank line.
        if line == '' and (not cleaned or cleaned[-1] == ''): continue
        cleaned.append(line)
    if cleaned and cleaned[-1] == '': cleaned.pop()
    return '\n'.join(cleaned)


def nb2py(nb_name=None, target='.', script_name=None, verbose=True):
    """Converts the notebook where the function is run to a python script.
    
    Args:
        nb_name     :   name of the notebook where the nb2py function is run. If None it will try to read automatically.
                        Only used when the notebook path cannot be detected; it is then looked up in the
                        current working directory.
        target      :   target directory where the script will be created. Defaults to current directory.
        script_name :   name of the script that will be created. Defaults to notebook name.
                        If None it will be the same as the notebook, just replacing .ipynb by .py.
        verbose     :   prints out details of the export if True.

    Returns:
        The path of the generated script as a string, or None when the export is not
        possible (Google Colab, or the notebook cannot be identified).
    """
    
    # check
    if is_colab(): 
        print("nb2py doesn't work in Google Colab.")
        return
    
    import os
    from pathlib import Path
    # Imported only to fail early with a clear message; `_read_nb` needs it.
    try: import nbformat
    except ImportError: raise ImportError("You need to install nbformat to use nb2py!")
    
    # save nb
    _save_nb(verbose=verbose)
    
    # resolve the notebook location
    nb_path = get_nb_path()
    if nb_path is None:
        if nb_name is None:
            print("nb2py couldn't get the nb_name. Pass it as an argument and re-run nb2py.")
            return
        nb_name = Path('.'.join([Path(nb_name).stem, 'ipynb']))
        nb_path = Path(os.getcwd())/nb_name
    else:
        nb_name = Path(nb_path.name)
    
    # get py full script name
    script_path = Path(target)/_py_script_name(nb_name if script_name is None else script_name)

    # identify convertible cells (excluding empty and those with hide flags).
    # Read through the full path so this also works when the notebook is not in the cwd,
    # and read before touching any existing script so a failed read leaves it intact.
    nb = _read_nb(nb_path)
    cells = [nb['cells'][i] for i in _get_unhidden_cells(nb['cells'])]

    # script header; the path is written with forward slashes so that a Windows path
    # cannot form an invalid escape sequence inside the generated docstring
    parts = [
        '# -*- coding: utf-8 -*-\n',
        f'"""{nb_name}\n\n',
        'Automatically generated.\n\n',
        'Original file is located at:\n',
        f'    {Path(nb_path).as_posix()}\n',
        '"""',
    ]

    # clean up cells
    sep = '\n'* 2
    for cell in cells:
        code = _clean_cell_source(cell['source'])
        if code: parts.append(sep + code)

    # create target folder if it doesn't exist and (over)write the script in one go
    script_path.parent.mkdir(parents=True, exist_ok=True)
    with open(script_path, 'w', encoding='utf8') as f: f.write(''.join(parts))
            
    # check script exists
    assert os.path.isfile(script_path), f"an error occurred during the export and {script_path} doesn't exist"
    if verbose: 
        print(f"{nb_name} converted to {script_path}")
    return str(script_path)

In [None]:
# # hide
# import os
# from pathlib import Path
# import shutil

# script_name = None
# pyname = nb2py(nb_name=None, target='.', script_name=script_name)
# if pyname is not None: 
#     if script_name is None: 
#         pyname2 = str(get_nb_path().name).replace("ipynb", "py")
#     else:
#         pyname2 = ".".join([str(Path(script_name).name), "py"])
#     assert str(pyname) == str(pyname2)
#     assert os.path.isfile(pyname2)
#     os.remove(pyname2)
#     assert not os.path.isfile(pyname2)

# script_name = "007b_export"
# pyname = nb2py(nb_name=None, target='.', script_name=script_name)
# if pyname is not None: 
#     if script_name is None: 
#         pyname2 = get_nb_path().replace("ipynb", "py")
#     else:
#         pyname2 = ".".join([str(Path(script_name).name), "py"])
#     assert str(pyname) == str(pyname2)
#     assert os.path.isfile(pyname2)
#     os.remove(pyname2)
#     assert not os.path.isfile(pyname2)
    
# target = Path('test_export')
# script_name = None
# pyname = nb2py(nb_name=None, target=target, script_name=script_name)
# if pyname is not None: 
#     pyname2 = target/Path('.'.join([get_nb_name().rsplit('.', 1)[0],'py'])).name
#     assert str(pyname) == str(pyname2)
#     assert os.path.isfile(pyname2)
#     shutil.rmtree(Path(pyname2).parent)
#     assert not os.path.isfile(pyname2)

In [None]:
#hide
# NOTE(review): `create_scripts` and `beep` are not defined in this notebook; they
# presumably come from the star import on the next line — confirm.
from tsai.imports import *
out = create_scripts(); beep(out)