In [1]:
#convert

In [2]:
#hide
import os
os.chdir("..")

# 01. Convert Notebooks to *.py and *.md

> This tool allows conversion from an ipynb notebook into .py or .md files.

In [3]:
#export
from typing import List, Dict, Optional
import json
import os
import sys

#hide

First we will implement a helper function that allows us to find all notebooks. For that we will scan the folder provided.

In [4]:
#export
def _get_notebooks(folder: str = ".") -> List[str]:
    nbs = []
    for root, dirs, files in os.walk(folder):
        if ".ipynb_checkpoints" in root:
            continue
        for fname in files:
            if fname.endswith(".ipynb"):
                nbs.append(os.path.join(root, fname).replace("\\", "/").replace("./", ""))
    return nbs

#hide

A quick test on the source folder.

In [5]:
#hide
_get_notebooks()

['jlabdev/convert.ipynb']

#hide

Now that this works. Let's load the first notebook.

In [6]:
#export
def _get_notebook(file_path: str) -> Dict:
    with open(file_path, "r") as f:
        return json.loads(f.read())

In [7]:
#hide
file_path = _get_notebooks(".")[0]
print("Using notebook: {}".format(file_path))
notebook = _get_notebook(file_path)
print(list(notebook.keys()))

print("cell 0")
print(notebook["cells"][0])

print("cell 1")
print(notebook["cells"][1])

print()
print("metadata")
print(notebook["metadata"])

print()
print("nbformat")
print(notebook["nbformat"])

print()
print("nbformat_minor")
print(notebook["nbformat_minor"])

Using notebook: jlabdev/convert.ipynb
['cells', 'metadata', 'nbformat', 'nbformat_minor']
cell 0
{'cell_type': 'code', 'execution_count': 1, 'metadata': {}, 'outputs': [], 'source': ['#convert']}
cell 1
{'cell_type': 'code', 'execution_count': 2, 'metadata': {}, 'outputs': [], 'source': ['#hide\n', 'import os\n', 'os.chdir("..")']}

metadata
{'kernelspec': {'display_name': 'Python 3', 'language': 'python', 'name': 'python3'}, 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3}, 'file_extension': '.py', 'mimetype': 'text/x-python', 'name': 'python', 'nbconvert_exporter': 'python', 'pygments_lexer': 'ipython3', 'version': '3.7.3'}}

nbformat
4

nbformat_minor
4


#hide

## Extracting code from the notebook

With the notebook loaded it is time to extract info:

1. Check if notebook is flaged as convertible
2. Get the Export Cells
3. Combine all the cells into python code

In [8]:
#export
def _convertible_nb(data) -> str:
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#convert"):
            return True
    return False

In [9]:
#hide
_convertible_nb(notebook)

True

In [10]:
#export
def _get_exported_cells(data) -> List:
    exported_cells = []
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#export"):
            cell["source"][0] = cell["source"][0].replace("#export", "# Cell: {}".format(len(exported_cells)))
            exported_cells.append(cell)
    return exported_cells

In [11]:
#hide
cells = _get_exported_cells(notebook)

In [12]:
#export
def _combine_cells_to_code(cells, file_path: str) -> str:
    code = "# AUTOGENERATED FROM: {}\n\n".format(file_path)
    for cell in cells:
        for line in cell["source"]:
            code += line
        # Ensure there are two empty lines between cells
        code += "\n\n\n"
        while code.endswith("\n\n\n\n"):
            code = code[:-2]
    
    # Ensure there is a single new line at file end.
    while code.endswith("\n\n"):
        code = code[:-2]
    return code

In [13]:
#hide
print(_combine_cells_to_code(cells, file_path))

# AUTOGENERATED FROM: jlabdev/convert.ipynb

# Cell: 0
from typing import List, Dict, Optional
import json
import os
import sys


# Cell: 1
def _get_notebooks(folder: str = ".") -> List[str]:
    nbs = []
    for root, dirs, files in os.walk(folder):
        if ".ipynb_checkpoints" in root:
            continue
        for fname in files:
            if fname.endswith(".ipynb"):
                nbs.append(os.path.join(root, fname).replace("\\", "/").replace("./", ""))
    return nbs


# Cell: 2
def _get_notebook(file_path: str) -> Dict:
    with open(file_path, "r") as f:
        return json.loads(f.read())


# Cell: 3
def _convertible_nb(data) -> str:
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#convert"):
            return True
    return False


# Cell: 4
def _get_exported_cells(data) -> List:
    exported_cells = []
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(c

#hide

## Writing the code

Now the next step is to write the python file. For this we need to know the project root, the notebook and the filepath where the notebook was saved.
With that info we can create the python code and save it in the correct location relative to the project root.

In [14]:
#export
def _write_py(notebook, file_path: str, root: str= ".") -> None:
    exported_cells = _get_exported_cells(notebook)
    if not _convertible_nb(notebook):
        return
    py_package = file_path.replace("/", ".").replace("..", ".").replace(".ipynb", "")
    py_path = os.path.join(root, py_package.replace(".", os.sep) + ".py")
    code = _combine_cells_to_code(exported_cells, file_path)
    package_path = os.path.join(root, os.sep.join(py_package.split(".")[:-1]))
    os.makedirs(package_path, exist_ok=True)
    with open(py_path, "w") as f:
        f.write(code)

In [15]:
#hide
_write_py(notebook, file_path)

## Convert all Notebooks in a Folder to Python

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook`
2. All code cells where the first line is `#export`

Then using the cells marked with `#export` a python file in `python_package/for/this/notebook.py` is created.
The generated python file should not be modified and the cell and AUTOGENERATED comments must not be deleted.

In [16]:
#export
def notebook2py(project_root: str = ".") -> None:
    """
    Convert all notebooks in the folder.
    
    :param project_root: The root directory of the project. The default exp path is relative to this folder.
    :param nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.
    """
    notebooks = _get_notebooks(project_root)
    for nb_path in notebooks:
        print("Converting to py: {}".format(nb_path))
        notebook = _get_notebook(nb_path)
        _write_py(notebook, nb_path, root=project_root)

In [17]:
notebook2py()

Converting to py: jlabdev/convert.ipynb


#hide

# Markdown Conversion

This is actually more complicated. For this we will need to find all markdown cells and extract the doc from all #export cells and convert all non export cells to markdown.

In [18]:
#export
def _extract_doc(source:str) -> str:
    lines = source.split("\n")
    docs = []
    mode = 0
    for line in lines:
        if mode == 0:
            if ("def " in line or "class " in line) and not ("def _" in line or "class _" in line):
                current_doc = [line, ""]
                mode = 1
        elif mode == 1:
            if "\"\"\"" in line:
                mode = 2
            else:
                mode = 0
                docs.append(current_doc)
        elif mode == 2:
            if "\"\"\"" in line:
                mode = 0
                docs.append(current_doc)
            else:
                current_doc[1] += line.lstrip() + "\n"                
        
    if len(docs) == 0:
        return ""
    
    output = ""
    for doc in docs:
        output += "**" + doc[0] + "**\n\n"
        output += doc[1].replace(":param", "*")
        output += "\n\n"
        
    return output

In [19]:
#export
def _get_doc(data) -> str:
    doc = ""
    title = None
    for cell in data["cells"]:
        # Example Cell
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and not cell["source"][0].startswith("#export") and not cell["source"][0].startswith("#hide") and not cell["source"][0].startswith("#convert"):
            doc += "Example:\n"
            doc += "```python\n"
            for line in cell["source"]:
                doc += line
            doc += "\n```\n"
            doc += "Output:\n"
            doc += "```\n"
            for outp in cell["outputs"]:
                if "text" in outp:
                    for entry in outp["text"]:
                        doc += entry
                if "traceback" in outp:
                    for entry in outp["traceback"]:
                        while entry.find('\x1b') >= 0:
                            start = entry.find('\x1b')
                            end = entry.find("m", start)
                            entry = entry[:start] + entry[end+1:]
                        doc += entry + "\n"
            doc += "\n```\n"
            doc += "\n"

        # Export Cell
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#export"):
            source = "".join(cell["source"])
            doc += _extract_doc(source)
            
        # Regular Markdown Cell
        if cell["cell_type"] == "markdown" and len(cell["source"]) > 0 and not cell["source"][0].startswith("#hide"):
            for line in cell["source"]:
                if line.startswith("# ") and title is None:
                    title = line[2:]
                doc += line
            doc += "\n\n"
    return doc, title

In [20]:
#hide
from IPython.display import Markdown as md
notebook = _get_notebook(file_path)
md(_get_doc(notebook)[0])

# 01. Convert Notebooks to *.py and *.md

> This tool allows conversion from an ipynb notebook into .py or .md files.

## Convert all Notebooks in a Folder to Python

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook`
2. All code cells where the first line is `#export`

Then using the cells marked with `#export` a python file in `python_package/for/this/notebook.py` is created.
The generated python file should not be modified and the cell and AUTOGENERATED comments must not be deleted.

**def notebook2py(project_root: str = ".") -> None:**

Convert all notebooks in the folder.

* project_root: The root directory of the project. The default exp path is relative to this folder.
* nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.


Example:
```python
notebook2py()
```
Output:
```
Converting to py: jlabdev/convert.ipynb

```

## Convert all Notebooks in a Folder to Markdown

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook` is searched to find the output file name.
2. All cells are scanned and based on their type an action is taken:
    * `#hide` -> Cell is ignored.
    * markdown -> The cell is 1 to 1 copied into the doc.
    * `#export` code cell -> The cell is scanned for any public function and class. They are added with their docstring to the markdown.
    * other code cell -> The cell is treated as an example and the code and the output are inserted into the markdown.


**def notebook2md(project_root: str = ".") -> None:**

Convert all notebooks in the folder.

* project_root: The root directory of the project. The default exp path is relative to this folder.
* nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.


Example:
```python
notebook2md()
```
Output:
```
Converting to md: jlabdev/convert.ipynb

```

**def python2nb(project_root: str = ".") -> None:**

Convert all notebooks in the folder.

* project_root: The root directory of the project. The default exp path is relative to this folder.
* nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.




In [21]:
#export
def _write_md(file_path, root: str = ".") -> str:
    notebook = _get_notebook(file_path)
    doc_path = os.path.join(root, "docs")
    os.makedirs(doc_path, exist_ok=True)
    
    py_package = file_path.replace("/", ".").replace("..", ".").replace(".ipynb", "")
    md_name = py_package.replace(".", "/") + ".md"
    md_path = os.path.join(root, "docs", md_name).replace("\\", "/")
    doc, title = _get_doc(notebook)
    path = "/".join(md_path.split("/")[:-1])
    if not os.path.exists(path):
        os.makedirs(path)
    with open(md_path, "w") as f:
        f.write(doc)
    return md_name, title

In [22]:
#hide
_write_md("jlabdev/convert.ipynb")

('jlabdev/convert.md', '01. Convert Notebooks to *.py and *.md\n')

## Convert all Notebooks in a Folder to Markdown

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook` is searched to find the output file name.
2. All cells are scanned and based on their type an action is taken:
    * `#hide` -> Cell is ignored.
    * markdown -> The cell is 1 to 1 copied into the doc.
    * `#export` code cell -> The cell is scanned for any public function and class. They are added with their docstring to the markdown.
    * other code cell -> The cell is treated as an example and the code and the output are inserted into the markdown.


In [23]:
#export
README_TEMPLATE = """
# Package List

{toc}

"""

In [24]:
#export
def notebook2md(project_root: str = ".") -> None:
    """
    Convert all notebooks in the folder.
    
    :param project_root: The root directory of the project. The default exp path is relative to this folder.
    :param nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.
    """
    readme_template = README_TEMPLATE
    notebooks = _get_notebooks(project_root)
    index = []
    for nb_path in notebooks:
        print("Converting to md: {}".format(nb_path))
        index.append(_write_md(nb_path, root=project_root))
    
    index = sorted(index, key=lambda x: x[1])
    
    if len(index) > 0:
        with open(os.path.join(project_root, "docs", "README.md"), "w") as f:
            toc = ""
            for i in index:
                toc += "* [{}]({})\n".format(i[1], i[0])
            readme_template = readme_template.replace("`{toc}`", "`#toc%`").format(toc=toc).replace("`#toc%`", "`{toc}`")
            f.write(readme_template)

In [25]:
notebook2md()

Converting to md: jlabdev/convert.ipynb


In [26]:
#export
def _get_python_files(folder: str = ".") -> List[str]:
    pys = []
    for root, dirs, files in os.walk(folder):
        if ".ipynb_checkpoints" in root:
            continue
        for fname in files:
            if fname.endswith(".py"):
                pys.append(os.path.join(root, fname).replace("\\", "/").replace("./", ""))
    return pys

In [27]:
#hide
_get_python_files()

['setup.py', 'jlabdev/convert.py']

In [28]:
#export
def _get_py_cells(py_file):
    with open(py_file, "r") as f:
        data = f.read()
    if not data.startswith("# AUTOGENERATED FROM: "):
        return None, None
    
    lines = data.split("\n")
    file_path = lines[0].replace("# AUTOGENERATED FROM: ", "")
    cells = []
    current_cell = []
    header = True
    for line in lines:
        if line.startswith("# Cell:"):
            if not header:
                cells.append(current_cell)
            header = False
            current_cell = []
            continue
        if not header:
            current_cell.append(line)
    if not header:
        cells.append(current_cell)
    for i in range(len(cells)):
        cells[i] = "\n".join(cells[i])
        while cells[i].endswith("\n"):
            cells[i] = cells[i][:-1]
        cells[i] = cells[i].split("\n")
        for idx in range(len(cells[i]) -1):
            cells[i][idx] += "\n"
    return file_path, cells

In [29]:
#hide
file_path, exported_cells = _get_py_cells(_get_python_files()[0])
print(file_path, exported_cells)
file_path, exported_cells = _get_py_cells(_get_python_files()[1])
print(file_path, exported_cells[-1])

None None
jlabdev/convert.ipynb ['if __name__ == "__main__":\n', '    if "--nb2py" in sys.argv:\n', '        notebook2py()\n', '    if "--nb2md" in sys.argv:\n', '        notebook2md()\n', '    if "--py2nb" in sys.argv:\n', '        python2nb()']


In [30]:
#export
def _overwrite_exported_cells(data, cells):
    i = 0
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#export"):
            cell["source"] = ["#export\n"] + cells[i]
            i += 1

In [31]:
#hide
notebook = _get_notebook(file_path)
_overwrite_exported_cells(notebook, exported_cells)

In [32]:
#export
def _save_notebook(file_path: str, notebook: Dict) -> None:
    with open(file_path, "w") as f:
        return f.write(json.dumps(notebook))

In [33]:
#hide
_save_notebook("jlabdev/test.ipynb", notebook)

37277

In [34]:
#hide
os.remove("jlabdev/test.ipynb")

In [35]:
#export
def python2nb(project_root: str = ".") -> None:
    """
    Convert all notebooks in the folder.
    
    :param project_root: The root directory of the project. The default exp path is relative to this folder.
    :param nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.
    """
    readme_template = README_TEMPLATE
    pyfiles = _get_python_files(project_root)
    index = []
    for py_path in pyfiles:
        print("Converting to notebook: {}".format(py_path))
        
        file_path, exported_cells = _get_py_cells(py_path)
        if file_path is not None:
            notebook = _get_notebook(file_path)
            _overwrite_exported_cells(notebook, exported_cells)
            _save_notebook(file_path, notebook)
            print("Updated notebook: {}".format(file_path))

In [36]:
#export
if __name__ == "__main__":
    if "--nb2py" in sys.argv:
        notebook2py()
    if "--nb2md" in sys.argv:
        notebook2md()
    if "--py2nb" in sys.argv:
        python2nb()