In [1]:
#default_exp professional_notebooks.notebook_convert

# 01. Convert Notebooks to *.py and *.md

> This tool allows conversion from an ipynb notebook into .py or .md files.

In [2]:
#export
from typing import List, Dict, Optional
import json
import os
import sys

#hide

First we will implement a helper function that allows us to find all notebooks. For that we will scan the folder provided.

In [3]:
#export
def _get_notebooks(folder: str) -> List[str]:
    nbs = []
    for root, dirs, files in os.walk(folder):
        if ".ipynb_checkpoints" in root:
            continue
        for fname in files:
            if fname.endswith(".ipynb"):
                nbs.append(os.path.join(root, fname))
    return nbs

#hide

A quick test on the source folder.

In [4]:
#hide
_get_notebooks(".")

['.\\notebook_convert.ipynb']

#hide

Now that this works. Let's load the first notebook.

In [5]:
#export
def _get_notebook(file_path: str) -> Dict:
    with open(file_path, "r") as f:
        return json.loads(f.read())

In [6]:
#hide
file_path = "notebook_convert.ipynb"
notebook = _get_notebook(file_path)
print(list(notebook.keys()))

print("cell 0")
print(notebook["cells"][0])

print("cell 1")
print(notebook["cells"][1])

print()
print("metadata")
print(notebook["metadata"])

print()
print("nbformat")
print(notebook["nbformat"])

print()
print("nbformat_minor")
print(notebook["nbformat_minor"])

['cells', 'metadata', 'nbformat', 'nbformat_minor']
cell 0
{'cell_type': 'code', 'execution_count': 1, 'metadata': {}, 'outputs': [], 'source': ['#default_exp professional_notebooks.notebook_convert']}
cell 1
{'cell_type': 'markdown', 'metadata': {}, 'source': ['# 01. Convert Notebooks to *.py and *.md\n', '\n', '> This tool allows conversion from an ipynb notebook into .py or .md files.']}

metadata
{'kernelspec': {'display_name': 'Python 3', 'language': 'python', 'name': 'python3'}, 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3}, 'file_extension': '.py', 'mimetype': 'text/x-python', 'name': 'python', 'nbconvert_exporter': 'python', 'pygments_lexer': 'ipython3', 'version': '3.7.6'}}

nbformat
4

nbformat_minor
4


#hide

## Extracting code from the notebook

With the notebook loaded it is time to extract info:

1. Get the Export Path
2. Get the Export Cells
3. Combine all the cells into python code

In [7]:
#export
def _get_default_exp(data) -> str:
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#default_exp "):
            return cell["source"][0].replace("#default_exp ", "")

In [8]:
#hide
_get_default_exp(notebook)

'professional_notebooks.notebook_convert'

In [9]:
#export
def _get_exported_cells(data) -> List:
    exported_cells = []
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#export"):
            cell["source"][0] = cell["source"][0].replace("#export", "# Cell: {}".format(len(exported_cells)))
            exported_cells.append(cell)
    return exported_cells

In [10]:
#hide
cells = _get_exported_cells(notebook)

In [11]:
#export
def _combine_cells_to_code(cells, file_path: str) -> str:
    code = "# AUTOGENERATED FROM: {}\n\n".format(file_path)
    for cell in cells:
        for line in cell["source"]:
            code += line
        # Ensure there are two empty lines between cells
        code += "\n\n\n"
        while code.endswith("\n\n\n\n"):
            code = code[:-2]
    
    # Ensure there is a single new line at file end.
    while code.endswith("\n\n"):
        code = code[:-2]
    return code

In [12]:
#hide
print(_combine_cells_to_code(cells, file_path))

# AUTOGENERATED FROM: notebook_convert.ipynb

# Cell: 0
from typing import List, Dict, Optional
import json
import os
import sys


# Cell: 1
def _get_notebooks(folder: str) -> List[str]:
    nbs = []
    for root, dirs, files in os.walk(folder):
        if ".ipynb_checkpoints" in root:
            continue
        for fname in files:
            if fname.endswith(".ipynb"):
                nbs.append(os.path.join(root, fname))
    return nbs


# Cell: 2
def _get_notebook(file_path: str) -> Dict:
    with open(file_path, "r") as f:
        return json.loads(f.read())


# Cell: 3
def _get_default_exp(data) -> str:
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#default_exp "):
            return cell["source"][0].replace("#default_exp ", "")


# Cell: 4
def _get_exported_cells(data) -> List:
    exported_cells = []
    for cell in data["cells"]:
        if cell["cell_type"] == "code" and len(cell["source

#hide

## Writing the code

Now the next step is to write the python file. For this we need to know the project root, the notebook and the filepath where the notebook was saved.
With that info we can create the python code and save it in the correct location relative to the project root.

In [13]:
#export
def _write_py(root: str, notebook, file_path: str) -> None:
    exported_cells = _get_exported_cells(notebook)
    py_package = _get_default_exp(notebook)
    py_path = os.path.join(root, py_package.replace(".", os.sep) + ".py")
    code = _combine_cells_to_code(cells, file_path)
    package_path = os.path.join(root, os.sep.join(py_package.split(".")[:-1]))
    os.makedirs(package_path, exist_ok=True)
    with open(py_path, "w") as f:
        f.write(code)

In [14]:
#hide
_write_py("..", notebook, file_path)

## Convert all Notebooks in a Folder to Python

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook`
2. All code cells where the first line is `#export`

Then using the cells marked with `#export` a python file in `python_package/for/this/notebook.py` is created.
The generated python file should not be modified and the cell and AUTOGENERATED comments must not be deleted.

In [15]:
#export
def notebook2py(project_root: str, nb_root: str) -> None:
    """
    Convert all notebooks in the folder.
    
    :param project_root: The root directory of the project. The default exp path is relative to this folder.
    :param nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.
    """
    notebooks = _get_notebooks(nb_root)
    for nb_path in notebooks:
        clean_nb_path = nb_path.replace(nb_root+os.sep, "")
        print("Converting to py: {}".format(clean_nb_path))
        notebook = _get_notebook(nb_path)
        _write_py(project_root, notebook, clean_nb_path)

In [16]:
notebook2py("..", ".")

Converting to py: notebook_convert.ipynb


#hide

# Markdown Conversion

This is actually more complicated. For this we will need to find all markdown cells and extract the doc from all #export cells and convert all non export cells to markdown.

In [17]:
#export
def _extract_doc(source:str) -> str:
    lines = source.split("\n")
    docs = []
    mode = 0
    for line in lines:
        if mode == 0:
            if ("def " in line or "class " in line) and not ("def _" in line or "class _" in line):
                current_doc = [line, ""]
                mode = 1
        elif mode == 1:
            if "\"\"\"" in line:
                mode = 2
            else:
                mode = 0
                docs.append(current_doc)
        elif mode == 2:
            if "\"\"\"" in line:
                mode = 0
                docs.append(current_doc)
            else:
                current_doc[1] += line.lstrip() + "\n"                
        
    if len(docs) == 0:
        return ""
    
    output = ""
    for doc in docs:
        output += "**" + doc[0] + "**\n\n"
        output += doc[1].replace(":param", "*")
        output += "\n\n"
        
    return output

In [18]:
#export
def _get_doc(data) -> str:
    doc = ""
    title = None
    for cell in data["cells"]:
        # Example Cell
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and not cell["source"][0].startswith("#export") and not cell["source"][0].startswith("#hide") and not cell["source"][0].startswith("#default_exp"):
            doc += "Example:\n"
            doc += "```python\n"
            for line in cell["source"]:
                doc += line
            doc += "\n```\n"
            doc += "Output:\n"
            doc += "```\n"
            for outp in cell["outputs"]:
                if "text" in outp:
                    for entry in outp["text"]:
                        doc += entry
                if "traceback" in outp:
                    for entry in outp["traceback"]:
                        while entry.find('\x1b') >= 0:
                            start = entry.find('\x1b')
                            end = entry.find("m", start)
                            entry = entry[:start] + entry[end+1:]
                        doc += entry + "\n"
            doc += "\n```\n"
            doc += "\n"

        # Export Cell
        if cell["cell_type"] == "code" and len(cell["source"]) > 0 and cell["source"][0].startswith("#export"):
            source = "".join(cell["source"])
            doc += _extract_doc(source)
            
        # Regular Markdown Cell
        if cell["cell_type"] == "markdown" and len(cell["source"]) > 0 and not cell["source"][0].startswith("#hide"):
            for line in cell["source"]:
                if line.startswith("# ") and title is None:
                    title = line[2:]
                doc += line
            doc += "\n\n"
    return doc, title

In [19]:
#hide
from IPython.display import Markdown as md
notebook = _get_notebook(file_path)
md(_get_doc(notebook)[0])

# 01. Convert Notebooks to *.py and *.md

> This tool allows conversion from an ipynb notebook into .py or .md files.

## Convert all Notebooks in a Folder to Python

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook`
2. All code cells where the first line is `#export`

Then using the cells marked with `#export` a python file in `python_package/for/this/notebook.py` is created.
The generated python file should not be modified and the cell and AUTOGENERATED comments must not be deleted.

**def notebook2py(project_root: str, nb_root: str) -> None:**

Convert all notebooks in the folder.

* project_root: The root directory of the project. The default exp path is relative to this folder.
* nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.


Example:
```python
notebook2py("..", ".")
```
Output:
```
Converting to py: notebook_convert.ipynb

```

## Convert all Notebooks in a Folder to Markdown

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook` is searched to find the output file name.
2. All cells are scanned and based on their type an action is taken:
    * `#hide` -> Cell is ignored.
    * markdown -> The cell is 1 to 1 copied into the doc.
    * `#export` code cell -> The cell is scanned for any public function and class. They are added with their docstring to the markdown.
    * other code cell -> The cell is treated as an example and the code and the output are inserted into the markdown.


**def notebook2md(project_root: str, nb_root: str) -> None:**

Convert all notebooks in the folder.

* project_root: The root directory of the project. The default exp path is relative to this folder.
* nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.


Example:
```python
notebook2md("..", ".")
```
Output:
```
Converting to md: notebook_convert.ipynb

```



In [20]:
#export
def _write_md(root: str, notebook) -> str:
    doc_path = os.path.join(root, "docs")
    os.makedirs(doc_path, exist_ok=True)
    
    md_name = _get_default_exp(notebook) + ".md"
    md_path = os.path.join(root, "docs", md_name)
    doc, title = _get_doc(notebook)
    with open(md_path, "w") as f:
        f.write(doc)
    return md_name, title

In [21]:
#hide
_write_md("..", _get_notebook("notebook_convert.ipynb"))

('professional_notebooks.notebook_convert.md',
 '01. Convert Notebooks to *.py and *.md\n')

## Convert all Notebooks in a Folder to Markdown

One core feature of this library is to convert all notebook files located beneath the `nb_root` folder into python files relative to the `project_root`.
For this conversion the notebook is scanned for:

1. A code cell which contians `#default_exp python_package.for.this.notebook` is searched to find the output file name.
2. All cells are scanned and based on their type an action is taken:
    * `#hide` -> Cell is ignored.
    * markdown -> The cell is 1 to 1 copied into the doc.
    * `#export` code cell -> The cell is scanned for any public function and class. They are added with their docstring to the markdown.
    * other code cell -> The cell is treated as an example and the code and the output are inserted into the markdown.


In [22]:
#export
README_TEMPLATE = """# Documentation

## Documentation

{}
"""

In [23]:
#export
def notebook2md(project_root: str, nb_root: str) -> None:
    """
    Convert all notebooks in the folder.
    
    :param project_root: The root directory of the project. The default exp path is relative to this folder.
    :param nb_root: The root directory of all the notebooks. Only notebooks in this or any subfolder will be considered.
    """
    readme_template = README_TEMPLATE
    if os.path.exists(os.path.join(nb_root, "README_TEMPLATE.md")):
        with open(os.path.join(nb_root, "README_TEMPLATE.md"), "r") as f:
            readme_template = f.read()
    else:
        print("No index.md found, using default template.")
    
    notebooks = _get_notebooks(nb_root)
    index = []
    for nb_path in notebooks:
        clean_nb_path = nb_path.replace(nb_root+os.sep, "")
        print("Converting to md: {}".format(clean_nb_path))
        notebook = _get_notebook(nb_path)
        index.append(_write_md(project_root, notebook))
    
    index = sorted(index, key=lambda x: x[1])
    
    if len(index) > 0:
        with open(os.path.join(project_root, "README.md"), "w") as f:
            toc = ""
            for i in index:
                toc += "* [{}](docs/{})\n".format(i[1], i[0])
            readme_template = readme_template.format(toc)
            f.write(readme_template)

In [24]:
notebook2md("..", ".")

Converting to md: notebook_convert.ipynb
