# This Jupyter Notebook generates this website.

I wanted to be able to generate my website from a collection of Jupyter Notebooks.
There are already tools for this.
However, I thought it would be fun if the site was generated from one of its own notebooks. 
This is that notebook.

This is still a work in progress (it is missing features and needs some tidying up).

Some things this notebook does:
- Read the Jupyter Notebook (it's json data, so easy to load).
- Get the text content of markdown/code cells.
- Get the text output or image output of cells.
- Enable "cell parameters", e.g. for flagging when to hide cells, etc.
- Put it into html
- Use pygments to convert code to highlighted html
- Use markdown to convert the markdown to html
- Convert the base64 string images into images, to enable caching/smaller sizes.
- Specify the styling.

In [12]:
import json
from pathlib import Path
import re

from markdown import markdown
import pygments
from pygments import highlight
from tqdm import tqdm

# `import pygments` on its own does not load the `lexers` / `formatters`
# subpackages. They are often already loaded inside a Jupyter kernel, but not
# necessarily when this file runs as a plain script, so import them explicitly.
import pygments.formatters
import pygments.lexers

# Shared pygments objects, created once and reused for every cell.
python_lexer = pygments.lexers.PythonLexer()  # highlights code-cell sources
text_lexer = pygments.lexers.TextLexer()  # formats plain-text stream outputs
html_formatter = pygments.formatters.HtmlFormatter(linenos=False)

In [13]:
# Locate the directory that holds the notebooks.
try:
    # Running as a script: `__file__` is this file, so use its directory.
    nb_dir = Path(__file__).resolve().parent
except NameError:
    # Running inside Jupyter: `__file__` is undefined, so use the working
    # directory instead.
    nb_dir = Path().resolve()
print(nb_dir)

WEBSITE_DIR = nb_dir.parent  # generated content goes here

/Users/sidneyradcliffe/repos/nbs-to-website/_notebooks


### Cell parameters

Make it possible to put parameters in Jupyter Notebook cells that can be read when building the website.

In [14]:
def get_cell_flag(x: str) -> dict[str, bool | str]:
    """Get parameters in Jupyter cells, that are comments in the form:

    #{{my_flag}}
    #{{my_key=my_value}}
    x = 4
    #{{foo=bar}}

    """
    params = re.findall(r"^#\{\{.+\}\}", x, flags=re.MULTILINE)
    params = (x[3:-2].split("=") for x in params)  # remove '{{'/'}}' from the strings
    result = {}
    for x in params:
        if len(x) == 1:
            result[x[0]] = True
        elif len(x) == 2:
            result[x[0]] = x[1]
        else:
            raise RuntimeError(f"Invalid param length, '{len(x) = }', {x = }")
    return result


# Example: flag lines mixed with ordinary cell text; only the lines that
# start with "#{{" contribute to the result.
get_cell_flag(
    "#{{my_flag}}\n"
    "#{{my_key=my_value}}\n"
    "This is a simulation of a ipynb cell.\n"
    "#{{foo=bar}}\n"
)

{'my_flag': True, 'my_key': 'my_value', 'foo': 'bar'}

### Parsing the notebook

In [15]:

def nb_to_html(nb_path: str) -> str:
    nb_data = json.loads(Path(nb_path).read_text())
    cells_as_html = (cell_to_html(cell) for cell in nb_data["cells"])
    html = "".join(cells_as_html)
    return f'<div class="notebook">{html}</div>'


def cell_to_html(cell: dict) -> str:
    """Render one notebook cell as an HTML ``<div class="block">``.

    Markdown cells are converted with ``markdown``; code cells are
    highlighted with pygments and followed by their rendered outputs, if any.
    Any other cell type yields an empty block.

    Args:
        cell: A cell dict taken from the notebook's ``"cells"`` list.

    Returns:
        The cell's HTML, or ``""`` when the cell has no source text.
    """
    content = "".join(cell.get("source", "")).strip()
    if not content:
        return ""

    parts: list[str] = []
    cell_type = cell["cell_type"]
    if cell_type == "markdown":
        parts.append(f'<div class="markdown">{markdown(content)}</div>')
    elif cell_type == "code":
        highlighted = highlight(content, python_lexer, html_formatter)
        parts.append(f'<div class="code">{highlighted}</div>')
        # `.get` so a code cell without an "outputs" key is not an error.
        outputs = "".join(cell_outputs_to_html(cell.get("outputs", []))).strip()
        if outputs:
            parts.append(f'<div class="outputs">{outputs}</div>')

    # The flags are parsed (so a malformed `#{{...}}` line still raises here)
    # but nothing acts on them yet.
    get_cell_flag(content)
    body = "".join(parts)
    return f'<div class="block">{body}</div>'


def cell_outputs_to_html(cell_outputs: list[dict]) -> list[str]:
    result = []
    for cell_output in cell_outputs:
        match cell_output["output_type"]:
            case "stream":
                if text := "".join(cell_output["text"]).strip():
                    content = highlight(text, text_lexer, html_formatter)
                    html = f'<div class="outtext">{content}</div>'
                    result.append(html)
            case "display_data":
                if imdata := cell_output["data"]["image/png"].strip():
                    img = "data:image/png;base64," + imdata
                    html = f'<img src="{img}" class="out_img"/>'
                    result.append(html)
            case _:
                pass
    return result


In [16]:

# HTML snippet interpolated into the <head> of every generated post page.
# It loads MathJax 2.7.7 from the cdnjs CDN and configures `$...$` as inline
# math and `$$...$$` as display math.
mathjax = '''
<script type="text/javascript" async
  src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
</script>

<!-- MathJax configuration -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
    tex2jax: {
        inlineMath: [ ['$','$'], ],
        displayMath: [ ['$$','$$'], ],
        processEscapes: true,
        processEnvironments: true
    },
    // Center justify equations in code and markdown cells. Elsewhere
    // we use CSS to left justify single line equations in code cells.
    displayAlign: 'center',
    "HTML-CSS": {
        styles: {'.MathJax_Display': {"margin": 0}},
        linebreaks: { automatic: true }
    }
});
</script>
'''

# Site stylesheet: the pygments token colours for `.highlight`, followed by
# the layout rules for the notebook / markdown / code / outputs containers.
# CSS only supports `/* ... */` comments, so the former `//` trailers are
# written that way.
# NOTE(review): `.code.outputs *` only matches inside an element that carries
# both classes, while cell_to_html emits `code` and `outputs` on separate
# divs, so that rule looks unused. Confirm whether `.code *, .outputs *` was
# intended.
style = html_formatter.get_style_defs('.highlight') + """
.notebook {
  margin: auto;
  max-width: 666px;
  /*border: 1px solid green;*/
  padding: 4px;
}

.markdown {
    font-family: 'Open Sans', sans-serif;
}

.code.outputs * {
  font-family: 'Overpass Mono';
  font-style: normal;
}

.code * {
    /*padding: 0px 8px 0px 8px;*/
    background-color: #f8f8f8;
    font-family: 'Overpass Mono', monospace;
    font-size: 15px;
}

.outputs {
    padding: 0px 16px 0px 16px;
}

.outputs * {
    font-family: 'Overpass Mono', monospace;
    font-size: 15px;
}

/* override pygments */
.highlight {
    background-color: rgba(0,0,0,0);
}

pre {
    font-family: 'Overpass Mono', monospace;
    overflow: auto;
}

.highlight pre code * {
    white-space: nowrap;    /* this sets all children inside to nowrap */
}

.highlight pre {
    overflow-x: auto;       /* this sets the scrolling in x */
}

.highlight pre code {
    white-space: pre;       /* forces <code> to respect <pre> formatting */
}

/*
^- thanks:
https://david-kerwick.github.io/2012-05-03-get-prettify-to-behave-in-firefox/
*/
"""

### Site directory structure:

```
generated_site/
    index.html
    stylesheet.css
    script.js
    posts/
        notebook_1/
            index.html
            image_1.jpg
            image_2.jpg
        notebook_2/
            index.html
            image_1.jpg
```

To see it:

```bash
cd generated_site 
python -m http.server
```

In [17]:
def mkdir(path: Path) -> Path:
    """Create the directory ``path`` if needed and return it.

    Missing parent directories are created as well, and an already existing
    directory is not an error, so the site can be regenerated in place.

    Args:
        path: Directory to create.

    Returns:
        The same ``path``, so the call can be used inline.
    """
    path.mkdir(parents=True, exist_ok=True)
    return path

root = WEBSITE_DIR
posts = mkdir(root / "posts/")

# Write the shared assets as UTF-8 explicitly instead of relying on the
# platform's default text encoding.
(root / "stylesheet.css").write_text(style, encoding="utf-8")

(root / "script.js").write_text("""
console.log("hi from my script")
""", encoding="utf-8")

210

In [18]:
# Build one page per notebook under posts/<notebook name>/index.html, then
# the home page that links to all of them.
post_links = []
# sorted(): the glob is not sorted by itself, so sort to keep the link order
# on the home page stable between runs.
for notebook in tqdm(sorted(nb_dir.glob("*.ipynb"))):
    post_dir = mkdir(posts / notebook.stem)
    # The page declares charset utf-8, so it must be written as UTF-8 too.
    (post_dir / "index.html").write_text(f"""
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Test page</title>
    <link rel="stylesheet" href="../../stylesheet.css">
    <script src="../../script.js"></script>
    {mathjax}
  </head>
  <body>
    {nb_to_html(notebook)}
  </body>
</html>
    """, encoding="utf-8")
    # as_posix(): URLs use forward slashes on every platform.
    post_links.append(post_dir.relative_to(root).as_posix())

links: str = '\n'.join(f'<br/><a href="{x}">{x}</a>' for x in post_links)

(root / "index.html").write_text(f"""
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>sid's site</title>
    <link rel="stylesheet" href="./stylesheet.css">
    <script src="./script.js"></script>
  </head>
  <body>
    <h1>Home page</h1>
    <a href="/posts/test">Test page</a>
    {links}
  </body>
</html>
""", encoding="utf-8")


100%|██████████| 2/2 [00:00<00:00, 140.40it/s]


402