# This Jupyter Notebook generates this website.

I wanted to be able to generate my website from a collection of Jupyter Notebooks.
There are already tools for this.
However, I thought it would be fun if the site was generated from one of its own notebooks. 
This is that notebook.

This is still a work in progress (it is missing features and needs some tidying up).

Some things this notebook does:
- Read the Jupyter Notebook (it's JSON data, so easy to load).
- Get the text content of markdown/code cells.
- Get the text output or image output of cells.
- Enable "cell parameters", e.g. for flagging when to hide cells.
- Put it into html
- Use pygments to convert code to highlighted html
- Use markdown to convert the markdown to html
- Convert the base64 string images into images, to enable caching/smaller sizes.
- Specify the styling.

Requirements:

```
markdown=3.3.6
pygments=2.11.2
tqdm=4.62.3
```

In [25]:
import base64
import hashlib
import json
from pathlib import Path
import re

from marko import convert as markdown
from pygments import formatters, lexers, highlight
from pygments.styles import STYLE_MAP
from tqdm import tqdm

# Image files are named after a prefix of the SHA-256 hex digest of their data.
IMG_NAME_HASH_LEN = 5  # use first `IMG_NAME_HASH_LEN` of hash for filename

# Shared pygments objects: one lexer for code cells, one for plain-text cell
# output, and a single HTML formatter that also supplies the stylesheet rules.
python_lexer = lexers.PythonLexer()
text_lexer = lexers.TextLexer()
html_formatter = formatters.HtmlFormatter(linenos=False, style="default")

In [26]:
# Show the style names pygments knows about (candidates for `style=` above).
print(STYLE_MAP.keys())

dict_keys(['default', 'emacs', 'friendly', 'friendly_grayscale', 'colorful', 'autumn', 'murphy', 'manni', 'material', 'monokai', 'perldoc', 'pastie', 'borland', 'trac', 'native', 'fruity', 'bw', 'vim', 'vs', 'tango', 'rrt', 'xcode', 'igor', 'paraiso-light', 'paraiso-dark', 'lovelace', 'algol', 'algol_nu', 'arduino', 'rainbow_dash', 'abap', 'solarized-dark', 'solarized-light', 'sas', 'stata', 'stata-light', 'stata-dark', 'inkpot', 'zenburn', 'gruvbox-dark', 'gruvbox-light', 'dracula', 'one-dark', 'lilypond'])


In [27]:
# Work out which directory holds the notebooks.
try:
    # if running as a script
    nb_dir = Path(__file__).resolve().parent
except NameError:
    # if running as a Jupyter Notebook
    # (`__file__` is undefined there, so use the current working directory)
    nb_dir = Path().resolve()

# The site is generated into the parent of the notebook directory.
WEBSITE_DIR = nb_dir.parent
print(f"{WEBSITE_DIR = }")

WEBSITE_DIR = PosixPath('/Users/sidneyradcliffe/repos/nbs-to-website')


### Cell parameters

Make it possible to put parameters in Jupyter Notebook cells, so that they can be read when building the website.

In [28]:
def get_cell_flag(x: str) -> dict[str, bool | str]:
    """Get parameters in Jupyter cells, that are comments in the form:

    # {{my_flag}}
    # {{my_key=my_value}}
    x = 4
    # {{foo=bar}}

    """
    params = re.findall(r"^# \{\{.+\}\}", x, flags=re.MULTILINE)
    params = (x[4:-2].split("=") for x in params)  # remove '{{'/'}}' from the strings
    result = {}
    for x in params:
        if len(x) == 1:
            result[x[0]] = True
        elif len(x) == 2:
            result[x[0]] = x[1]
        else:
            raise RuntimeError(f"Invalid param length, '{len(x) = }', {x = }")
    return result


# Quick demonstration: parameter lines are picked up wherever they start a
# line in the cell source; the other lines are ignored.
example = """# {{my_flag}}
# {{my_key=my_value}}
This is a simulation of a ipynb cell.
# {{foo=bar}}
"""
print(get_cell_flag(example))

{'my_flag': True, 'my_key': 'my_value', 'foo': 'bar'}


### Parsing the notebook

In [29]:
def nb_to_html(nb_path: str, post_dir: Path) -> str:
    nb_data = json.loads(Path(nb_path).read_text())
    cells_as_html = (cell_to_html(cell, post_dir) for cell in nb_data["cells"])
    html = "<br>".join(cells_as_html)
    return f'<div class="notebook">{html}</div>'


def cell_to_html(cell: dict, post_dir: Path) -> str:
    """Render one notebook cell as an HTML string.

    Args:
        cell: A cell dict from the notebook JSON (`cell_type`, `source`, and
            `outputs` for code cells).
        post_dir: Output directory of the post; passed on to
            `cell_outputs_to_html`.

    Returns:
        `""` for a cell whose source is empty or whitespace-only. Otherwise a
        `<div class="markdown">` for markdown cells, or a `<div class="code">`
        followed by an optional `<div class="outputs">` for code cells. Any
        other cell type yields `""`.

    Raises:
        RuntimeError: If the cell source contains a malformed `# {{...}}`
            parameter (raised by `get_cell_flag`).
    """
    content = "".join(cell.get("source", "")).strip()
    if not content:
        return ""

    result = []
    if cell["cell_type"] == "markdown":
        result.append(f'<div class="markdown">{markdown(content)}</div>')
    elif cell["cell_type"] == "code":
        highlighted = highlight(content, python_lexer, html_formatter)
        result.append(f'<div class="code">{highlighted}</div>')
        outputs_html = cell_outputs_to_html(cell.get("outputs", []), post_dir)
        if outputs := "".join(outputs_html).strip():
            result.append(f'<div class="outputs">{outputs}</div>')

    # The parsed flags are not acted on yet; the call is kept so that a
    # malformed parameter still fails the build instead of passing silently.
    get_cell_flag(content)
    return "".join(result)


def cell_outputs_to_html(cell_outputs: list[dict], post_dir: Path) -> list[str]:
    result = []
    for cell_output in cell_outputs:
        match cell_output["output_type"]:
            case "stream":
                if text := "".join(x for x in cell_output["text"] if x.strip()).strip():
                    content = highlight(text, text_lexer, html_formatter)
                    html = f'<div class="outtext">{content}</div>'
                    result.append(html)
            case "display_data":
                if imdata := cell_output["data"]["image/png"].strip():
                    imdata = imdata.encode("utf-8")
                    hash = hashlib.sha256(imdata).hexdigest()
                    fname = f"{hash[:IMG_NAME_HASH_LEN]}.png"
                    (post_dir / fname).write_bytes(base64.decodebytes(imdata))
                    html = f'<img src="./{fname}" class="out_img"/>'
                    result.append(html)
            case _:
                pass
    return result

In [30]:
# HTML snippet that loads MathJax 2.7.7 from cdnjs and configures `$...$` as
# inline and `$$...$$` as display math delimiters. It is interpolated into the
# <head> of every post page.
# NOTE(review): the MathJax 2 docs appear to ask for the `text/x-mathjax-config`
# block to come before the script tag that loads MathJax.js - confirm the order.
mathjax = """
<script type="text/javascript" async
  src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML">
</script>

<!-- MathJax configuration -->
<script type="text/x-mathjax-config">
MathJax.Hub.Config({
    tex2jax: {
        inlineMath: [ ['$','$'], ],
        displayMath: [ ['$$','$$'], ],
        processEscapes: true,
        processEnvironments: true
    },
    // Center justify equations in code and markdown cells. Elsewhere
    // we use CSS to left justify single line equations in code cells.
    displayAlign: 'center',
    "HTML-CSS": {
        styles: {'.MathJax_Display': {"margin": 0}},
        linebreaks: { automatic: true }
    }
});
</script>
"""

# Site stylesheet: the pygments highlighting rules (scoped to `.highlight`)
# followed by hand-written layout rules for the notebook, code, markdown and
# output containers.
style = (
    html_formatter.get_style_defs(".highlight")
    + """
.code, .markdown, .outputs {
  margin: auto;
  max-width: 600px;
  padding: 4px 4px 4px 8px;
  box-sizing: border-box;
}
.code, .outputs {
  max-width: 720px;
  overflow: auto;
}
.code {
  background-color: #F8F8F8;
  /*box-shadow: inset 3px 3px 4px 3px #F0F0F0;*/
}
.outputs {
  /*
  box-shadow: inset 0px 0px 1px 4px #F8F8F8;
  background-color: lightgray;
  background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAAAAABX3VL4AAAADklEQVQI12P4/5/hPwMACvsC/tmfKMUAAAAASUVORK5CYII=);
  */
  box-shadow: inset 4px -4px 3px #F0F0F0;
  border: 6px solid #F8F8F8;
}
.notebook {
  font-family: sans-serif;
}
.code *, .outputs *, code {
  font-family: monospace;
  background-color: rgba(0,0,0,0);
}
"""
)

### Site directory structure:

```
generated_site/
    index.html
    stylesheet.css
    script.js
    posts/
        notebook_1/
            index.html
            image_1.png
            image_2.png
        notebook_2/
            index.html
            image_1.png
```

To see it:

```bash
cd generated_site 
python -m http.server
```

In [31]:
def mkdir(path: Path) -> Path:
    """Create `path` as a directory if it is missing and return it.

    Missing parent directories are created as well, and an already existing
    directory is not an error, so the call can be used inline, e.g.
    `posts = mkdir(root / "posts")`.
    """
    path.mkdir(parents=True, exist_ok=True)
    return path


# The site is written straight into WEBSITE_DIR, with posts under `posts/`.
root = WEBSITE_DIR
posts = mkdir(root / "posts/")

# Shared stylesheet, linked from every generated page.
(root / "stylesheet.css").write_text(style)

# Placeholder script, also linked from every generated page.
(root / "script.js").write_text(
    """
console.log("hi from my script")
"""
)

34

In [32]:
# Build one post page per notebook found in `nb_dir`, then a home page that
# links to every post.
post_links = []
# Sorted so the order of the links on the home page is stable between runs
# (glob order is not guaranteed).
for notebook in tqdm(sorted(nb_dir.glob("*.ipynb"))):
    post_dir = mkdir(posts / notebook.stem)
    # The page declares charset utf-8, so it is written as UTF-8 explicitly
    # rather than in the platform's locale encoding.
    (post_dir / "index.html").write_text(
        f"""
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Test page</title>
    <link rel="stylesheet" href="../../stylesheet.css">
    <script src="../../script.js"></script>
    {mathjax}
  </head>
  <body>
    {nb_to_html(notebook, post_dir)}
  </body>
</html>
    """,
        encoding="utf-8",
    )
    # hrefs need forward slashes regardless of the operating system.
    post_links.append(post_dir.relative_to(root).as_posix())

links: str = "\n".join(f'<br/><a href="{x}">{x}</a>' for x in post_links)

(root / "index.html").write_text(
    f"""
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>sid's site</title>
    <link rel="stylesheet" href="./stylesheet.css">
    <script src="./script.js"></script>
  </head>
  <body>
    <h1>Home page</h1>
    {links}
  </body>
</html>
""",
    encoding="utf-8",
)

100%|██████████| 2/2 [00:00<00:00, 92.89it/s]


437