In [14]:
import re

from nbconvert.preprocessors import Preprocessor
from traitlets import Bool, List, Unicode


class StringsToMetaDataGroupPreprocessor(Preprocessor):
    """Convert marker comments such as ``# hide`` into cell metadata entries.

    For every configured string, a line consisting only of ``prefix`` followed
    by that string is searched for in the source of each non-markdown cell.
    When it is found, the string is appended to the ``metadata_group`` list in
    the cell's metadata and, if ``remove_line`` is set, the marker line is
    deleted from the cell source.
    """

    # Marker strings to look for; each one is matched literally.
    strings = List(Unicode(), default_value=[]).tag(config=True)
    # Literal text that must precede a marker string on its line.
    prefix = Unicode(default_value="#").tag(config=True)
    # Key inside ``cell["metadata"]`` whose list receives the matched strings.
    metadata_group = Unicode(default_value="tags").tag(config=True)
    # Whether matched marker lines are removed from the cell source.
    remove_line = Bool(default_value=True).tag(config=True)

    def _write_pattern_as_tag(self, pattern, cell):
        """Record the string captured by ``pattern`` in the cell's metadata.

        Parameters
        ----------
        pattern : re.Pattern
            Compiled pattern whose first group captures the marker string.
        cell : NotebookNode
            Cell whose ``source`` is searched and whose metadata is updated.

        Returns
        -------
        The same cell object, modified in place when the pattern matched.
        """
        m = pattern.search(cell.source)
        if m is not None:
            tag = m.group(1)
            # setdefault returns the list stored in the metadata, so appending
            # to it updates the cell directly.
            tags = cell.setdefault("metadata", {}).setdefault(self.metadata_group, [])
            if tag not in tags:
                tags.append(tag)
        return cell

    def _build_regex_patterns(self):
        """Compile one multi-line pattern per entry of ``strings``.

        Both ``prefix`` and the strings are escaped, so they are matched
        literally and are safe to embed in the verbose pattern.
        """
        prefix = re.escape(self.prefix)
        escaped_strings = [re.escape(s) for s in self.strings]
        patterns = [
            re.compile(
                rf"""
                ^ # match start of each line
                \s*{prefix}\s* # allow whitespace before and after prefix
                ({string}) # pattern to capture
                \s* # allow any number of whitespaces after command
                $ # match end of each line (excludes \n in MULTILINE)
                [\r\n]* # Capture current and all following empty newlines
            """,
                re.VERBOSE | re.MULTILINE,
            )
            for string in escaped_strings
        ]
        return patterns

    def preprocess_cell(self, cell, resource, index):
        """Tag a single cell and optionally strip its marker lines.

        Markdown cells are passed through untouched.

        Returns
        -------
        tuple
            ``(cell, resource)``, for every cell type.
        """
        if cell["cell_type"] == "markdown":
            # nbconvert's Preprocessor.preprocess unpacks the return value as a
            # (cell, resources) pair, so a bare ``return`` would raise here.
            return cell, resource
        for pattern in self._build_regex_patterns():
            cell = self._write_pattern_as_tag(pattern, cell)
            if self.remove_line:
                cell.source = pattern.sub("", cell.source)
        return cell, resource

In [15]:
import nbconvert
import nbformat
import traitlets
from nbconvert.exporters.html import HTMLExporter

In [22]:
# Minimal fixture: an empty v4 notebook holding one code cell whose first
# line is the "# hide" marker comment.
nb = nbformat.v4.new_notebook()
nb["cells"].append(
    nbformat.v4.new_code_cell("# hide\nimport os"),
)
nb

{'nbformat': 4,
 'nbformat_minor': 5,
 'metadata': {},
 'cells': [{'id': '6800ebfe',
   'cell_type': 'code',
   'metadata': {},
   'execution_count': None,
   'source': '# hide\nimport os',
   'outputs': []}]}

In [26]:
# Run the preprocessor over the fixture notebook; the returned resources are
# not needed here.
nb, _ = StringsToMetaDataGroupPreprocessor(
    strings=("hide",),
    prefix="#",
).preprocess(nb, None)

# The marker line must be gone from the source ...
assert nb["cells"][0]["source"] == "import os"
# ... and must have been recorded as the first tag of the cell.
assert nb["cells"][0]["metadata"]["tags"][0] == "hide"

TODO:
- Write some serious tests
    - property-based tests with `hypothesis` would be awesome!
- Add some small documentation

# Blocknote to shortcode

In [38]:
# NOTE(review): looks like a leftover scratch value — no cell visible here reads
# this global (the class below uses its own comprehension variable of the same
# name). TODO confirm before removing.
sc_name = "warning"

In [50]:
class ConvertBlockNotesToShortCodes(Preprocessor):
    """Rewrite ``> name: text`` lines in markdown cells as paired shortcodes.

    For each configured short-code name, a line of the form ``> name: text``
    becomes ``{{% name %}}text{{% /name %}}``.  Only markdown cells are
    modified; every other cell is passed through unchanged.
    """

    # Short-code names to recognise; each one is matched literally.
    short_code_names = List(Unicode(), default_value=[]).tag(config=True)

    def _pattern_to_short_code(self, pattern, cell):
        """Replace every match of ``pattern`` in the cell source by a shortcode.

        Group 1 is the short-code name, group 2 the text of the note and
        group 3 the line break that ended the line (empty at end of text).
        Re-emitting group 3 keeps the following line on its own line instead
        of gluing it to the closing shortcode.
        """
        cell.source = pattern.sub(r"{{% \1 %}}\2{{% /\1 %}}\3", cell.source)
        return cell

    def _build_regex_patterns(self):
        """Compile one multi-line pattern per entry of ``short_code_names``."""
        escaped_sc_names = [re.escape(s) for s in self.short_code_names]
        patterns = [
            re.compile(
                rf"""
                ^\s*>\s*     # allow whitespaces before and after >
                ({sc_name})      # Short-code name
                :\s*         # : then any number of whitespace
                ([^\n]*)     # Catching group for anything but a new line character
                (\n|$)       # Catching group for the line break or the end of the text
                """, re.VERBOSE | re.MULTILINE
            )
            for sc_name in escaped_sc_names
        ]
        return patterns

    def preprocess_cell(self, cell, resource, index):
        """Convert the block notes of a single markdown cell.

        Returns
        -------
        tuple
            ``(cell, resource)``, for every cell type.
        """
        if cell.cell_type != "markdown":
            # nbconvert's Preprocessor.preprocess unpacks the return value as a
            # (cell, resources) pair, so a bare ``return`` would raise here.
            return cell, resource
        for pattern in self._build_regex_patterns():
            cell = self._pattern_to_short_code(pattern, cell)
        return cell, resource

In [51]:
# Minimal fixture: an empty v4 notebook holding one markdown cell that
# contains a single "> warning: ..." block note.
nb = nbformat.v4.new_notebook()
nb["cells"].append(
    nbformat.v4.new_markdown_cell("> warning: This is a warning"),
)
nb

{'nbformat': 4,
 'nbformat_minor': 5,
 'metadata': {},
 'cells': [{'id': 'f3a17e22',
   'cell_type': 'markdown',
   'metadata': {}}]}

In [55]:
# Run the converter over the fixture notebook; the returned resources are
# not needed here.
nb, _ = ConvertBlockNotesToShortCodes(
    short_code_names=["warning"],
).preprocess(nb, None)

# The block note must have been rewritten as an opening/closing shortcode pair.
assert nb["cells"][0]["source"] == r"{{% warning %}}This is a warning{{% /warning %}}"
nb

{'nbformat': 4,
 'nbformat_minor': 5,
 'metadata': {},
 'cells': [{'id': 'f3a17e22',
   'cell_type': 'markdown',
   'metadata': {}}]}

## Tag to Collapsible Hide

In [72]:
from nbconvert.preprocessors import TagRemovePreprocessor