In [1]:
import nbconvert
import traitlets
import nbformat
from nbconvert.exporters.html import HTMLExporter
from nbconvert.preprocessors import Preprocessor

In [2]:
import re

from traitlets import List, Unicode, Bool


class StringsToMetaDataGroupPreprocessor(Preprocessor):
    """Turn directive lines such as ``# hide`` into cell metadata entries.

    For every entry of ``strings`` a verbose, multi-line regex is compiled that
    matches a line made of ``prefix`` followed by that string (whitespace is
    allowed around both). When a cell's source contains such a line, the
    captured string is appended to ``cell.metadata[metadata_group]`` (no
    duplicates), and, if ``remove_line`` is true, every matching line is
    removed from the source together with the newlines that follow it.
    """

    # Do with capturing group
    # Directive keywords to look for; each is regex-escaped, i.e. matched literally.
    strings = List(Unicode(), default_value=[]).tag(config=True)
    # Marker that must precede a keyword on its line; regex-escaped as well.
    prefix = Unicode(default_value="#").tag(config=True)
    # Free text embedded as a comment inside the verbose regex (documentation only).
    comment = Unicode().tag(config=True)
    # Key inside ``cell.metadata`` whose list receives the captured keywords.
    metadata_group = Unicode(default_value="tags").tag(config=True)
    # Whether matching directive lines are stripped from the cell source.
    remove_line = Bool(default_value=True).tag(config=True)

    def _write_pattern_as_tag(self, pattern, cell):
        """Store the keyword captured by ``pattern`` in the cell's metadata.

        Only the first match in ``cell.source`` is looked at. The captured
        group is appended to the ``metadata_group`` list unless it is already
        present. The cell is modified in place and returned.
        """
        match = pattern.search(cell.source)
        if match is None:
            return cell
        tag = match.group(1)
        # ``setdefault`` returns the very list that is stored in the metadata,
        # so appending to it is sufficient; no write-back is required.
        tags = cell.setdefault("metadata", {}).setdefault(self.metadata_group, [])
        if tag not in tags:
            tags.append(tag)
        return cell

    def _build_regex_patterns(self):
        """Compile one ``re.VERBOSE | re.MULTILINE`` pattern per entry of ``strings``.

        Returns a list of compiled patterns, in the order of ``strings``. Group 1
        of each pattern is the (escaped) keyword itself.
        """
        prefix = re.escape(self.prefix)
        # A verbose-mode regex comment ends at the first newline; any text after
        # it would be parsed as part of the pattern. Collapse the user-supplied
        # comment to a single line so it can never alter what is matched.
        comment = " ".join(self.comment.splitlines())
        return [
            re.compile(
                rf"""
                # {comment}
                ^ # match start of each line
                \s*{prefix}\s* # allow whitespace before and after prefix
                ({string}) # pattern to capture
                \s* # allow any number of whitespaces after command
                $ # match end of each line (excludes \n in MULTILINE)
                [\r\n]* # Capture current and all following empty newlines
            """,
                re.VERBOSE | re.MULTILINE,
            )
            for string in map(re.escape, self.strings)
        ]

    def preprocess_cell(self, cell, resource, index):
        """Tag ``cell`` for every configured directive it contains.

        Each pattern is applied in turn: the keyword is recorded in the
        metadata and, when ``remove_line`` is true, all matching lines are
        deleted from ``cell.source``. ``resource`` is passed through untouched
        and ``index`` is not used.

        Returns the ``(cell, resource)`` pair.
        """
        # maybe only check code
        for pattern in self._build_regex_patterns():
            cell = self._write_pattern_as_tag(pattern, cell)
            if self.remove_line:
                cell.source = pattern.sub("", cell.source)
        return cell, resource

In [3]:
# Scratch example: a bare markdown-cell mapping passed through
# nbformat.from_dict. Note that `nb` is rebound by the following cell.
nb = nbformat.from_dict(
    {
        "cell_type": "markdown",
        "metadata": {},
        "source": "# hide\nThis is the next line",
    }
)

In [4]:
# Minimal v4 notebook with a single markdown cell whose first line is the
# "# hide" directive that the preprocessor above is meant to pick up.
nb = nbformat.v4.new_notebook(
    cells=[nbformat.v4.new_markdown_cell("# hide\nThis is the next line")]
)
nb

{'nbformat': 4,
 'nbformat_minor': 5,
 'metadata': {},
 'cells': [{'id': 'd019fa34',
   'cell_type': 'markdown',
   'source': '# hide\nThis is the next line',
   'metadata': {}}]}

In [6]:
# Run the preprocessor over the demo notebook (no resources needed) and show
# the first cell: the directive line should be gone and recorded as a tag.
nb, _ = StringsToMetaDataGroupPreprocessor(strings=("hide",), prefix="#").preprocess(
    nb=nb, resources=None
)
nb.cells[0]

{'id': 'd019fa34',
 'cell_type': 'markdown',
 'source': 'This is the next line',
 'metadata': {'tags': ['hide']}}

TODO:
- Write some serious tests
- Let it break hard
- Add some small documentation

# Blockquote to shortcode

In [None]:
def add_jekyll_notes(cell):
    """Rewrite block-quote notes in a markdown `cell` as jekyll includes.

    Every match of `_re_block_notes` in the cell source is expected to yield a
    `(title, text)` pair. If the lower-cased title is one of the styles listed
    under the `jekyll_styles` config key (default: note, warning, tip,
    important), the match becomes a `{% include <title>.html content='...' %}`
    tag with `text` passed through `_to_html`; otherwise it is re-emitted as a
    plain `> title:text` quote. Non-markdown cells are returned unchanged.
    """
    styles = get_config().get('jekyll_styles', 'note,warning,tip,important').split(',')

    def _inner(match):
        title, text = match.groups()
        style = title.lower()
        if style in styles:
            return '{% include ' + style + ".html content=\'" + _to_html(text) + "\' %}"
        return f"> {title}:{text}"

    if cell['cell_type'] != 'markdown':
        return cell
    cell['source'] = _re_block_notes.sub(_inner, cell['source'])
    return cell

_re_block_notes.sub(_inner, cell["source"])

In [None]:
class BlockQuoteToNikolaNotesPreprocessor(Preprocessor):
    """Placeholder preprocessor; currently tags cells from directive lines.

    NOTE(review): despite its name, the body is a copy of
    ``StringsToMetaDataGroupPreprocessor`` and performs no blockquote
    conversion yet. As written it compiles one verbose, multi-line regex per
    entry of ``strings`` (``prefix`` followed by the string on a line), appends
    the captured string to ``cell.metadata[metadata_group]`` and, if
    ``remove_line`` is true, strips the matching lines from the cell source.
    """

    # Do with capturing group
    # Directive keywords to look for; each is regex-escaped, i.e. matched literally.
    strings = List(Unicode(), default_value=[]).tag(config=True)
    # Marker that must precede a keyword on its line; regex-escaped as well.
    prefix = Unicode(default_value="#").tag(config=True)
    # Free text embedded as a comment inside the verbose regex (documentation only).
    comment = Unicode().tag(config=True)
    # Key inside ``cell.metadata`` whose list receives the captured keywords.
    metadata_group = Unicode(default_value="tags").tag(config=True)
    # Whether matching directive lines are stripped from the cell source.
    remove_line = Bool(default_value=True).tag(config=True)

    def _write_pattern_as_tag(self, pattern, cell):
        """Store the keyword captured by ``pattern`` in the cell's metadata.

        Only the first match in ``cell.source`` is looked at. The captured
        group is appended to the ``metadata_group`` list unless it is already
        present. The cell is modified in place and returned.
        """
        match = pattern.search(cell.source)
        if match is None:
            return cell
        tag = match.group(1)
        # ``setdefault`` returns the very list that is stored in the metadata,
        # so appending to it is sufficient; no write-back is required.
        tags = cell.setdefault("metadata", {}).setdefault(self.metadata_group, [])
        if tag not in tags:
            tags.append(tag)
        return cell

    def _build_regex_patterns(self):
        """Compile one ``re.VERBOSE | re.MULTILINE`` pattern per entry of ``strings``.

        Returns a list of compiled patterns, in the order of ``strings``. Group 1
        of each pattern is the (escaped) keyword itself.
        """
        prefix = re.escape(self.prefix)
        # A verbose-mode regex comment ends at the first newline; any text after
        # it would be parsed as part of the pattern. Collapse the user-supplied
        # comment to a single line so it can never alter what is matched.
        comment = " ".join(self.comment.splitlines())
        return [
            re.compile(
                rf"""
                # {comment}
                ^ # match start of each line
                \s*{prefix}\s* # allow whitespace before and after prefix
                ({string}) # pattern to capture
                \s* # allow any number of whitespaces after command
                $ # match end of each line (excludes \n in MULTILINE)
                [\r\n]* # Capture current and all following empty newlines
            """,
                re.VERBOSE | re.MULTILINE,
            )
            for string in map(re.escape, self.strings)
        ]

    def preprocess_cell(self, cell, resource, index):
        """Tag ``cell`` for every configured directive it contains.

        Each pattern is applied in turn: the keyword is recorded in the
        metadata and, when ``remove_line`` is true, all matching lines are
        deleted from ``cell.source``. ``resource`` is passed through untouched
        and ``index`` is not used.

        Returns the ``(cell, resource)`` pair.
        """
        # maybe only check code
        for pattern in self._build_regex_patterns():
            cell = self._write_pattern_as_tag(pattern, cell)
            if self.remove_line:
                cell.source = pattern.sub("", cell.source)
        return cell, resource

## Tag to Collapsible Hide

In [72]:
from nbconvert.preprocessors import TagRemovePreprocessor