In [1]:
#| export parse

# Exporting notes to parsed HTML
> Taking a -> b

How it should work:

1. Parse notebook forwards
2. Find 1st and 2nd code cells
3. Check if `#|explain` is in between them
4. Gather all `#|explain`
5. For each `#|explain`, extract the starting and ending lines of code from the code cell above it, and create a dictionary of code -> explanation
6. Create a tabset cell based on the explanations that copies the code and highlights lines of it with the explanations below -- mimic what currently exists. Maybe throw them in as captions under the code?

::: {.panel-tabset}

## Code

In [None]:
def log_softmax(x):
    return (x.exp() / 
            (x.exp().sum(-1, keepdim=True))
           ).log()

## Code + Explaination

In [1]:
def log_softmax(x):
    return (x.exp() / 
            (x.exp().sum(-1, keepdim=True))
           ).log()

```{.python}
log_softmax
```

Log softmax is simply taking the exponential of x, dividing it by the sum of all the exponentials, and then taking the log of that result

```{.python}
.log()
```

We take the log because negative log likelihood expects log-probabilities as its input (the loss applies the negation itself)

:::

In [1]:
#| export
from execnb.nbio import *
from nbdev.process import NBProcessor
from nbdev.processors import Processor, mk_cell
from fastcore.all import *
import shlex

In [2]:
# Read 99_poc.ipynb (path is relative to the current working directory) into `nb`
nb = read_nb("99_poc.ipynb")

In [3]:
#| export
def make_panel_tabset():
    "Build the three markdown cells that frame a `.panel-tabset` block"
    # Order matters: the opening fence (which also carries the first tab heading),
    # the second tab heading, and the closing fence. The original code cell is meant
    # to be slotted in after the first marker and the explanations after the second.
    markers = (
        "::: {.panel-tabset}\n\n## Code",
        "## Code + Explaination",
        ":::",
    )
    return [mk_cell(text, cell_type="markdown") for text in markers]

In [4]:
#| export
def convert_explanation(explanation_cell, source):
    "Create a markdown cell showing `source` in a `{.python}` fence, followed by the explanation text"
    # Doubled braces emit the literal `{.python}` attribute inside the f-string
    fenced = f"```{{.python}}\n{source}\n```"
    return mk_cell(f"{fenced}\n{explanation_cell.source}", cell_type="markdown")

In [110]:
#| export
def _nth_match(pattern, source, instance_num, role):
    "Return the `instance_num`-th match of `pattern` in `source`, or raise a readable `IndexError`"
    # Leading spaces/tabs are part of the match so an indented snippet keeps its indentation
    matches = list(re.finditer(f'[ \t]*{pattern}', source))
    idx = int(instance_num)  # instance numbers arrive as strings when parsed from a directive
    try: return matches[idx]
    except IndexError:
        raise IndexError(
            f"{role} pattern {pattern!r} has no instance {idx}: found {len(matches)} match(es) in the source"
        ) from None

def extract_code(start_code, end_code, source, instance_num, end_instance_num=0):
    """Finds code between start and finish potentially with instances to check

    `start_code` and `end_code` are regular-expression fragments (escape literal text
    before passing it in). `instance_num` and `end_instance_num` select which occurrence
    of each pattern to use, counted over the whole of `source`; anything `int()` accepts
    works, including negative indices counting from the last occurrence.

    Returns the slice of `source` from the beginning of the chosen start match (including
    any spaces/tabs directly before it) to the end of the chosen end match.

    Raises `IndexError` if a requested occurrence does not exist, and `ValueError` if the
    chosen end match finishes at or before the chosen start (the slice would be empty).
    """
    start_char = _nth_match(start_code, source, instance_num, "start").start()
    end_char = _nth_match(end_code, source, end_instance_num, "end").end()
    if end_char <= start_char:
        # Occurrences of the end pattern are counted from the top of `source`, not from the start match
        raise ValueError(
            f"end pattern {end_code!r} (instance {int(end_instance_num)}) ends at character {end_char}, "
            f"which is not after the start of {start_code!r} at character {start_char}; "
            "pick a later end instance"
        )
    return source[start_char:end_char]

In [111]:
#| export
def parse_code(code_cell, markdown_cell):
    """Parses directives to extract the code needed to be highlighted

    Reads the `explain` directive of `markdown_cell` and supports two forms:

    - `#|explain <code>`: returns `<code>` as written (quote it if it contains spaces).
    - `#|explain multiline <start> <start_instance> <end> [<end_instance>]`: returns the
      span of `code_cell.source` found by `extract_code`; `<end_instance>` defaults to 0.

    Raises `KeyError` if the cell has no `explain` directive and `ValueError` if the
    directive is empty or the multiline form has the wrong number of arguments.
    """
    # Re-join and shlex-split so quoted arguments containing spaces become a single token
    directives = shlex.split(" ".join(markdown_cell.directives_["explain"]))
    if not directives:
        raise ValueError("`#|explain` needs the code to highlight, e.g. `#|explain log_softmax`")
    # Only a leading `multiline` switches mode; the arguments below are read by position
    if directives[0] != "multiline":
        return directives[0]
    args = directives[1:]
    if len(args) == 3: args.append(0)  # end instance omitted
    if len(args) != 4:
        raise ValueError(
            "`#|explain multiline` expects <start> <start_instance> <end> [<end_instance>], "
            f"got {len(args)} argument(s): {args}"
        )
    start_code, start_instance_num, end_code, end_instance_num = args
    # The snippets are literal code, so escape them before they are used as regex patterns
    start_code, end_code = re.escape(start_code), re.escape(end_code)
    return extract_code(start_code, end_code, code_cell.source, start_instance_num, end_instance_num)

In [112]:
#| export
class NoteExportProc(Processor):
    """A proc that checks and reorganizes cells for documentation for proper explanations

    A "group" is a code cell plus the markdown cells carrying an `explain` directive that
    appear before the next code cell. Each group with at least one explanation is replaced
    in `self.nb.cells` by a panel tabset: the code on the first tab, and the code again
    followed by one rendered cell per explanation on the second tab. Code cells without
    explanations, and markdown cells without the directive, are left where they are.
    """
    def begin(self):
        "Reset per-notebook state before the first cell is seen"
        self.explanations = []  # `explain` markdown cells collected for the open code cell
        self._code = None       # code cell whose explanations are currently being collected
        self.results = []       # cells of the most recently built tabset

    def cell(self, cell):
        "Track the open code cell and collect the `explain` markdown cells that follow it"
        if cell.cell_type == "code":
            # A new code cell closes the previous group and is itself the start of the next one
            self._flush()
            self._code = cell
        elif cell.cell_type == "markdown" and self._code is not None:
            # Only cells that carry the directive are explanations; ordinary prose stays in place
            if "explain" in (getattr(cell, "directives_", None) or {}):
                self.explanations.append(cell)

    def end(self):
        "Flush the final group, which has no following code cell to close it"
        self._flush()

    def _flush(self):
        "Replace the open group with its tabset in `self.nb.cells`, then clear the group state"
        code, explanations = self._code, self.explanations
        # Clear first so a failure while rendering cannot leak this group into the next one
        self._code, self.explanations = None, []
        if code is None or not explanations: return
        opener, header, closer = make_panel_tabset()
        rendered = [convert_explanation(e, parse_code(code, e)) for e in explanations]
        self.results = [opener, code, header, code, *rendered, closer]
        # Build a new list rather than mutating the current one: the list may be under
        # iteration by whoever is feeding cells to this proc. Cells are matched by identity,
        # so the position is found in the current list instead of via a stored index.
        cells = []
        for c in self.nb.cells:
            if c is code: cells.extend(self.results)
            elif not any(c is e for e in explanations): cells.append(c)
        self.nb.cells = cells

In [None]:
#|export
def parse_notes():