# Notebook handling

In [18]:
#| default_exp notebook

## Notebook object

The underlying file structure of a notebook is JSON, which maps directly onto Python dictionaries.  
As such, `nbformat` treats a notebook as a special kind of `dict`.  

In [25]:
#| export

from nbformat import NotebookNode
from nbformat import read as read_nb, write as write_nb, validate as validate_nb, from_dict
import jq

Because of that, all of `jq`'s power is available to us:

In [92]:
#| export
from typing import Iterable
from enum import Enum

# TODO: about filter, how about:
# nb.cells = jq.compile(filter).input(nb.cells).all()?
class Notebook:
    """A notebook loaded from disk, with jq-based cleaning helpers.

    Attributes:
        path: Where the notebook was read from; also the default target of `save`.
        nb: The notebook content as returned by `read_nb(path, as_version=4)`.
            Every `clean_*` method replaces it with a freshly built notebook.
    """

    def __init__(self, path):
        """Read the notebook stored at `path` (as nbformat version 4)."""
        self.path = path
        self.nb = read_nb(path, as_version=4)

    def apply_jq_pattern(self, pattern):
        """Run the jq program `pattern` over the notebook.

        Returns the program's first result converted with `from_dict`.
        `self.nb` itself is left untouched; callers decide whether to store
        the result.
        """
        return from_dict(jq.compile(pattern).input(self.nb).first())

    def clean_execution_counts(self):
        """Reset the execution counter of every code cell to null."""
        # Only code cells carry `execution_count`. Assigning through a bare
        # `.cells[]` would also add the key to markdown/raw cells, which the
        # nbformat v4 schema rejects.
        self.nb = self.apply_jq_pattern(
            '(.cells[] | select(.cell_type == "code") | .execution_count) = null'
        )

    def clean_outputs(self):
        """Drop the outputs of every code cell."""
        # Same restriction as above: `outputs` is a code-cell-only field.
        self.nb = self.apply_jq_pattern(
            '(.cells[] | select(.cell_type == "code") | .outputs) = []'
        )

    def clean_all_metadata(self):
        """Empty both the per-cell metadata and the notebook-level metadata."""
        # One jq program instead of two round trips; `|` binds looser than `=`,
        # so the second assignment runs on the result of the first.
        self.nb = self.apply_jq_pattern('.cells[].metadata = {} | .metadata = {}')

    # TODO: circular dependency - notebooks.clean needs clean operations and vice versa
    def clean(self, clean_ops: Iterable[Enum]):
        """Run each requested cleaning step in order.

        Args:
            clean_ops: Enum members whose `.value` is the name of a method on
                this class; each method is looked up and called with no arguments.

        Returns:
            This instance, so calls can be chained.
        """
        for op in clean_ops:
            getattr(self, op.value)()
        return self

    def save(self, path=None):
        """Write the notebook to `path`, or back to `self.path` when `path` is falsy."""
        write_nb(self.nb, path or self.path)
        

In [93]:
#| export

class CleanOperations(Enum):
    """Cleaning steps that `Notebook.clean` can run.

    Each member's value is the name of the `Notebook` method implementing the
    step; `Notebook.clean` looks that method up with `getattr` and calls it.
    """
    # Values come from `__name__` rather than string literals, so renaming a
    # method fails loudly here instead of leaving a stale name behind.
    METADATA = Notebook.clean_all_metadata.__name__
    OUTPUTS = Notebook.clean_outputs.__name__
    EXECUTION_COUNT = Notebook.clean_execution_counts.__name__


### Tests

#### Base Clean Notebook

In [94]:
#| hide

# Sanity-check the fixture: the "clean" notebook must load and pass schema validation.
nb = read_nb('../tests/data/clean.ipynb', as_version=4)
validate_nb(nb)

# It contains a single, empty code cell.
assert nb.cells == [
    {
        'cell_type': 'code',
        'execution_count': None,
        'metadata': {},
        'outputs': [],
        'source': '',
    },
]

#### Cleaning a clean notebook produces the same result

In [95]:
#| hide
# `clean` takes ONE iterable of operations, so hand it the list of members
# instead of unpacking them into separate positional arguments (which raises
# TypeError). Iterating the enum yields its members in definition order.
assert nb == Notebook('../tests/data/clean.ipynb').clean(list(CleanOperations)).nb

clean ops:  (<CleanOperations.METADATA: 'clean_all_metadata'>, <CleanOperations.OUTPUTS: 'clean_outputs'>, <CleanOperations.EXECUTION_COUNT: 'clean_execution_counts'>)


## export

In [98]:
from nbdev.export import nb_export

# Run nbdev's exporter over 'nb_definition.ipynb' (presumably this notebook's
# own file name — confirm it still matches if the notebook is renamed).
nb_export('nb_definition.ipynb')