# Notebook handling

In [3]:
#| default_exp notebook

## Notebook object

The underlying file structure of a notebook is JSON, which directly matches python dictionaries.  
As such, it is treated by `nbformat` as a special kind of `dict`.  

In [4]:
#| export

from nbformat import NotebookNode
from nbformat import read as read_nb, write as write_nb, validate as validate_nb, from_dict
import jq

Because of that, all of `jq`'s power is available to us:

In [9]:
#| export
from typing import Iterable
from enum import Enum

DEFAULT_NB_METADATA = '{"kernelspec": {"language": "python"}}'

# TODO: about filter, how about:
# nb.cells = jq.compile(filter).input(nb.cells).all()?
class Notebook:
    """A notebook file loaded through `nbformat`, with jq-based cleaning helpers.

    Attributes:
        path: location the notebook was read from; also the default target of `save`.
        nb: the notebook contents as returned by `read_nb(path, as_version=4)`.
            Every cleaning method replaces it with the node produced by its jq filter.
    """

    # jq path selecting code cells only. In the nbformat v4 schema `outputs` and
    # `execution_count` are properties of code cells; markdown and raw cells do
    # not allow additional properties, so assigning these keys on *every* cell
    # would yield a notebook that no longer validates.
    _CODE_CELLS = '.cells[] | select(.cell_type == "code")'

    def __init__(self, path):
        """Read the notebook stored at `path` (as nbformat version 4)."""
        self.path = path
        self.nb = read_nb(path, as_version=4)

    def apply_jq_pattern(self, pattern):
        """Run the jq program `pattern` over the notebook and convert its first result to a notebook node.

        The result is returned rather than stored; the cleaning methods assign
        it back to `self.nb` themselves.
        """
        return from_dict(jq.compile(pattern).input(self.nb).first())

    def clean_execution_counts(self):
        """Set `execution_count` to null on every code cell."""
        self.nb = self.apply_jq_pattern(f'({self._CODE_CELLS} | .execution_count) = null')

    def clean_outputs(self):
        """Replace the `outputs` of every code cell with an empty list."""
        self.nb = self.apply_jq_pattern(f'({self._CODE_CELLS} | .outputs) = []')

    def clean_all_metadata(self):
        """Empty each cell's metadata and reset the notebook metadata to `DEFAULT_NB_METADATA`."""
        self.nb = self.apply_jq_pattern('.cells[].metadata = {}')
        self.nb = self.apply_jq_pattern(f'.metadata = {DEFAULT_NB_METADATA}')

    def clean_all(self):
        """Apply all three cleaning steps: execution counts, outputs, metadata."""
        self.clean_execution_counts()
        self.clean_outputs()
        self.clean_all_metadata()

    # TODO: circular dependency - notebooks.clean needs clean operations and vice versa
    def clean(self, clean_ops: Iterable[Enum]):
        """Apply the requested cleaning operations in order and return `self`.

        Args:
            clean_ops: enum members whose `.value` is the name of the method on
                this object to call. A single member is accepted as well and is
                treated as a one-element sequence.

        Returns:
            This `Notebook`, so calls can be chained (e.g. `.clean(...).nb`).
        """
        # A lone Enum member is not iterable; wrap it so `clean(Op.X)` works too.
        if isinstance(clean_ops, Enum):
            clean_ops = (clean_ops,)
        for op in clean_ops:
            getattr(self, op.value)()
        return self

    def save(self, path=None):
        """Write the notebook to `path`, or back to `self.path` when `path` is not given."""
        path = path if path else self.path
        write_nb(self.nb, path)
        

In [10]:
#| export

class CleanOperations(Enum):
    """Cleaning steps that can be requested from `Notebook.clean`.

    Each member's value is the name of the `Notebook` method that performs the
    step. The names are read from the methods' `__name__` instead of being
    written as string literals.
    """
    # Per-cell metadata emptied, notebook metadata reset to the default.
    METADATA = Notebook.clean_all_metadata.__name__
    # Cell outputs removed.
    OUTPUTS = Notebook.clean_outputs.__name__
    # Execution counters nulled.
    EXECUTION_COUNT = Notebook.clean_execution_counts.__name__
    # All of the above, via `Notebook.clean_all`.
    ALL = Notebook.clean_all.__name__ 


## Tests

### Base Clean Notebook

In [18]:
#| hide

from pathlib import Path
# Directory holding the fixture notebooks used by the cells below.
test_data_dir = Path('../tests/data')

# Load the already-clean fixture; later cells use it as the expected result.
cln_nb = read_nb(test_data_dir.joinpath('clean.ipynb'), as_version=4)

#### Cleaning a clean notebook produces same result

In [19]:
#| hide

# TODO: enable clean to receive __all__ clean operations - maybe make an __all__ flag
# Running every cleaning step on the already-clean fixture must leave it unchanged.
recleaned_nb = Notebook(test_data_dir.joinpath('clean.ipynb')).clean([CleanOperations.ALL]).nb
assert recleaned_nb == cln_nb

### Clean All Attributes

In [21]:
from unittest import TestCase

# A bare TestCase instance exposes assertDictEqual without defining a test
# class; the following cells reuse `tc`.
tc = TestCase()

# Apply every cleaning step to the dirty fixture ...
cln = (
    Notebook(test_data_dir.joinpath('dirty.ipynb'))
    .clean([CleanOperations.ALL])
    .nb
)

# ... and expect the reference clean notebook.
tc.assertDictEqual(cln, cln_nb)

### Clean Outputs

In [22]:
# Only the outputs step is applied; the result must match the stored fixture.
no_out = (
    Notebook(test_data_dir.joinpath('dirty.ipynb'))
    .clean([CleanOperations.OUTPUTS])
    .nb
)
expected_no_out = read_nb(test_data_dir.joinpath('no_outputs.ipynb'), as_version=4)
tc.assertDictEqual(no_out, expected_no_out)

### Clean Execution Counts

In [23]:
# Only the execution-count step is applied; the result must match the stored fixture.
no_ex_cnt = (
    Notebook(test_data_dir.joinpath('dirty.ipynb'))
    .clean([CleanOperations.EXECUTION_COUNT])
    .nb
)
expected_no_ex_cnt = read_nb(test_data_dir.joinpath('no_execution_counts.ipynb'), as_version=4)
tc.assertDictEqual(no_ex_cnt, expected_no_ex_cnt)

### Clean Metadata

In [25]:
# Only the metadata step is applied; the result must match the stored fixture.
no_meta = (
    Notebook(test_data_dir.joinpath('dirty.ipynb'))
    .clean([CleanOperations.METADATA])
    .nb
)
expected_no_meta = read_nb(test_data_dir.joinpath('no_metadata.ipynb'), as_version=4)
tc.assertDictEqual(no_meta, expected_no_meta)

### Resetting Empty Metadata

In [26]:
# Cleaning metadata on the `clean_empty_nb_metadata` fixture must give back
# the reference clean notebook.
reset_nb_metadata = (
    Notebook(test_data_dir.joinpath('clean_empty_nb_metadata.ipynb'))
    .clean([CleanOperations.METADATA])
    .nb
)
tc.assertDictEqual(reset_nb_metadata, cln_nb)

## export

In [29]:
from nbdev.export import nb_export

# Run nbdev's exporter on this notebook; the cells marked `#| export` above
# are the ones intended for the generated module.
nb_export('nb_definition.ipynb')