# Clean notebooks

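This notebook strips tagged outputs and tagged cells from the project notebooks, overwriting each notebook in place (a `.bup.ipynb` backup is written first), so that `git diff` on the notebooks stays small and readable.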

In [1]:
# Cell tags that drive the clean-up. Add the matching string as a
# [cell tag] on a cell in a target notebook to have it processed.

# NOTE(review): not referenced by any cell visible in this notebook -- confirm
# whether it is still needed.
TAG_CONDITION = "remove_when_contains:image"

# Maps each clean-up action to the cell tag that triggers it:
#   remove_all_outputs   -> tag for cells whose outputs are all removed
#   remove_single_output -> tag used for single-output removal
#   remove_cell          -> tag for cells that are removed entirely
tag_dict = dict(
    remove_all_outputs="nbconvert_instruction:remove_all_outputs",
    remove_single_output="nbconvert_instruction:remove_single_output",
    remove_cell="nbconvert_instruction:remove_full_cell",
)

In [2]:
import os
import shutil

import nbformat as nbf
from nbconvert.exporters import NotebookExporter
from nbconvert.preprocessors import TagRemovePreprocessor
from traitlets.config import Config



def clean_nb(fn, tags = tag_dict, VERBOSE = True):
    """Strip tagged outputs and cells from notebook `fn`, overwriting it in place.

    Steps, in order:
      1. copy `fn` to `<root>.bup<ext>` (backup of the original),
      2. copy `fn` to `<root>.smudge<ext>` (working copy given to nbconvert),
      3. export the working copy through ``NotebookExporter`` with
         ``TagRemovePreprocessor`` enabled and write the result to
         `<root>.clean<ext>`,
      4. copy the cleaned file back over `fn`.

    The backup, smudge and clean files are left on disk afterwards.

    Parameters
    ----------
    fn : str
        Path of the notebook to clean.
    tags : dict
        Must contain the keys 'remove_all_outputs', 'remove_single_output'
        and 'remove_cell'; each value is the cell tag handed to the
        corresponding ``TagRemovePreprocessor`` option.
    VERBOSE : bool
        When true, print the file names, the nbconvert config and each step.

    Raises
    ------
    OSError
        If a copy or the write of the cleaned notebook fails. Because the
        backup is made first and errors are raised, `fn` is never overwritten
        unless the backup succeeded.
    """
    # Insert the marker in front of the extension only. str.replace would also
    # rewrite any other '.ipynb' in the path (e.g. a '.ipynb_checkpoints' dir).
    root, ext = os.path.splitext(fn)
    nb_in = f'{root}.smudge{ext}'
    nb_out = f'{root}.clean{ext}'
    nb_bup = f'{root}.bup{ext}'

    if VERBOSE:
        print('-- File names')
        print('input:\t\t', fn)
        print('backup:\t\t',nb_bup)
        print('to be cleaned:\t', nb_in)
        print('cleaned:\t',nb_out)

    # Back-up original. shutil.copy2 keeps mode and timestamps like `cp -p`
    # (it does not preserve ownership), needs no shell, and raises on failure
    # so the destructive overwrite below is never reached without a backup.
    if VERBOSE:
        print(f'backup:\n\t{fn} ->\n\t{nb_bup}')
    shutil.copy2(fn, nb_bup)

    # Setup config: tell TagRemovePreprocessor which cell tags trigger which
    # kind of removal. The tag strings come from `tags`.
    c = Config()
    c.TagRemovePreprocessor.remove_all_outputs_tags = (tags['remove_all_outputs'],)
    c.TagRemovePreprocessor.remove_single_output_tags = (tags['remove_single_output'],)
    c.TagRemovePreprocessor.remove_cell_tags = (tags['remove_cell'],)
    c.TagRemovePreprocessor.enabled = True

    if VERBOSE:
        print('-- Config')
        display(c)

    # Prepare input
    if VERBOSE:
        print(f'smudge:\n\t{fn} ->\n\t{nb_in}')
    shutil.copy2(fn, nb_in)

    # Process
    if VERBOSE:
        print(f'clean (process):\n\t{nb_in} ->\n\t{nb_out}')
    # from_filename returns a tuple whose first item is the exported notebook text.
    cleaned = NotebookExporter(config=c).from_filename(nb_in)[0]
    # Write UTF-8 explicitly rather than relying on the platform default
    # encoding, which may not be able to represent non-ASCII notebook content.
    with open(nb_out, "w", encoding="utf-8") as f:
        f.write(cleaned)

    # Overwrite original
    if VERBOSE:
        print(f'save:\n\t{nb_out} ->\n\t{fn}')
    shutil.copy2(nb_out, fn)

In [3]:
# Notebooks of phase 1: collect data from auction

path = "."
phase1_notebooks = [
    'scrape-drz-auction-results.ipynb',
    'add-rdw-info-to-drz.ipynb',
    'download-images.ipynb',
    'explore-auction-results.ipynb',
]

# Clean each notebook in place, printing its path as it is processed.
for short_fn in phase1_notebooks:
    fn = os.path.join(path, short_fn)
    print(fn)
    clean_nb(fn, VERBOSE=False)

./scrape-drz-auction-results.ipynb
./add-rdw-info-to-drz.ipynb
./download-images.ipynb
./explore-auction-results.ipynb
