<a href="https://colab.research.google.com/github/WittmannF/course-v4/blob/master/utils/ExportNotebook2Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Export
Utility for exporting the notebook cells tagged with `#export` into a Python script that I can reuse elsewhere.

In [0]:
# Adapted from: https://github.com/fastai/course-v3/blob/master/nbs/dl2/notebook2script.py
import json,re
from pathlib import Path
import io

def is_export(cell):
    """Tell whether a notebook cell is tagged for export.

    A cell qualifies when it is a code cell whose first source line consists
    only of an `#export` marker (case-insensitive, surrounding whitespace
    allowed). `cell` is a notebook cell dict with 'cell_type' and 'source' keys.
    """
    if cell['cell_type'] == 'code':
        lines = cell['source']
        # '#export' is 7 characters long, so anything shorter cannot be the marker
        if len(lines) > 0 and len(lines[0]) >= 7:
            marker = re.match(r'^\s*#\s*export\s*$', lines[0], re.IGNORECASE)
            return marker is not None
    return False

def getSortedFiles(allFiles, upTo=None):
    '''Returns the notebook files of the current directory, sorted by name.
       allFiles = True : returns all '*.ipynb' files
                = '*_*.ipynb' : returns the files matching this glob pattern
       upTo = None : no upper limit
            = filter : returns all files up to 'filter' included; a file whose
                       name starts with 'filter' counts as included, so
                       upTo='10' keeps '10_nlp.ipynb'
       Sorting is important to ensure that the notebooks are processed in the correct order.
       Prints a warning and returns an empty list when nothing matches `allFiles`.
    '''
    import glob
    ret = []
    # Compare against True explicitly: a non-empty pattern string is truthy too,
    # but must be used as a glob pattern rather than as the "everything" flag.
    if allFiles == True: ret = glob.glob('*.ipynb')
    elif isinstance(allFiles, str): ret = glob.glob(allFiles)
    if not ret:
        print('WARNING: No files found')
        return ret
    if upTo is not None:
        limit = str(upTo)
        # Compare only the leading len(limit) characters. A plain `name <= limit`
        # would drop '10_nlp.ipynb' for upTo='10', because a longer name always
        # sorts after its own prefix.
        ret = [f for f in ret if str(f)[:len(limit)] <= limit]
    return sorted(ret)

def notebook2script(fname=None, allFiles=None, upTo=None):
    '''Finds cells starting with `#export` and puts them into a new module
       + fname: notebook to convert when `allFiles` is not set
       + allFiles: convert all files in the folder (True, or a glob pattern)
       + upTo: convert files up to specified one included

       Examples:
       notebook2script --allFiles=True   # Parse all files
       notebook2script --allFiles=nb*   # Parse all files starting with nb*
       notebook2script --upTo=10   # Parse all files with (name<='10')
       notebook2script --allFiles=*_*.ipynb --upTo=10   # Parse all files with an '_' and (name<='10')

       When neither a file name nor `allFiles`/`upTo` is given, prints a
       message and returns without converting anything.
    '''
    # initial checks
    if (allFiles is None) and (upTo is not None): allFiles=True # Enable allFiles if upTo is present
    if not allFiles:
        if fname is None:
            # Nothing to convert: stop here instead of passing None on to the converter
            print('Should provide a file name')
            return
        notebook2scriptSingle(fname)
        return
    print('Begin...')
    for f in getSortedFiles(allFiles,upTo): notebook2scriptSingle(f)
    print('...End')
        
        
def notebook2scriptSingle(fname):
    """Finds cells starting with `#export` and puts them into a new module.

    Reads the notebook `fname`, keeps the code cells accepted by `is_export`
    and writes their source, minus each cell's first (marker) line, to
    `exp/nb_<prefix>.py` next to the notebook, where `<prefix>` is the part of
    the notebook's stem before the first '_'. The `exp` directory is created
    when missing. A notebook without export cells yields a module holding only
    the autogenerated header. Prints the conversion that was performed.
    """
    fname = Path(fname)
    fname_out = f'nb_{fname.stem.split("_")[0]}.py'
    # Close the notebook file deterministically instead of leaking the handle
    with open(fname,'r',encoding="utf-8") as f: main_dic = json.load(f)
    code_cells = [c for c in main_dic['cells'] if is_export(c)]
    module = f'''
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: dev_nb/{fname.name}
'''
    # Separate cells with a blank line. Joining (rather than appending '\n\n'
    # after every cell and slicing it off later) keeps the header intact when
    # there are no export cells.
    module += '\n\n'.join(''.join(cell['source'][1:]) for cell in code_cells)
    # remove trailing spaces
    module = re.sub(r' +$', '', module, flags=re.MULTILINE)
    (fname.parent/'exp').mkdir(exist_ok=True)
    output_path = fname.parent/'exp'/fname_out
    with io.open(output_path, "w", encoding="utf-8") as f:
        f.write(module)
    print(f"Converted {fname} to {output_path}")

In [3]:
!git clone https://github.com/WittmannF/course-v4.git

Cloning into 'course-v4'...
remote: Enumerating objects: 36, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 383 (delta 13), reused 7 (delta 3), pack-reused 347[K
Receiving objects: 100% (383/383), 47.59 MiB | 46.23 MiB/s, done.
Resolving deltas: 100% (115/115), done.


In [4]:
%cd course-v4/nbs/

/content/course-v4/nbs


In [0]:
fname='02_production_colab.ipynb'

In [0]:
fname = Path(fname)

In [0]:
main_dic = json.load(open(fname,'r',encoding="utf-8"))

In [0]:
code_cells = [c for c in main_dic['cells'] if is_export(c)]

In [0]:
# Start the generated module with a banner that marks it as autogenerated and
# names the notebook (`fname.name`) it was built from.
module = f'''#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: dev_nb/{fname.name}
'''

In [0]:
# Append the source of every selected cell to the module text, skipping each
# cell's first source line and adding a blank line after each cell.
for cell in code_cells: 
    module += ''.join(cell['source'][1:]) + '\n\n'

In [0]:
module = re.sub(r' +$', '', module, flags=re.MULTILINE)

In [15]:
print(module)

#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: dev_nb/02_production_colab.ipynb
import os
REQUIREMENTS_PIP = """pip install azure-cognitiveservices-search-imagesearch
pip install git+https://github.com/fastai/fastai2
pip install git+https://github.com/fastai/fastcore
pip install nbdev"""

    from colab_utils import *
    !{REQUIREMENTS_PIP}
    !{GIT_CLONE_REPOSITORY}
    %cd {FASTAI_NB_PATH}
"""

GIT_CLONE_REPOSITORY = 'git clone https://github.com/fastai/course-v4/'

FASTAI_NB_PATH = "course-v4/nbs/"
def install_requirements():
    print("Installing requirements...")
    os.system(REQUIREMENTS_PIP)
    print("Done!")

def clone_repository():
    print("Cloning FastAI Repository...")
    os.system(GIT_CLONE_REPOSITORY)
    print("Done!")

def open_nb_folder():
    print(f"Opening folder {FASTAI_NB_PATH} with nbs and utils files...")
    os.chdir(FASTAI_NB_PATH)
    p

In [0]:
output_path='colab_utils.py'

In [20]:
# Write the module text to `output_path` as UTF-8, leaving out its final two
# characters (presumably the '\n\n' appended after the last cell -- this
# assumes at least one cell was exported), then report the conversion.
with io.open(output_path, "w", encoding="utf-8") as f:
    f.write(module[:-2])
print(f"Converted {fname} with #export tag to {output_path}")

Converted 02_production_colab.ipynb with #export tag to colab_utils.py


In [21]:
ls

01_intro_colab.ipynb       08_collab.ipynb         17_foundations.ipynb
01_intro.ipynb             09_tabular.ipynb        18_CAM.ipynb
02_production_colab.ipynb  10_nlp.ipynb            19_learner.ipynb
02_production.ipynb        11_midlevel_data.ipynb  20_conclusion.ipynb
03_ethics.ipynb            12_nlp_dive.ipynb       app_blog.ipynb
04_mnist_basics.ipynb      13_convolutions.ipynb   app_jupyter.ipynb
05_pet_breeds.ipynb        14_resnet.ipynb         colab_utils.py
06_multicat.ipynb          15_arch_details.ipynb   [0m[01;34mimages[0m/
07_sizing_and_tta.ipynb    16_accel_sgd.ipynb      utils.py
