# Parsing Jupyter Notebooks

In [1]:
import nbformat
import io

# Reading Notebooks

In [14]:
def read_nb(nb):
    """Read the notebook file at path ``nb`` and return the parsed notebook.

    The file is opened as UTF-8 text and passed to ``nbformat.read`` with
    ``nbformat.NO_CONVERT`` as the version argument.
    """
    with io.open(nb, 'r', encoding='utf-8') as notebook_file:
        return nbformat.read(notebook_file, nbformat.NO_CONVERT)

In [15]:
#source_nb = 'atoms/visualization/choropleth_classification.ipynb'
source_nb = 'atoms/foundations/Functions.ipynb'

inb = read_nb(source_nb)

In [16]:
type(inb)

nbformat.notebooknode.NotebookNode

In [17]:
inb.keys()

[u'nbformat_minor', u'cells', u'nbformat', u'metadata']

In [18]:
inb.metadata

{u'anaconda-cloud': {},
 u'kernelspec': {u'display_name': u'Python [Root]',
  u'language': u'python',
  u'name': u'Python [Root]'},
 u'language_info': {u'codemirror_mode': {u'name': u'ipython', u'version': 2},
  u'file_extension': u'.py',
  u'mimetype': u'text/x-python',
  u'name': u'python',
  u'nbconvert_exporter': u'python',
  u'pygments_lexer': u'ipython2',
  u'version': u'2.7.12'}}

In [19]:
cells = inb['cells']

In [20]:
type(cells)

list

In [21]:
len(cells)

45

In [10]:
type(cells)

list

In [12]:
# Print the type of the first ten cells. Slicing replaces xrange (Python 2
# only) so the cell runs on Python 3 as well, and it does not raise when the
# notebook has fewer than ten cells.
for cell in cells[:10]:
    print(cell['cell_type'])

markdown
markdown
code
markdown
code
code
code
code
code
markdown


## Let's replace the cells in the in-memory notebook to create a new one

In [11]:
# keep every other cell: the odd-indexed ones (1, 3, 5, ...)
new_cells = cells[1::2]

In [12]:
len(new_cells)

49

In [13]:
inb['cells'] = new_cells

In [14]:
# Save the in-memory notebook (whose 'cells' entry was replaced by new_cells)
# to a new file, written as UTF-8 text.
# NOTE(review): NO_CONVERT is passed as the version argument; presumably this
# keeps the notebook's current nbformat version -- confirm in the nbformat docs.
with io.open('smaller.ipynb', 'w', encoding='utf8') as f:
    nbformat.write(inb, f, nbformat.NO_CONVERT)

In [15]:
snb = read_nb('smaller.ipynb')

In [16]:
len(snb.cells)

49

## Notebook Cells

In [17]:
c0 = snb.cells[0]

In [18]:
type(c0)

nbformat.notebooknode.NotebookNode

In [19]:
c0.keys()

dict_keys(['cell_type', 'metadata', 'source'])

In [20]:
c0['cell_type']

'markdown'

In [21]:
c0['source']

'## Introduction\n\n* what is classification\n* role in choropleth mapping\n* explore classification using PySAL'

In [22]:
c0['metadata']

{'collapsed': True}

## Types of cells in this notebook

In [23]:
from collections import defaultdict
def get_structure(cells):
    """Group the positions of ``cells`` by their ``cell_type`` attribute.

    Returns a ``defaultdict(list)`` mapping each cell type to the list of
    indices (in ascending order) at which cells of that type occur.
    """
    positions_by_type = defaultdict(list)
    kinds = (cell.cell_type for cell in cells)
    for position, kind in enumerate(kinds):
        positions_by_type[kind].append(position)
    return positions_by_type
            

In [24]:
cell_types = get_structure(snb.cells)

In [25]:
cell_types.keys()

dict_keys(['code', 'markdown'])

In [26]:
# Report how many cells of each type were found. The loop variable is not
# named `cells`, so the notebook-level `cells` list (bound from inb['cells'])
# is not overwritten by the last list of indices.
for cell_type, indices in cell_types.items():
    print('Cell Type: %s\t %d cells' % (cell_type, len(indices)))

Cell Type: code	 38 cells
Cell Type: markdown	 11 cells


In [27]:
code_cell_idx = cell_types['code'][0]
code_cell_idx

2

In [28]:
snb.cells[code_cell_idx]

{'cell_type': 'code',
 'execution_count': 3,
 'metadata': {'collapsed': False},
 'outputs': [{'data': {'text/plain': '{\'description\': \'Mexican states regional income 1940-2000\',\n \'explanation\': [\'Data used in   Rey, S.J. and M.L.  Sastre Gutierrez. (2010) "Interregional inequality\',\n  \'dynamics in Mexico." Spatial Economic Analysis, 5: 277-298\',\n  \'* mexico.csv: attribute data\',\n  \'* mexico.gal: spatial weights in GAL format\',\n  \'Polygon data, n=32, k=13\'],\n \'name\': \'mexico\'}'},
   'execution_count': 3,
   'metadata': {},
   'output_type': 'execute_result'}],
 'source': "ps.examples.explain('mexico')"}

In [29]:
mkd_cell_idx = cell_types['markdown'][0]
mkd_cell_idx

0

In [30]:
snb.cells[mkd_cell_idx]

{'cell_type': 'markdown',
 'metadata': {'collapsed': True},
 'source': '## Introduction\n\n* what is classification\n* role in choropleth mapping\n* explore classification using PySAL'}

## Turning Output Cells OFF

In [31]:
def remove_outputs(nb):
    """Empty the ``outputs`` attribute of every code cell in ``nb``.

    The notebook is modified in place; cells whose ``cell_type`` is not
    ``'code'`` are left untouched. Nothing is returned.
    """
    code_cells = (cell for cell in nb.cells if cell.cell_type == 'code')
    for code_cell in code_cells:
        code_cell.outputs = []

def clear_notebook(old_ipynb, new_ipynb):
    """Write a copy of a notebook with the outputs of all code cells removed.

    Parameters
    ----------
    old_ipynb : str
        Path of the notebook file to read.
    new_ipynb : str
        Path the output-free notebook is written to.
    """
    # Read explicitly as UTF-8, matching the write below, instead of relying
    # on the platform's default text encoding.
    with io.open(old_ipynb, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, nbformat.NO_CONVERT)

    remove_outputs(nb)

    with io.open(new_ipynb, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f, nbformat.NO_CONVERT)

# Notebook whose code-cell outputs will be stripped.
source_nb = 'atoms/visualization/choropleth_classification.ipynb'

# Destination for the output-free copy; clear_notebook only reads source_nb.
new_nb = 'nout.ipynb'
clear_notebook(source_nb, new_nb)