# Extracting from Jupyter Notebooks

In [1]:
import nbformat
import io
xrange = range

In [2]:
import md2py

# Reading Notebooks

In [3]:
def read_nb(nb):
    """Load a notebook file from disk.

    Parameters
    ----------
    nb : str
        Path of the notebook file; it is opened as UTF-8 text.

    Returns
    -------
    The object returned by ``nbformat.read`` when called with
    ``nbformat.NO_CONVERT`` as the version argument.
    """
    stream = io.open(nb, 'r', encoding='utf8')
    with stream:
        notebook = nbformat.read(stream, nbformat.NO_CONVERT)
    return notebook

def write_nb(nb, fn):
    """Write a notebook object to disk as UTF-8 text.

    Parameters
    ----------
    nb
        Notebook object handed to ``nbformat.write``.
    fn : str
        Destination path.  ``'.ipynb'`` is appended when the path does not
        already end with that suffix.
    """
    target = fn if fn.endswith('.ipynb') else fn + '.ipynb'
    with io.open(target, 'w', encoding='utf8') as stream:
        nbformat.write(nb, stream, nbformat.NO_CONVERT)

def dump_nb(nb, cells=5, lines=5):
    """Print a short preview of the leading cells of a notebook.

    For every previewed cell a banner containing its ``cell_type`` is printed,
    followed by the cell source.  A source with more than ``lines`` lines is
    cut to its first ``lines`` lines and followed by a ``...`` marker.

    Parameters
    ----------
    nb
        Object exposing a ``cells`` list whose items provide the
        ``'cell_type'`` and ``'source'`` keys.
    cells : int
        Maximum number of cells to preview.  Notebooks with fewer cells are
        previewed in full instead of raising ``IndexError``.
    lines : int
        Maximum number of source lines shown per cell.
    """
    # Clamp to the cells that actually exist so short notebooks do not fail.
    preview_count = min(cells, len(nb.cells))
    for index in range(preview_count):
        cell = nb.cells[index]
        print("====== " + cell['cell_type'] + " ======")
        src = cell['source'].splitlines()
        if len(src) > lines:
            print('\n'.join(src[:lines]))
            print("...")
        else:
            # Short cells are printed verbatim (not re-joined) on purpose.
            print(cell['source'])

In [4]:
# Relative path of the notebook that the rest of this notebook inspects.
source_nb = 'atoms/foundations/Dictionaries.ipynb'

# Load it with the read_nb helper defined earlier in this notebook.
inb = read_nb(source_nb)

In [5]:
type(inb)

nbformat.notebooknode.NotebookNode

In [6]:
inb.keys()

dict_keys(['nbformat', 'cells', 'nbformat_minor', 'metadata'])

In [7]:
inb.metadata

{'anaconda-cloud': {},
 'kernelspec': {'display_name': 'Python [conda root]',
  'language': 'python',
  'name': 'conda-root-py'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
  'file_extension': '.py',
  'mimetype': 'text/x-python',
  'name': 'python',
  'nbconvert_exporter': 'python',
  'pygments_lexer': 'ipython3',
  'version': '3.5.2'}}

In [8]:
cells = inb['cells']

In [9]:
type(cells)

list

In [10]:
len(cells)

41

In [11]:
type(cells)

list

In [12]:
dump_nb(inb)

# Notebook-6: Dictionaries
### Lesson Content 

Welcome back to the fifth Code Camp notebook! In this lesson we'll contiune our exploration of more advanced data structures. Last time we took a peek at a way to represent ordered collections of items via **lists**.

This time we'll use **dictionaries** to create collections of unordered items (this is just an easy distinction - there's much more to it - but it's a good way to start wrapping your head around the subject).
...
# Dictionaries
----

Dictionaries are another kind of data structure that is frequently used in Python. Like lists, the dictionary is also found in other programming languages, often under a different name. For instance, Python dictionaries might be referred to elsewhere as "maps", "hashes", or "associative arrays").

...
myDict = {
    "key1": "Value 1",
    "key2": "2nd Value",
    3: "3rd Value",
    "Fourth Key": [4.0, 'Jon']
...
Did you notice that when we printed out `myDict` it didn't print out the elements o

In [38]:
import re
# Markdown (ATX) header detectors for levels 1-4.
# A header marker only counts at the start of a line (optionally indented by
# up to three spaces), so every pattern is anchored with ^ under re.MULTILINE.
# Without the anchor any mid-line "# " (e.g. "C# code") was counted as a header.
# The trailing space keeps each pattern from matching a deeper header level.
rh1 = re.compile(r'^ {0,3}# ', re.MULTILINE)
rh2 = re.compile(r'^ {0,3}## ', re.MULTILINE)
rh3 = re.compile(r'^ {0,3}### ', re.MULTILINE)
rh4 = re.compile(r'^ {0,3}#### ', re.MULTILINE)

# Ordered by header level: rhs[0] detects level 1, rhs[3] detects level 4.
rhs = rh1, rh2, rh3, rh4

from collections import defaultdict

class NoteBook(object):
    """Wrapper around a notebook file that indexes its cells by type.

    Attributes (set in ``__init__``):
        nb        -- the notebook object returned by ``read_nb``.
        structure -- mapping of cell type -> list of cell positions, as built
                     by ``get_structure``.
    """

    def __init__(self, ipynb):
        """Read the notebook at path ``ipynb`` and build the cell-type index."""
        self.nb = read_nb(ipynb)
        self.structure = self.get_structure()

    def get_structure(self):
        """Return a ``defaultdict(list)`` mapping every ``cell_type`` to the
        positions of the cells of that type, in ``self.nb.cells`` order."""
        cell_types = defaultdict(list)
        for i, cell in enumerate(self.nb.cells):
            cell_types[cell.cell_type].append(i)
        return cell_types

    def get_cells_by_type(self, cell_type=None):
        """Return the cells of one type, or every cell.

        Parameters
        ----------
        cell_type : str or None
            Cell type to select; it is lower-cased before the lookup.  A falsy
            value returns ``self.nb.cells`` itself.

        Returns
        -------
        list
            The matching cells; empty when no cell has the requested type.
        """
        if not cell_type:
            return self.nb.cells
        # .get() instead of [] so that asking for an unknown type does not
        # insert an empty entry into the defaultdict index.
        positions = self.structure.get(cell_type.lower(), [])
        return [self.nb.cells[i] for i in positions]

    def get_cells_by_id(self, ids=None):
        """Return the cells at the given positions of ``self.nb.cells``.

        ``ids`` is any iterable of integer positions; omitting it (or passing
        ``None``) yields an empty list.  An out-of-range position raises
        ``IndexError``.
        """
        if ids is None:
            return []
        return [self.nb.cells[i] for i in ids]

    def get_header_cells(self):
        """Locate the markdown cells that match the header patterns in ``rhs``.

        Returns
        -------
        dict
            Maps each header level (1 .. ``len(rhs)``) to a list of markdown
            cell positions.  A position is listed once per pattern match, so a
            cell with two matches for the same level appears twice.
        """
        hs = {level: [] for level in range(1, len(rhs) + 1)}
        # .get() keeps the index untouched when there are no markdown cells.
        for idx in self.structure.get('markdown', []):
            source = self.nb.cells[idx]['source']
            for level, rh in enumerate(rhs, start=1):
                # One entry per match found in this cell.
                hs[level].extend([idx] * len(rh.findall(source)))
        return hs
        
        
    

In [39]:
inb = NoteBook(source_nb)

In [40]:
h_cells = inb.get_header_cells()

In [41]:
h_cells

{1: [0, 2, 35], 2: [6, 12, 14, 25], 3: [1, 1, 20, 22, 25, 29, 31, 40], 4: [8]}

In [42]:
inb.get_cells_by_id([8])

[{'cell_type': 'markdown',
  'metadata': {},
  'source': 'Notice how now we just jump straight to the item we want? We don\'t need to think about "Was that the fourth item in the list? Or the fifth?", we just use a sensible key and we can ask for the associated value directly.\n\n#### A challenge for you!\n\nHow would you print out "2nd Value" from `myDict`?'}]

In [58]:
# For every header cell below level 1, print its cell index next to the index
# of the closest preceding cell that holds a higher-level header (its parent).
levels = range(4, 1, -1)
for level in levels:
    children = h_cells[level]
    for child in children:
        # Start from an empty candidate list for every child; sharing one list
        # across children only gave correct answers while the children
        # happened to be in ascending order.
        parents = []
        parent_level = level - 1
        while parent_level > 0:
            # Only cells that come before the child can be its parent.
            parents.extend(c for c in h_cells[parent_level] if c < child)
            parent_level -= 1

        # A header with no preceding higher-level header has no parent;
        # report None instead of letting max() raise on an empty list.
        parent = max(parents) if parents else None
        print(child, parent)


8 6
1 0
1 0
20 14
22 14
25 14
29 25
31 25
40 35
6 2
12 2
14 2
25 2
