## Data Format: JSON

In [140]:
from nose.tools import assert_equal
import os
import json
import requests

In [141]:
# Download the sample notebook that the exercises below inspect and save it
# in the current working directory.
r = requests.get(
    'https://raw.githubusercontent.com/UI-DataScience/info490-fa15/master/Week2/notebooks/intro2ipy.ipynb',
    timeout=30,  # do not hang the kernel forever on a stalled connection
)
# Stop here on a 4xx/5xx response instead of saving an error page that
# json.load() would only reject later with a confusing message.
r.raise_for_status()
with open('intro2ipy.ipynb', 'w') as f:
    f.write(r.text)

### Function: get_keys()

In [154]:
def get_keys(filename):
    '''
    Reads a JSON text file and lists the keys of its outermost dictionary.
    
    Parameters
    ----------
    filename (str): path to a JSON file whose top level is a dictionary.
    
    Returns
    -------
    A list of strings.
    '''

    with open(filename, 'r') as handle:
        top_level = json.loads(handle.read())

    # Turn the dictionary's key view into a plain list for the caller.
    return [key for key in top_level.keys()]

In [157]:
# Check get_keys() against the downloaded notebook; key order is ignored.
test1 = get_keys('intro2ipy.ipynb')
answer1 = ['cells', 'nbformat_minor', 'metadata', 'nbformat']

assert_equal(len(test1), len(answer1))
assert_equal(set(test1), set(answer1))

# Only the outermost keys may be reported; nested keys such as 'D' or 'E'
# must not appear in the result.
test2 = {
    'A': 1,
    'B': {'C': 2, 'D': 3},
    'C': {
        'E': {'F': 4},
        'G': {'H': 5, 'I': 6}
    }
}

answer2 = ['A', 'B', 'C']

try:
    with open('test.json', 'w') as f:
        json.dump(test2, f)

    assert_equal(len(get_keys('test.json')), len(answer2))
    assert_equal(set(get_keys('test.json')), set(answer2))
finally:
    # Clean up the scratch file even when an assertion above fails, so a
    # failed run does not leave test.json behind in the working directory.
    if os.path.exists('test.json'):
        os.remove('test.json')

### Function: get_version()

In [174]:
def get_version(filename):
    '''
    Reads a notebook (JSON) file and reports the language and format it declares.
    
    The values come from metadata -> language_info -> name,
    metadata -> language_info -> version, and the top-level nbformat field.
    
    Parameters
    ----------
    filename (str): a JSON file.
    
    Returns
    -------
    A tuple of (str, str, int): (language name, language version, nbformat).
    '''

    with open(filename, 'r') as handle:
        notebook = json.load(handle)

    # Both language fields live in the same nested dictionary; look it up once.
    language_info = notebook['metadata']['language_info']
    return (language_info['name'], language_info['version'], notebook['nbformat'])

In [177]:
# Show the (language, version, nbformat) tuple read from the downloaded notebook.
print(get_version('intro2ipy.ipynb'))

('python', '3.4.0', 4)


In [178]:
# Check get_version() against the downloaded notebook.
test1 = get_version('intro2ipy.ipynb')
answer1 = ('python', '3.4.0', 4)

assert_equal(test1, answer1)

# A minimal notebook-shaped dictionary with placeholder strings as values.
test2 = {
  "metadata" : {
    "signature": "hex-digest", # used for authenticating unsafe outputs on load
    "kernel_info": {
        # if kernel_info is defined, its name field is required.
        "name" : "the name of the kernel"
    },
    "language_info": {
        # if language_info is defined, its name field is required.
        "name" : "the programming language of the kernel",
        "version": "the version of the language",
        "codemirror_mode": "The name of the codemirror mode to use [optional]"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0,
  "cells" : [
      # list of cell dictionaries, see below
  ],
}

answer2 = ("the programming language of the kernel", "the version of the language", 4)

try:
    with open('test.json', 'w') as f:
        json.dump(test2, f)

    assert_equal(get_version('test.json'), answer2)
finally:
    # Clean up the scratch file even when the assertion above fails, so a
    # failed run does not leave test.json behind in the working directory.
    if os.path.exists('test.json'):
        os.remove('test.json')

### Function: count_code_cells()

In [225]:
def count_code_cells(filename, cell_type):
    '''
    Takes a filename and a cell type, and returns the number count of that cell type.
    
    Despite the function's name, any cell type can be counted, not only "code".
    A cell that has no "cell_type" field matches no type and is never counted.
    
    Parameters
    ----------
    filename (str): a JSON file.
    cell_type (str): "code", "markdown", etc.
    
    Returns
    -------
    An int.
    
    Raises
    ------
    KeyError if the file has no top-level "cells" entry.
    '''

    with open(filename, 'r') as jd:
        data = json.load(jd)

    # Count in a single pass; there is no need to first collect every cell's
    # type into a list and then walk that list again.
    return sum(
        1
        for cell in data['cells']
        if 'cell_type' in cell and cell['cell_type'] == cell_type
    )

In [230]:
# Count the code and markdown cells of the downloaded notebook once and
# reuse the results for both the report and the checks.
n_code = count_code_cells('intro2ipy.ipynb', 'code')
n_markdown = count_code_cells('intro2ipy.ipynb', 'markdown')

print('There are {} code cells and {} markdown cells.'.format(n_code, n_markdown))

assert_equal(n_code, 5)
assert_equal(n_markdown, 16)

# A minimal notebook-shaped dictionary with two made-up cell types.
test = {
  "cells" : [
    {
      "cell_type" : "type1",
      "metadata" : {},
      "source" : "single string or [list, of, strings]",
      },
    {
      "cell_type" : "type1",
      "metadata" : {},
      "source" : "single string or [list, of, strings]",
      },
    {
      "cell_type" : "type2",
      "metadata" : {},
      "source" : "single string or [list, of, strings]",
      }
  ],
} 

try:
    with open('test.json', 'w') as f:
        json.dump(test, f)

    assert_equal(count_code_cells('test.json', 'type1'), 2)
    assert_equal(count_code_cells('test.json', 'type2'), 1)
    assert_equal(count_code_cells('test.json', 'type3'), 0)
finally:
    # Clean up the scratch file even when an assertion above fails, so a
    # failed run does not leave test.json behind in the working directory.
    if os.path.exists('test.json'):
        os.remove('test.json')

There are 5 code cells and 16 markdown cells.
