In [1]:
!date

Wed Nov 23 15:00:29 CST 2016


# Running recipes in Jupyter notebook

In this notebook we will demonstrate how to use the chef module to create a dataset.

In [2]:
import os
import pandas as pd
import json
import yaml
import numpy as np

from ddf_utils import chef

import logging
logging.basicConfig(level=logging.DEBUG)

## loading recipes

Recipes can be either YAML or JSON files. Because a recipe may include other recipes and translation dictionaries, the chef module provides a build_recipe() function that reads a recipe and expands it with the other included files.

In [3]:
# Show the raw recipe text before chef expands it.
# Note: you should change the ddf_dir to correct path
# The context manager closes the file handle as soon as the text is read,
# instead of leaving it open until garbage collection.
with open("../tests/recipes/recipe_cme.yaml") as recipe_file:
    print(recipe_file.read())


info:
    id: cme-sg-dataset
    base:
        - &d1 ddf--gapminder--gapminder_world
        - &d2 ddf--gapminder--geo_entity_domain
        - &d3 ddf--cme

config:
    ddf_dir: /Users/semio/src/work/Gapminder
    recipes_dir: ./
    dictionary_dir: ../translation_dictionaries/
        
include:
    - recipe_gw_common.yaml

ingredients:
    # CME
    - id: cme-datapoints
      dataset: *d3
      key: "country,year"
      value: ['u5mr_median', 'nmr_median', 'neonatal_deaths_median']
    - id: cme-countries
      dataset: *d3
      key:
          - country
      value: "*"

    # GW
    - id: gw-entities
      dataset: *d2
      key: [geo]
      value: "*"
    - id: gw-countries
      dataset: *d2
      key: [country]
      value: "*"
    - id: gw-concepts-cme
      dataset: *d1
      key: concept
      value: "*"
      filter:
          concept:
              - newborn_deaths
              - newborn_mortality_rate_per_1000
              - under_five_mortality_from_cme_per_1000_born

c

In [4]:
# build_recipe() reads the recipe file and expands it with the files it includes.
recipe = chef.build_recipe("../tests/recipes/recipe_cme.yaml")

In [5]:
# Pretty-print the expanded recipe so its nested structure is easier to read.
from pprint import pprint

pprint(recipe)

{'config': {'ddf_dir': '/Users/semio/src/work/Gapminder',
            'dictionary_dir': '../translation_dictionaries/',
            'recipes_dir': './'},
 'cooking': {'concepts': [{'ingredients': ['gw-concepts-cme',
                                           'gw-concepts-geo',
                                           'gw-concepts-discrete'],
                           'options': AttrDict([('deep', True)]),
                           'procedure': 'merge',
                           'result': 'cme-concepts-final'}],
             'datapoints': [{'ingredients': ['cme-datapoints'],
                             'options': {'dictionary': {'neonatal_deaths_median': 'newborn_deaths',
                                                        'nmr_median': 'newborn_mortality_rate_per_1000',
                                                        'u5mr_median': 'under_five_mortality_from_cme_per_1000_born'}},
                             'procedure': 'translate_header',
                           

## running the recipe

The run_recipe() function from the chef module actually runs the recipe and returns the resulting DDF as a dictionary of ingredients.

In [6]:
# Run the expanded recipe; the result is a dictionary of ingredients
# (its keys are inspected in the following cell).
res = chef.run_recipe(recipe)

INFO:root:path for searching DDF: /Users/semio/src/work/Gapminder
INFO:ddf_utils.chef.procedure:merge: ['gw-concepts-cme', 'gw-concepts-geo', 'gw-concepts-discrete']
INFO:ddf_utils.chef.procedure:merge: doing deep merge


running concepts


INFO:ddf_utils.chef.procedure:translate_header: cme-datapoints
INFO:ddf_utils.chef.procedure:translate_header: cme-datapoints-indicators-translated


running entities
running datapoints


In [7]:
res.keys()

dict_keys(['entities', 'concepts', 'datapoints'])

### entities

In [8]:
entities = res['entities'].get_data()

In [9]:
entities.keys()  # the recipe includes all geo entities from GW, so they all appear here.

dict_keys(['main_religion_2008', 'world_4region', 'g77_and_oecd_countries', 'country', 'landlocked', 'world_6region', 'income_groups', 'global'])

In [10]:
entities['main_religion_2008']

Unnamed: 0,main_religion_2008,rank,name,gwid,is--main_religion_2008
0,muslim,2,Muslim,i280,True
1,eastern_religions,1,Eastern religions,i281,True
2,christian,3,Christian,i279,True


### datapoints

In [11]:
dps = res['datapoints'].get_data()

In [12]:
dps.keys()

dict_keys(['newborn_mortality_rate_per_1000', 'newborn_deaths', 'under_five_mortality_from_cme_per_1000_born'])

In [13]:
dps['under_five_mortality_from_cme_per_1000_born'].head()

Unnamed: 0,geo,time,under_five_mortality_from_cme_per_1000_born
0,afg,1961,356.5
1,afg,1962,350.6
2,afg,1963,345.0
3,afg,1964,339.7
4,afg,1965,334.1


### concepts

In [14]:
concepts = res['concepts'].get_data()

In [15]:
concepts.keys()

dict_keys(['concept'])

In [16]:
concepts['concept'].concept.values

array(['age', 'alt_5', 'alternative_1', 'alternative_2', 'alternative_3',
       'alternative_4_cdiac', 'arb1', 'arb2', 'arb3', 'arb4', 'arb5',
       'arb6', 'code', 'color', 'country', 'description', 'domain',
       'drill_up', 'g77_and_oecd_countries', 'gapminder_list', 'geo',
       'global', 'god_id', 'gwid', 'income_groups', 'indicator_url',
       'interpolation', 'landlocked', 'latitude', 'longitude',
       'main_religion_2008', 'name', 'name_long', 'name_short',
       'newborn_deaths', 'newborn_mortality_rate_per_1000', 'number',
       'pandg', 'scales', 'time',
       'under_five_mortality_from_cme_per_1000_born', 'unit',
       'upper_case_name', 'world_4region'], dtype=object)