In [1]:
import pandas as pd
import numpy as np
import os

from ddf_utils.str import to_concept_id
from ddf_utils.index import create_index_file

In [2]:
# Relative path to the zipped Food Balance Sheets data file; it is passed
# straight to pd.read_csv in the next cell.
source = 'source/FoodBalanceSheets_E_All_Data_(Norm).zip'

In [4]:
# Load the raw table; the file is decoded as latin-1.
data = pd.read_csv(filepath_or_buffer=source, encoding='latin-1')

In [5]:
# Peek at the first five rows to check the column layout.
data.head(n=5)

Unnamed: 0,Country Code,Country,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,351,China,2501,Population,511,Total Population - Both sexes,1961,1961,1000,675173.0,A
1,351,China,2501,Population,511,Total Population - Both sexes,1962,1962,1000,686277.0,A
2,351,China,2501,Population,511,Total Population - Both sexes,1963,1963,1000,698324.0,A
3,351,China,2501,Population,511,Total Population - Both sexes,1964,1964,1000,712071.0,A
4,351,China,2501,Population,511,Total Population - Both sexes,1965,1965,1000,728010.0,A


In [6]:
# List the distinct Element labels; each one becomes a measure concept later on.
data['Element'].unique()

array(['Total Population - Both sexes', 'Production', 'Import Quantity',
       'Stock Variation', 'Export Quantity', 'Domestic supply quantity',
       'Feed', 'Seed', 'Waste', 'Processing', 'Other uses', 'Food',
       'Food supply quantity (kg/capita/yr)',
       'Food supply (kcal/capita/day)',
       'Protein supply quantity (g/capita/day)',
       'Fat supply quantity (g/capita/day)'], dtype=object)

In [32]:
# Build an "<Item>: <Element>" label per row (adds a column to `data` in place).
data['concept_name'] = data['Item'].str.cat(data['Element'], sep=': ')

In [35]:
# country

In [38]:
# One row per distinct (Country Code, Country) pair.
country = (
    data[['Country Code', 'Country']]
    .drop_duplicates()
    .copy()
)

In [39]:
# Rename positionally: 'Country Code' -> 'country', 'Country' -> 'name'.
country.columns = ['country', 'name']

In [41]:
# Write the country entities file one directory above the notebook, without the index column.
country.to_csv(path_or_buf='../ddf--entities--country.csv', index=False)

In [None]:
# items

In [60]:
# One row per distinct (Item Code, Item) pair.
item = (
    data[['Item Code', 'Item']]
    .drop_duplicates()
    .copy()
)

In [61]:
# Rename positionally: 'Item Code' -> 'item', 'Item' -> 'name'.
item.columns = ['item', 'name']

In [65]:
# Show the items whose name contains "Total" (aggregate rows).
item.loc[item['name'].str.contains('Total')]

Unnamed: 0,item,name
59307,2901,Grand Total


In [83]:
# Write the item entities file one directory above the notebook, without the index column.
item.to_csv(path_or_buf='../ddf--entities--item.csv', index=False)

In [42]:
# concepts

In [68]:
# Distinct Element labels, in order of first appearance; these become the measure concepts.
concs = data.loc[:, 'Element'].unique()

In [69]:
# Start from an empty concepts table with the three columns filled in below.
concept_columns = ['concept', 'name', 'concept_type']
cdf = pd.DataFrame([], columns=concept_columns)

In [70]:
# Four fixed concepts first, followed by one row per Element label.
cdf['name'] = ['Name', 'Country', 'Item', 'Year'] + list(concs)

In [71]:
# Derive each concept id from its display name.
cdf['concept'] = [to_concept_id(label) for label in cdf['name']]

In [72]:
# Default every concept to a measure, then override the four fixed rows that
# were placed ahead of the Element names.
cdf['concept_type'] = 'measure'

cdf.loc[0, 'concept_type'] = 'string'               # name
cdf.loc[[1, 2], 'concept_type'] = 'entity_domain'   # country, item
cdf.loc[3, 'concept_type'] = 'time'                 # year

In [73]:
# Write the concepts file one directory above the notebook, without the index column.
cdf.to_csv(path_or_buf='../ddf--concepts.csv', index=False)

In [74]:
# Display the concepts table to eyeball the generated ids and concept types.
cdf

Unnamed: 0,concept,name,concept_type
0,name,Name,string
1,country,Country,entity_domain
2,item,Item,entity_domain
3,year,Year,time
4,total_population_both_sexes,Total Population - Both sexes,measure
5,production,Production,measure
6,import_quantity,Import Quantity,measure
7,stock_variation,Stock Variation,measure
8,export_quantity,Export Quantity,measure
9,domestic_supply_quantity,Domestic supply quantity,measure


In [76]:
# Keep only the key columns, the Element label and the value for the datapoint files.
data_ = data.loc[
    :,
    ['Country Code', 'Item Code', 'Element', 'Year Code', 'Value'],
]

In [78]:
# Map each Element label to the index labels of its rows.
gs = data_.groupby(by='Element').groups

In [82]:
# Write one datapoints CSV per Element (measure), keyed by country, item and year.
for element_name, row_labels in gs.items():
    cid = to_concept_id(element_name)
    # `gs` holds index *labels* for each group, so label-based `.loc` is the
    # correct accessor. The original `.ix` was deprecated in pandas 0.20 and
    # removed in pandas 1.0, which makes this cell fail on a current kernel.
    df = data_.loc[row_labels].drop('Element', axis=1)
    # Remaining columns, in order: Country Code, Item Code, Year Code, Value.
    df.columns = ['country', 'item', 'year', cid]

    path = '../ddf--datapoints--{}--by--country--item--year.csv'.format(cid)
    df.to_csv(path, index=False)

In [84]:
# NOTE(review): presumably builds the DDF index for the files written to '../';
# the displayed result lists key/value/file rows. Confirm against ddf_utils.
create_index_file('../')

Unnamed: 0,key,value,file
0,concept,name,ddf--concepts.csv
1,concept,concept_type,ddf--concepts.csv
0,"country,item,year",domestic_supply_quantity,ddf--datapoints--domestic_supply_quantity--by-...
0,"country,item,year",export_quantity,ddf--datapoints--export_quantity--by--country-...
0,"country,item,year",fat_supply_quantity_g_capita_day,ddf--datapoints--fat_supply_quantity_g_capita_...
0,"country,item,year",feed,ddf--datapoints--feed--by--country--item--year...
0,"country,item,year",food,ddf--datapoints--food--by--country--item--year...
0,"country,item,year",food_supply_kcal_capita_day,ddf--datapoints--food_supply_kcal_capita_day--...
0,"country,item,year",food_supply_quantity_kg_capita_yr,ddf--datapoints--food_supply_quantity_kg_capit...
0,"country,item,year",import_quantity,ddf--datapoints--import_quantity--by--country-...


In [17]:
import ddf_utils.ddf_reader as ddf

In [18]:
# NOTE(review): presumably the directory ddf_reader searches when a dataset is
# referenced by name — confirm against ddf_utils.
ddf.SEARCH_PATH = '../../'

In [19]:
# NOTE(review): the query filters on geo == "afg", yet the saved output below
# lists "alb" rows — the output may be stale; re-run to confirm.
(
    ddf.ddf_datapoint('ddf--gapminder--gapminder_world',
                      'food_supply_kilocalories_per_person_and_day')
    .query('geo == "afg"')
)

Unnamed: 0,food_supply_kilocalories_per_person_and_day,geo,time
0,2234,alb,1961
1,2248,alb,1962
2,2163,alb,1963
3,2276,alb,1964
4,2258,alb,1965
5,2259,alb,1966
6,2266,alb,1967
7,2346,alb,1968
8,2408,alb,1969
9,2419,alb,1970
