In [378]:
import requests, json
import pandas as pd
import numpy as np
from datetime import datetime

In [379]:
# read excel with country names and codes
# return list with country codes relevant for UCDP
# NOTE(review): absolute, machine-specific path - this cell only runs where
# the workbook exists at exactly this location.
country_code_pathfile = '/Users/sabinejoseph/Downloads/kfe-sabinejo-patch-1/Country_codes_NAMO.xlsx'
sheet = 'Sheet1'  # worksheet passed to every country_codes_from_excel call below

def country_codes_from_excel(country_codes, sheet_num, column_name):
    """Return one column of an Excel worksheet as a list.

    Parameters
    ----------
    country_codes : str
        Path of the Excel workbook to open.
    sheet_num : str or int
        Worksheet selector handed to ``ExcelFile.parse``.
    column_name : str
        Header of the column to extract.

    Returns
    -------
    list
        The values of ``column_name`` in row order.

    Raises
    ------
    KeyError
        If the parsed sheet has no column called ``column_name``.
    """
    # Open the workbook in a ``with`` block so its file handle is released
    # as soon as the sheet has been parsed (it used to stay open).
    with pd.ExcelFile(country_codes) as workbook:
        countries = workbook.parse(sheet_num)
    return list(countries[column_name])  # UCDP uses Gleditsch and Ward country codes

# Pull the four code columns out of the workbook, one helper call per column,
# in the same order as before: UCDP codes, 3-letter, 2-letter, FIPS.
CCS, CC3, CC2, FIPS = (
    country_codes_from_excel(country_code_pathfile, sheet, code_column)
    for code_column in ('UCDP_country_codes', 'Country_3', 'Country_2', 'FIPS')
)

In [380]:
# create empty df (each year * 16 countries)
new_format = '%Y'  # four-digit-year format string; passed to create_empty_df
# NOTE(review): create_empty_df builds rows for range(start_year, end_year),
# so end_year itself gets no rows, while the World Bank request further down
# asks for start_year:end_year - confirm which bound is intended.
start_year = 1989
end_year = 2018
# Column labels and the code lists that fill them; the two lists are parallel.
CC_col_names = ['CC3', 'CCS', 'CC2', 'FIPS']
CC_lists = [CC3, CCS, CC2, FIPS]

def create_empty_df(date_format, start_year, end_year, CC_col_names, CC_lists):
    """Build a year-by-country skeleton frame holding only country-code columns.

    The index contains every year in ``range(start_year, end_year)``, each
    repeated once per country. Every code list is tiled once per year, so
    each block of rows for a year lists the countries in their given order.

    Parameters
    ----------
    date_format : str
        Not used; kept so existing calls keep working.
    start_year : int
        First year to include.
    end_year : int
        Stop year (exclusive, as in ``range``).
    CC_col_names : list of str
        Column labels, parallel to ``CC_lists``.
    CC_lists : list of list
        One list of country codes per label. The first list sets the number
        of countries; the others are expected to have the same length.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by year with one column per entry of ``CC_col_names``.

    Raises
    ------
    ValueError
        If there are more column names than code lists, or (raised by pandas)
        if a code list's length differs from the first one's.
    """
    if len(CC_col_names) > len(CC_lists):
        raise ValueError('CC_col_names has more entries than CC_lists')

    years = list(range(start_year, end_year))
    # Country count comes from the arguments rather than the notebook-level
    # CC3 global, so the function no longer depends on hidden kernel state.
    n_countries = len(CC_lists[0]) if CC_lists else 0

    year_index = [year for year in years for _ in range(n_countries)]
    df = pd.DataFrame(index=year_index)

    for col_name, codes in zip(CC_col_names, CC_lists):
        # Integer repetition count: the previous `/` yields a float on
        # Python 3, and a list cannot be multiplied by a float.
        df[col_name] = list(codes) * len(years)

    return df

# Build the year x country skeleton, then move the year index into an
# ordinary column named 'date_start'.
df = (
    create_empty_df(new_format, start_year, end_year, CC_col_names, CC_lists)
    .reset_index()
    .rename(columns={'index': 'date_start'})
)

In [381]:
# worldbank - World Development Indicators:
##Intentional homicides (per 100,000 people), VC.IHR.PSRC.P5
##Mortality rate, under-5 (per 1,000 live births), SH.DYN.MORT

indicators = ['VC.IHR.PSRC.P5', 'SH.DYN.MORT']
url = 'http://api.worldbank.org/countries/'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs the cell

# One parsed JSON response per (country, indicator) pair, appended
# country-major: all indicators of the first country, then the second, ...
# Later cells rely on this ordering.
results = []
for country_code in CC2:
    for indicator in indicators:
        r = requests.get(url + country_code.lower() + '/indicators/'
                         + indicator
                         + '/?date=' + str(start_year) + ':' + str(end_year)
                         + '&format=json',
                         timeout=REQUEST_TIMEOUT)
        # Fail with a clear HTTP error instead of an obscure JSON decode error.
        r.raise_for_status()
        results.append(r.json())

# worldbank - Worldwide Governance Indicators #not available through API
## Control of Corruption Estimate 

In [382]:
# create empty columns to extend combined df based on indicators
# results is ordered country-major, so its first len(indicators) entries are
# the first country's responses - one per indicator. Taking the labels from
# all of them (not just entries 0 and 1) keeps this in step with `indicators`.
# NOTE(review): still assumes the first country returned data for every indicator.
cols_to_append = [str(results[j][1][0]['indicator']['value'])
                  for j in range(len(indicators))]

def append_empty_cols_to_df(df, col_names):
    """Add a NaN-filled column to ``df`` for every name in ``col_names``.

    ``df`` is modified in place and also returned; a column that already
    exists under one of the names is overwritten with NaN.
    """
    for new_column in col_names:
        df[new_column] = np.nan
    return df
        
# Add one NaN-filled column per indicator label (modifies df in place).
df = append_empty_cols_to_df(df, cols_to_append)

In [392]:
# results to df
def json_to_df(results, frame=None):
    """Copy non-null World Bank observations into the matching frame rows.

    Each response is handled on its own: every record is matched to the
    row(s) whose ``date_start`` equals the record's year and whose ``CC2``
    equals the record's country id, and its value is written to the column
    named after the record's indicator label. Responses therefore no longer
    need to be row-aligned with each other, and an empty response for one
    indicator does not hide data from the next.

    Parameters
    ----------
    results : list
        Parsed JSON responses, each expected to look like
        ``[page_info, records]``. Responses without a non-empty record list
        (for example an error payload or ``[page_info, None]``) are skipped.
    frame : pandas.DataFrame, optional
        Frame to fill; it needs ``date_start`` and ``CC2`` columns. Defaults
        to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        The frame that was filled (modified in place).
    """
    target = df if frame is None else frame

    for response in results:
        if not isinstance(response, list) or len(response) < 2 or not response[1]:
            continue
        for record in response[1]:
            value = record['value']
            if value is None:
                continue
            row_mask = ((target['date_start'] == int(record['date'])) &
                        (target['CC2'] == str(record['country']['id'])))
            # Single .loc assignment: chained `frame[col][rows] = v` may write
            # to a temporary copy and triggers SettingWithCopyWarning.
            target.loc[row_mask, record['indicator']['value']] = value
    return target

# Fill the indicator columns of df from the downloaded API responses.
df = json_to_df(results)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [None]:
# next: impute data

In [393]:
# Spot check: display rows at positions 250-299 of the filled frame.
df[250:300]

Unnamed: 0,date_start,CC3,CCS,CC2,FIPS,"Intentional homicides (per 100,000 people)","Mortality rate, under-5 (per 1,000 live births)"
250,2004,LBN,660.0,LB,LE,,15.2
251,2004,SYR,652.0,SY,SY,2.4,19.6
252,2004,EGY,651.0,EG,EG,0.4,37.3
253,2004,IRN,630.0,IR,IR,,27.3
254,2004,TUR,640.0,TR,TU,4.4,29.7
255,2004,IRQ,645.0,IQ,IZ,,41.5
256,2005,KWT,690.0,KW,KU,,11.7
257,2005,BHR,692.0,BH,BA,0.5,10.9
258,2005,OMN,698.0,OM,MU,,12.7
259,2005,QAT,694.0,QA,QA,,10.3


In [391]:
# Write the combined frame to the working directory; with to_csv's defaults
# the row index is written as an unnamed first column.
df.to_csv('Worldbank_indicators.csv')