In [1]:
import pandas as pd 
import wbdata

## Access WB Data

In [2]:
# Country name -> two-letter country code. The names are the lookup keys used
# to derive ``geo_code`` from a frame's country-name column.
countries = {
    "Burkina Faso": "BF",
    "Congo, Dem. Rep.": "CD",
    "Ethiopia": "ET",
    "Kenya": "KE",
    "Nigeria": "NG",
    "Senegal": "SN",
    "Tanzania": "TZ",
    "Uganda": "UG",
    "South Africa": "ZA",
    "Zambia": "ZM",
}

# Codes as a plain list in dictionary order, so the list is identical on
# every run (a set of strings has no stable iteration order across runs).
country_code = list(countries.values())

In [3]:
def collect():
    """Download the Takwimu indicators for the selected countries.

    Reads ``Takwimu_indicators.csv`` from the working directory, using its
    first column as the index, and turns the remaining column into a dict
    (index value -> column value). That dict is passed to
    ``wbdata.get_dataframe`` together with the module-level ``country_code``
    list.

    Returns:
        Whatever ``wbdata.get_dataframe`` returns for those indicators and
        countries.
    """
    # generate a dict from the indicators file.
    # ``read_csv(squeeze=True)`` was removed in pandas 2.0; squeezing the
    # column axis after loading is the supported equivalent and also works
    # on older pandas releases.
    indicators_table = pd.read_csv('Takwimu_indicators.csv', index_col=0)
    takwimu_indicators = indicators_table.squeeze("columns").to_dict()

    # Gather indicator data on the selected countries.
    # NOTE(review): newer wbdata releases appear to have renamed
    # ``convert_date`` to ``parse_dates`` - confirm against the installed
    # version before upgrading.
    wb_data = wbdata.get_dataframe(takwimu_indicators,
                                   country=country_code, convert_date=False)
    return wb_data

## Structure into Hurumap format

In [15]:
def process():
    data = pd.read_csv('data/takwimu_wb_data.csv')
    
#     population
    def population():
        """Return the 2017 male/female population figures in long format.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        The result has one row per country and sex with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version``, ``sex`` and
        ``total``, ordered by country name. Names missing from ``countries``
        get a missing ``geo_code``.
        """
        year = 2017
        source_columns = ['country', 'date', 'Population Male', 'PopulationFemale']
        labels = {'country': 'name',
                  'Population Male': 'male',
                  'PopulationFemale': 'female'}

        # Keep only the rows of the chosen year; the date column is then redundant.
        wide = (data.loc[data['date'] == year, source_columns]
                    .drop(columns='date')
                    .rename(columns=labels))

        # One row per (country, sex) instead of one column per sex.
        tidy = wide.melt(id_vars='name', value_vars=['male', 'female'],
                         var_name='sex', value_name='total')
        tidy = tidy.assign(geo_code=tidy['name'].map(countries),
                           geo_level="country",
                           geo_version=year)

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'sex', 'total']
        return tidy[ordered].sort_values('name')
    
#     basic services
    def basic_services():
        """Return the 2015 electricity/water access figures in long format.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        The result has one row per country and service with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version``, ``service``
        and ``total``, ordered by country name.
        """
        year = 2015
        source_columns = ['country', 'date',
                          'access to basic services - Electricity',
                          'access to basic services - Water']
        labels = {'country': 'name',
                  'access to basic services - Electricity': 'electricity',
                  'access to basic services - Water': 'water'}

        # Keep only the rows of the chosen year; the date column is then redundant.
        wide = (data.loc[data['date'] == year, source_columns]
                    .drop(columns='date')
                    .rename(columns=labels))

        # One row per (country, service) instead of one column per service.
        tidy = wide.melt(id_vars='name', value_vars=['electricity', 'water'],
                         var_name='service', value_name='total')
        tidy = tidy.assign(geo_code=tidy['name'].map(countries),
                           geo_level="country",
                           geo_version=year)

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'service', 'total']
        return tidy[ordered].sort_values('name')
    
#     youth unemployment
    def youth_unemployment():
        """Return the 2015 male/female youth unemployment figures in long format.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        The result has one row per country and sex with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version``, ``sex`` and
        ``total``, ordered by country name.
        """
        # A single year drives both the row filter and ``geo_version`` so the
        # version stamp always matches the year the figures were taken from.
        year = 2015

        df = data[['country', 'date','Youth unemployment-Male','Youth unemployment - Female' ]]
        df = df[df['date'] == year].drop('date', axis=1)
        df.columns = ['name','male','female']

        # One row per (country, sex) instead of one column per sex.
        df = df.melt(id_vars='name', value_vars=['male','female'],
                var_name='sex', value_name='total')

        df['geo_code'] = df['name'].map(countries)
        df['geo_level'] = "country"
        df['geo_version'] = year

        result = df[['geo_level','geo_code','name',
                     'geo_version','sex','total']].sort_values('name')

        return result
    
#     Life expectancy
    def life_expectancy():
        """Return the 2016 male/female life expectancy figures in long format.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        The result has one row per country and sex with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version``, ``sex`` and
        ``age``, ordered by country name.
        """
        year = 2016
        source_columns = ['country', 'date',
                          'Life expectancy-Male', 'Life expectancy-Female']
        labels = {'country': 'name',
                  'Life expectancy-Male': 'male',
                  'Life expectancy-Female': 'female'}

        # Keep only the rows of the chosen year; the date column is then redundant.
        wide = (data.loc[data['date'] == year, source_columns]
                    .drop(columns='date')
                    .rename(columns=labels))

        # One row per (country, sex) instead of one column per sex.
        tidy = wide.melt(id_vars='name', value_vars=['male', 'female'],
                         var_name='sex', value_name='age')
        tidy = tidy.assign(geo_code=tidy['name'].map(countries),
                           geo_level="country",
                           geo_version=year)

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'sex', 'age']
        return tidy[ordered].sort_values('name')
    
#     infant & Under-5 mortality (per 1000)
    def infant_under_5_mortality():
        """Return infant and under-5 mortality for the latest date in ``data``.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        Rows are taken from the largest ``date`` value present; rows with
        missing values are not filtered out. The result has one row per
        country and mortality kind with the columns ``geo_level``,
        ``geo_code``, ``name``, ``geo_version`` (the date), ``mortality`` and
        ``rate``, ordered by country name.
        """
        subset = data[['country', 'date',
                       'Infant Mortality', 'Under 5 Mortality rates']]
        newest = subset['date'].max()
        labels = {'country': 'name',
                  'Infant Mortality': 'infant',
                  'Under 5 Mortality rates': 'under_5'}

        latest = subset.loc[subset['date'] == newest].rename(columns=labels)

        # One row per (country, mortality kind); the date travels along as an id.
        tidy = latest.melt(id_vars=['name', 'date'], value_vars=['infant', 'under_5'],
                           var_name='mortality', value_name='rate')
        tidy = (tidy.assign(geo_code=tidy['name'].map(countries),
                            geo_level="country")
                    .rename(columns={"date": "geo_version"}))

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'mortality', 'rate']
        return tidy[ordered].sort_values('name')
    
#     Prevalence of HIV
    def hiv_prevalence():
        """Return male/female HIV prevalence for the latest date in ``data``.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        Rows are taken from the largest ``date`` value present; rows with
        missing values are not filtered out. The result has one row per
        country and sex with the columns ``geo_level``, ``geo_code``,
        ``name``, ``geo_version`` (the date), ``sex`` and ``rate``, ordered
        by country name.
        """
        subset = data[['country', 'date',
                       'Prevalence of HIV, male (% ages 15-24)',
                       'Prevalence of HIV, female (% ages 15-24)']]
        newest = subset['date'].max()
        labels = {'country': 'name',
                  'Prevalence of HIV, male (% ages 15-24)': 'male',
                  'Prevalence of HIV, female (% ages 15-24)': 'female'}

        latest = subset.loc[subset['date'] == newest].rename(columns=labels)

        # One row per (country, sex); the date travels along as an id.
        tidy = latest.melt(id_vars=['name', 'date'], value_vars=['male', 'female'],
                           var_name='sex', value_name='rate')
        tidy = (tidy.assign(geo_code=tidy['name'].map(countries),
                            geo_level="country")
                    .rename(columns={"date": "geo_version"}))

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'sex', 'rate']
        return tidy[ordered].sort_values('name')
    
#     Primary completion rate
    def primary_completion():
        """Return male/female primary completion rates for the latest complete date.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        Rows with any missing value in the selected columns are dropped
        first; the rows kept are those at the largest remaining ``date``.
        The result has one row per country and sex with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version`` (the date),
        ``sex`` and ``rate``, ordered by country name.
        """
        complete = data[['country', 'date',
                         'Primary completion rate, male (%)',
                         'Primary completion rate, female (%)']].dropna(axis=0)
        newest = complete['date'].max()
        labels = {'country': 'name',
                  'Primary completion rate, male (%)': 'male',
                  'Primary completion rate, female (%)': 'female'}

        latest = complete.loc[complete['date'] == newest].rename(columns=labels)

        # One row per (country, sex); the date travels along as an id.
        tidy = latest.melt(id_vars=['name', 'date'], value_vars=['male', 'female'],
                           var_name='sex', value_name='rate')
        tidy = (tidy.assign(geo_code=tidy['name'].map(countries),
                            geo_level="country")
                    .rename(columns={"date": "geo_version"}))

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'sex', 'rate']
        return tidy[ordered].sort_values('name')
    
#     Employment to population ratio
    def employment_to_population():
        """Return male/female employment-to-population ratios for the latest complete date.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        Rows with any missing value in the selected columns are dropped
        first; the rows kept are those at the largest remaining ``date``.
        The result has one row per country and sex with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version`` (the date),
        ``sex`` and ``rate``, ordered by country name.
        """
        complete = data[['country', 'date',
                         'Employment to population ratio male (%)',
                         'Employment to population ratio female (%)']].dropna(axis=0)
        newest = complete['date'].max()
        labels = {'country': 'name',
                  'Employment to population ratio male (%)': 'male',
                  'Employment to population ratio female (%)': 'female'}

        latest = complete.loc[complete['date'] == newest].rename(columns=labels)

        # One row per (country, sex); the date travels along as an id.
        tidy = latest.melt(id_vars=['name', 'date'], value_vars=['male', 'female'],
                           var_name='sex', value_name='rate')
        tidy = (tidy.assign(geo_code=tidy['name'].map(countries),
                            geo_level="country")
                    .rename(columns={"date": "geo_version"}))

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'sex', 'rate']
        return tidy[ordered].sort_values('name')
    
#     Physicians, Nurses and Midwives per 1000
    def health_staff():
        """Return physician and nurse/midwife rates for the latest complete date.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        Rows with any missing value in the selected columns are dropped
        first; the rows kept are those at the largest remaining ``date``.
        The result has one row per country and role with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version`` (the date),
        ``role`` and ``rate``, ordered by country name.
        """
        complete = data[['country', 'date',
                         'Physicians per 1000', 'Nurses and Mid wives']].dropna(axis=0)
        newest = complete['date'].max()
        labels = {'country': 'name',
                  'Physicians per 1000': 'physicians',
                  'Nurses and Mid wives': 'nurses and mid wives'}

        latest = complete.loc[complete['date'] == newest].rename(columns=labels)

        # One row per (country, role); the date travels along as an id.
        tidy = latest.melt(id_vars=['name', 'date'],
                           value_vars=['physicians', 'nurses and mid wives'],
                           var_name='role', value_name='rate')
        tidy = (tidy.assign(geo_code=tidy['name'].map(countries),
                            geo_level="country")
                    .rename(columns={"date": "geo_version"}))

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'role', 'rate']
        return tidy[ordered].sort_values('name')
    
#     Account ownership
    def acc_ownership():
        """Return male/female account ownership rates for the latest complete date.

        Reads the enclosing ``data`` frame and the ``countries`` mapping.
        Rows with any missing value in the selected columns are dropped
        first; the rows kept are those at the largest remaining ``date``.
        The result has one row per country and sex with the columns
        ``geo_level``, ``geo_code``, ``name``, ``geo_version`` (the date),
        ``sex`` and ``rate``, ordered by country name.
        """
        complete = data[['country', 'date',
                         'Account ownership,male (% of population ages 15+)',
                         'Account ownership,female (% of population ages 15+)']].dropna(axis=0)
        newest = complete['date'].max()
        labels = {'country': 'name',
                  'Account ownership,male (% of population ages 15+)': 'male',
                  'Account ownership,female (% of population ages 15+)': 'female'}

        latest = complete.loc[complete['date'] == newest].rename(columns=labels)

        # One row per (country, sex); the date travels along as an id.
        tidy = latest.melt(id_vars=['name', 'date'], value_vars=['male', 'female'],
                           var_name='sex', value_name='rate')
        tidy = (tidy.assign(geo_code=tidy['name'].map(countries),
                            geo_level="country")
                    .rename(columns={"date": "geo_version"}))

        ordered = ['geo_level', 'geo_code', 'name', 'geo_version', 'sex', 'rate']
        return tidy[ordered].sort_values('name')

    
        


    
       