In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go 
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot as py
import matplotlib.pyplot as plt

from os import path

# Root directory (relative to the notebook) under which every input CSV is looked up.
data_path = 'data'

# Turn off pandas' chained-assignment warning: later cells add columns to
# frames that were produced by boolean filtering.
pd.set_option('mode.chained_assignment', None)

In [2]:
# Enable plotly's offline rendering inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather
# than embedding it in the notebook — confirm against the installed plotly version.
init_notebook_mode(connected=True)

<center>
    <h1>Worldwide Analysis on Extreme Poverty</h1>
    <h4>Authors: João Guilherme Mattos, Pedro Torres</h4>
    <h4>Sources: <a href="https://data.oecd.org/">OECD</a>, <a href="https://www.worldbank.org/">The World Bank</a></h4> 
</center>

<h2>1 - Extreme Poverty around the world</h2>
<ul>
    <li>Extreme poverty is defined as living on less than 1.90 international dollars per day.</li>
</ul>
<h3>1.1 - World population living in Extreme Poverty</h3>
<ul>
    <li>How has the world progressed in eradicating extreme poverty?</li>
</ul>

In [3]:
# Column names of the Our World in Data extract, plus the derived total column.
not_poor_col = 'Number of people not in extreme poverty (people)'
poor_col = 'Number of people living in extreme poverty (people)'
total_col = 'Number of people'

poverty_csv = path.join(data_path, 'OUR_WORLD_IN_DATA', 'world-population-in-extreme-poverty-absolute.csv')
data_poverty = pd.read_csv(poverty_csv, encoding='utf-8')

# Coerce the year and both head-count columns to numeric dtypes.
numeric_cols = ['Year', not_poor_col, poor_col]
data_poverty[numeric_cols] = data_poverty[numeric_cols].apply(pd.to_numeric)

# Chronological order, keeping only the observations after 1980.
data_poverty = data_poverty.sort_values(by=['Year'])
data_poverty = data_poverty[data_poverty['Year'] > 1980]

# Total population is the sum of both groups; all three series are then
# expressed in billions of people and rounded to two decimals.
data_poverty[total_col] = data_poverty[not_poor_col] + data_poverty[poor_col]
for people_col in (not_poor_col, poor_col, total_col):
    data_poverty[people_col] = data_poverty[people_col] / 10**9
data_poverty = data_poverty.round(2)

In [4]:
# One line per population series: (column of data_poverty, legend label).
line_specs = [
    ('Number of people', 'Number of people'),
    ('Number of people not in extreme poverty (people)', 'Not in extreme poverty'),
    ('Number of people living in extreme poverty (people)', 'Living in extreme poverty'),
]

trace, trace1, trace2 = [
    go.Scatter(
        x=data_poverty['Year'],
        y=data_poverty[column],
        name=label,
        hoverinfo='y'
    )
    for column, label in line_specs
]
data = [trace, trace1, trace2]

# Ticks every six years from 1981, plus the final observation year.
year_ticks = np.append(np.arange(1981, 2012, 6), 2015)

layout = go.Layout(
    title='World population living in extreme poverty (1981-2015)',
    xaxis=go.layout.XAxis(title='Year', tickmode='array', tickvals=year_ticks),
    yaxis=go.layout.YAxis(title='Population (in billions)', ticksuffix='B ')
)

fig = go.Figure(data=data, layout=layout)

py(fig)

In [5]:
# Stacked areas, bottom to top: (column of data_poverty, legend label).
# Both traces share stackgroup 'one' so plotly stacks them on each other.
area_specs = [
    ('Number of people living in extreme poverty (people)', 'Living in extreme poverty'),
    ('Number of people not in extreme poverty (people)', 'Not in extreme poverty '),
]

trace1, trace2 = [
    go.Scatter(
        x=data_poverty['Year'],
        y=data_poverty[column],
        name=label,
        stackgroup='one',
        mode='none',
        hoverinfo='y'
    )
    for column, label in area_specs
]
data = [trace1, trace2]

# Ticks every six years from 1981, plus the final observation year.
year_ticks = np.append(np.arange(1981, 2012, 6), 2015)

layout = go.Layout(
    title='World population living in extreme poverty (1981-2015)',
    xaxis=go.layout.XAxis(title='Year', tickmode='array', tickvals=year_ticks, ticklen=5),
    yaxis=go.layout.YAxis(title='Population (in billions)', ticksuffix='B ')
)

fig = go.Figure(data=data, layout=layout)

py(fig, filename='stacked-area-char')

In [6]:
def createWBDataFrame(path):
    """Read a World Bank indicator CSV and attach the country metadata.

    The file at `path` is parsed with its header on the third line
    (`header=2`), left-joined with the module-level `country_metadata` frame
    on 'Country Code', and returned without the 'Indicator Name' and
    'Unnamed: 63' columns.

    Note: the parameter name shadows the `os.path` module imported at the top
    of the notebook, but only inside this function.
    """
    indicators = pd.read_csv(path, header=2, encoding='utf-8')
    with_metadata = indicators.merge(country_metadata, how='left', on=['Country Code'])
    return with_metadata.drop(columns=['Indicator Name', 'Unnamed: 63'])

def tideWBDataFrame(df):
    """Reshape a wide World Bank frame into one row per country and year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the identifier columns 'Country Name', 'Country Code',
        'Region' and 'Indicator Code'; every other column is treated as a
        year holding that indicator's observation.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Name', 'Country Code', 'Region', 'Year' followed by
        one column per indicator code. 'Year' holds the original column
        labels unchanged.
    """
    id_columns = ['Country Name', 'Country Code', 'Region', 'Indicator Code']
    long_form = pd.melt(df, id_vars=id_columns, var_name='Year', value_name='observation')
    # The aggregator is named by string (as the other groupby/agg calls in this
    # notebook do) instead of passing the numpy callable, which newer pandas
    # releases warn about.
    tidy = pd.pivot_table(
        long_form,
        columns=['Indicator Code'],
        index=['Country Name', 'Country Code', 'Region', 'Year'],
        values='observation',
        aggfunc='mean',
    )
    return tidy.reset_index()

def keepMetrics(df, metrics):
    """Return the identifier columns of `df` plus the requested metric columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Country Name', 'Country Code', 'Region' and 'Year'.
    metrics : iterable of str
        Metric column labels to keep, in the order they should appear. Any
        iterable is accepted (list, tuple, dict keys, ...).

    Returns
    -------
    pandas.DataFrame
        The four identifier columns followed by `metrics`.
    """
    standard_columns = ['Country Name', 'Country Code', 'Region', 'Year']
    # list(...) so that tuples and other iterables can be concatenated too.
    return df.loc[:, standard_columns + list(metrics)]

In [7]:
# Country metadata: only the columns left after this drop are joined onto the indicator files.
metadata_csv = path.join(data_path, 'GDP_BY_COUNTRY', 'Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10515210.csv')
country_metadata = pd.read_csv(metadata_csv).drop(
    columns=['IncomeGroup', 'SpecialNotes', 'TableName', 'Unnamed: 5']
)

social_env_path = path.join(data_path, 'SOCIAL_ENV_ECON_FACTORS_BY_COUNTRY')
poverty_csv = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
extreme_poverty_by_country = tideWBDataFrame(createWBDataFrame(poverty_csv))

# Keep 1990-2017 only. 'Year' holds strings, so the bounds are compared as text.
in_period = extreme_poverty_by_country['Year'].between('1990', '2017')
extreme_poverty_by_country = keepMetrics(
    extreme_poverty_by_country[in_period],
    ['SI.POV.DDAY', 'SP.POP.TOTL']
)

# Give the two indicator columns readable names.
extreme_poverty_by_country = extreme_poverty_by_country.rename(
    columns={'SI.POV.DDAY': 'Extreme Poverty(% pop)', 'SP.POP.TOTL': 'Total Pop.'}
)

In [8]:
# Poverty surveys are sparse, so each country's latest known value is carried
# forward to the following years, and 'last_obs_year' records the year that
# value was actually observed.
has_observation = extreme_poverty_by_country['Extreme Poverty(% pop)'].notnull()
extreme_poverty_by_country['last_obs_year'] = extreme_poverty_by_country['Year'].where(has_observation)
extreme_poverty_by_country = extreme_poverty_by_country.sort_values(['Country Code', 'Year'])

# Forward-fill inside each country in a single grouped pass. This replaces a
# loop that re-concatenated the frame once per country and relied on the
# deprecated fillna(method='ffill').
carried_columns = [name for name in extreme_poverty_by_country.columns if name != 'Country Code']
extreme_poverty_by_country[carried_columns] = (
    extreme_poverty_by_country.groupby('Country Code')[carried_columns].ffill()
)

# Absolute head-count, derived from the percentage and the total population.
extreme_poverty_by_country['Total Pop. in Extreme Poverty'] = (
    extreme_poverty_by_country['Total Pop.'] * extreme_poverty_by_country['Extreme Poverty(% pop)'] / 100
)

# Hover text shown on the choropleth map.
extreme_poverty_by_country['text'] = [
    "{one}<br>Extreme Poverty: {two} % <br>Extreme Poverty: {three} millions<br>Last obtained: {four}".format(
        one=row['Country Name'],
        two=row['Extreme Poverty(% pop)'],
        three=np.round(row['Total Pop. in Extreme Poverty']/10**6, 3),
        four=row['last_obs_year']
    )
    for index, row in extreme_poverty_by_country.iterrows()
]

<h3>1.2 - Extreme poverty distribution around the world</h3>
<h4>1.2.1 - By region</h4>
<ul>
    <li>Which regions are the most critical, and how have they progressed in tackling the problem?</li>
</ul>
    

In [9]:
# Regional totals: rows with any missing value are discarded, then head-counts
# and populations are summed per region and year.
region_columns = ['Region', 'Year', 'Total Pop. in Extreme Poverty', 'Total Pop.']
extreme_poverty_by_region = (
    extreme_poverty_by_country.loc[:, region_columns]
    .dropna()
    .groupby(['Region', 'Year'], as_index=False)
    .agg({'Total Pop. in Extreme Poverty': 'sum', 'Total Pop.': 'sum'})
)

# Share of the regional population living in extreme poverty, in percent.
extreme_poverty_by_region['Mean Pop. in Extreme Poverty'] = np.round(
    100*extreme_poverty_by_region['Total Pop. in Extreme Poverty']/extreme_poverty_by_region['Total Pop.'],
    2
)

# Restrict to 1990-2017 ('Year' is text) and order chronologically, then by region.
region_years = extreme_poverty_by_region['Year']
extreme_poverty_by_region = extreme_poverty_by_region[(region_years >= '1990') & (region_years < '2018')]
extreme_poverty_by_region = extreme_poverty_by_region.sort_values(by=['Year', 'Region'])

In [10]:
# One line per region showing the share of its population in extreme poverty.
region_traces = []
for region in extreme_poverty_by_region['Region'].unique():
    ext_pov_region = extreme_poverty_by_region[extreme_poverty_by_region['Region'] == region]
    region_traces.append(
        go.Scatter(
            x=ext_pov_region['Year'],
            y=ext_pov_region['Mean Pop. in Extreme Poverty'],
            name=region,
            mode='lines'
        )
    )

layout = go.Layout(
    title=go.layout.Title(text='Evolution of extreme poverty by region (1990-2017)'),
    xaxis=go.layout.XAxis(
        title='Year',
        tickmode='array',
        tickvals=np.arange(1981, 2018, step=1),
        tickangle=45
    ),
    yaxis=go.layout.YAxis(
        title='Pop. in extreme poverty (%)',
        autorange=True
    )
)

fig = go.Figure(data=region_traces, layout=layout)

py(fig, filename='axes-lines')

<h4>1.2.2 - By country</h4>
<ul>
    <li>Which countries are most critical and how have they evolved?</li>
</ul>

In [11]:
# Years shown by the slider, in chronological order. 'Year' holds four-digit
# strings, so a plain sort is chronological.
slider_years = sorted(extreme_poverty_by_country['Year'].unique())

# One choropleth per year. Only the first is visible when the figure loads, so
# the map matches the slider's initial position (active=0); without this all
# yearly maps would be drawn on top of each other until the slider is moved.
data_slider = []
for position, year in enumerate(slider_years):
    extreme_poverty_year = extreme_poverty_by_country[extreme_poverty_by_country['Year'] == year]

    data_slider.append(
        go.Choropleth(
            locationmode='ISO-3',
            locations=extreme_poverty_year['Country Code'],
            text=extreme_poverty_year['text'],
            z=extreme_poverty_year['Extreme Poverty(% pop)'],
            colorbar={'title': 'Extreme poverty (% of population)', 'ticksuffix': '%'},
            hoverinfo='text',
            visible=(position == 0)
        )
    )

# Each slider step shows exactly one trace and is labelled with the year that
# trace was built from (rather than assuming the years start at 1990 and are
# contiguous).
steps = []
for position, year in enumerate(slider_years):
    visibility = [False] * len(data_slider)
    visibility[position] = True

    steps.append(
        go.layout.slider.Step(
            method='restyle',
            args=['visible', visibility],
            label=str(year)
        )
    )

layout = go.Layout(
    title=go.layout.Title(
        text='Evolution of extreme poverty (1990-2017)'
    ),
    geo=go.layout.Geo(
        scope='world',
        showframe=False,
        coastlinewidth=0,
        projection=go.layout.geo.Projection(
            type='equirectangular'
        )
    ),
    sliders=[go.layout.Slider(
        steps=steps,
        active=0,
        currentvalue={"prefix": "Year: "},
        pad={"t": 1}
    )]
)
fig = go.Figure(data=data_slider, layout=layout)
py(fig)

<h2>2 - Gross domestic product (GDP) and extreme poverty</h2>
<h3>2.1 - GDP per capita and extreme poverty by region</h3>
<ul>
    <li>How has the GDP per capita evolved compared with extreme poverty?</li>
</ul>

In [12]:
def _load_wb_indicators(csv_path, indicator_names, first_year='1990'):
    """Load one World Bank topic file and keep a renamed subset of indicators.

    Parameters
    ----------
    csv_path : str
        Location of the World Bank CSV export.
    indicator_names : dict
        Maps each indicator code to keep onto its readable column name; the
        key order defines the column order.
    first_year : str
        Earliest year kept. 'Year' holds text, so the comparison is textual.

    Returns
    -------
    pandas.DataFrame
        Identifier columns plus the renamed indicator columns, restricted to
        rows whose 'Year' is `first_year` or later.
    """
    frame = tideWBDataFrame(createWBDataFrame(csv_path))
    frame = keepMetrics(frame, list(indicator_names))
    frame = frame.rename(columns=indicator_names)
    return frame[frame['Year'] >= first_year]


# Health
hbc_path = path.join(social_env_path, 'Health', 'API_8_DS2_en_csv_v2_10576655.csv')
health_by_country = _load_wb_indicators(hbc_path, {
    'SH.DYN.MORT': 'Mortality Rate Under 5(per 1.000 births)',
    'SH.DYN.AIDS.ZS': 'HIV - 15 to 49 (%)',
    'SH.TBS.INCD': 'Inc. Tuberculosis(per 100.000)',
    'SH.DTH.COMM.ZS': 'Death by Infectious Disease(5 of total)',
    'SN.ITK.DEFC.ZS': 'Undernourishment(% pop)',
    'SP.DYN.TFRT.IN': 'Total Births per Women',
    'SH.XPD.CHEX.GD.ZS': 'Expenditure on Health(% of GDP)',
})

# Child labour and youth inactivity
ulbc_path = path.join(social_env_path, 'Labour', 'API_10_DS2_en_csv_v2_10597117.csv')
underage_labour_by_country = _load_wb_indicators(ulbc_path, {
    'SL.TLF.0714.ZS': 'Children in employment 7-14 (% of children)',
    'SL.TLF.0714.WK.ZS': 'Children in employment(Work only) - 7 to 14(% of chd in emp.)',
    'SL.TLF.0714.WK.TM': 'Avg. Working Hours of work only children',
    'SL.UEM.NEET.ZS': 'Not studying, not working (% of youth)',
})

# Education
edbc_path = path.join(social_env_path, 'Education', 'API_4_DS2_en_csv_v2_10577018.csv')
education_by_country = _load_wb_indicators(edbc_path, {
    'SE.PRM.CMPT.ZS': 'Primary Completion Rate (% of relevant age group)',
    'SE.PRM.UNER.ZS': 'Children out of School(% primary school)',
    'SE.PRM.ENRL.TC.ZS': 'Pupil Teacher ratio',
    'SE.ADT.1524.LT.ZS': 'Youth Literacy Rate 15-24 (% pop)',
    'SE.XPD.TOTL.GD.ZS': 'Expenditure on Education (% of GDP)',
})

# Economics
ecbc_path = path.join(social_env_path, 'Economics', 'API_3_DS2_en_csv_v2_10577958.csv')
economics_by_country = _load_wb_indicators(ecbc_path, {
    'NY.GDP.PCAP.CD': 'GDP per Capita(US$)',
    'NY.GDP.MKTP.KD.ZG': 'GDP Growth(%)',
    'FP.CPI.TOTL.ZG': 'Inflation Rate(%)',
    'DT.ODA.ODAT.PC.ZS': 'ODA per capita(US$)',
})
# Hover text summarising each country-year's economic indicators.
economics_by_country['text'] = [
    "{one}<br>GDP Growth: {two}% <br>GDP per capita: US${three} <br>Inflation Rate: {four}% <br>ODA per capita: US${five}".format(
        one=row['Country Name'],
        two=np.round(row['GDP Growth(%)'], 2),
        three=np.round(row['GDP per Capita(US$)'], 2),
        four=np.round(row['Inflation Rate(%)'], 2),
        five=np.round(row['ODA per capita(US$)'], 2)
    )
    for index, row in economics_by_country.iterrows()
]

# Environment
envbc_path = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
environment_by_country = _load_wb_indicators(envbc_path, {
    'AG.LND.PRCP.MM': 'Avg precipitation(mm)',
    'EN.CLC.MDAT.ZS': '% Pop affected by extreme conditions(Droughts, Flood, high temps)',
    'AG.LND.ARBL.ZS': '% Arable Land',
    'AG.LND.IRIG.AG.ZS': '% Agric. Irrigated Land',
    'NV.AGR.TOTL.ZS': 'Agric., Florestry and Fishing(% of GDP)',
    'AG.YLD.CREL.KG': 'Cereal Yield(Kg/hectare)',
    'EG.ELC.ACCS.ZS': 'Access Electricity(% pop)',
})

In [13]:
# Per-capita economic indicators by country, then averaged by region and year.
per_capita_columns = ['GDP per Capita(US$)', 'ODA per capita(US$)']
econ_per_capita = economics_by_country.loc[:, ['Year', 'Region', 'Country Code', 'Country Name'] + per_capita_columns]

# Aggregators are named by string, matching the other groupby/agg calls in the
# notebook and avoiding the numpy callables that newer pandas warns about.
econ_per_capita_by_region = econ_per_capita.groupby(['Year', 'Region'], as_index=False).agg(
    {column: 'mean' for column in per_capita_columns}
)
for column in per_capita_columns:
    econ_per_capita_by_region[column] = np.round(econ_per_capita_by_region[column], 2)

# Regional poverty shares next to the regional economic averages.
grouped_econ_ext_poverty = pd.merge(extreme_poverty_by_region, econ_per_capita_by_region, on=['Year', 'Region'])
grouped_econ_ext_poverty['Text'] = [
    'GDP: {} USD<br>Pop. Ext. Poverty: {} %'.format(
        row['GDP per Capita(US$)'],
        row['Mean Pop. in Extreme Poverty']
    )
    for index, row in grouped_econ_ext_poverty.iterrows()
]

In [14]:
grouped_econ_ext_poverty = grouped_econ_ext_poverty.sort_values(by=['Region', 'Year'])
regions = list(grouped_econ_ext_poverty['Region'].unique())

# Small multiples: one panel per region on a 4x2 grid. Each panel has a left
# axis for the poverty share and an overlaid right axis for GDP per capita.
fig = tools.make_subplots(
    rows=4,
    cols=2,
    subplot_titles=regions,
    vertical_spacing=0.15,
    horizontal_spacing=0.20,
    print_grid=False)

# Hand-tuned axis limits, one entry per panel. Panels follow the region order
# produced by the sort above.
poverty_ranges = [[0, 65], [0, 10], [0, 20], [0, 10], [0, 2], [0, 60], [0, 60]]
gdp_ranges = [[0, 20000], [0, 40000], [0, 15000], [0, 20000], [0, 80000], [0, 3000], [0, 3000]]
n_panels = len(poverty_ranges)

perc_pop_ext_poverty = []
gdp_per_capita = []
for region in regions:
    g_econ_ext_pov_region = grouped_econ_ext_poverty[grouped_econ_ext_poverty['Region'] == region]

    gdp_per_capita.append(
        go.Scatter(
            x=g_econ_ext_pov_region['Year'],
            y=g_econ_ext_pov_region['GDP per Capita(US$)'],
            name='GDP per capita (US$)',
            mode='lines',
            showlegend=False,
            marker=go.scatter.Marker(color='Orange')
        )
    )

    perc_pop_ext_poverty.append(
        go.Scatter(
            x=g_econ_ext_pov_region['Year'],
            y=g_econ_ext_pov_region['Mean Pop. in Extreme Poverty'],
            name='Extreme poverty pop. (%)',
            mode='lines',
            showlegend=False,
            marker=go.scatter.Marker(color='Blue')
        )
    )

# Axis k belongs to panel k; axis n_panels + k is the GDP axis laid over it.
axes = {}
for k in range(1, n_panels + 1):
    axes['xaxis{}'.format(k)] = go.layout.XAxis(title='Year', ticklen=5)
    axes['yaxis{}'.format(k)] = go.layout.YAxis(
        title='Pop. in ext. poverty (%)',
        range=poverty_ranges[k - 1]
    )
    axes['yaxis{}'.format(n_panels + k)] = go.layout.YAxis(
        title='GDP per capita',
        overlaying='y{}'.format(k),
        anchor='x{}'.format(k),
        side='right',
        range=gdp_ranges[k - 1],
        showgrid=False
    )

fig['layout'].update(
    go.Layout(
        height=1000,
        width=950,
        title='GDP per capita by population living in extreme poverty',
        margin=dict(t=200),
        legend=dict(x=0.35, y=1.12),
        **axes
    )
)

# zip() stops at the shorter input, so fewer regions than configured panels
# no longer raises IndexError; extra regions beyond the panels are ignored.
panel_traces = zip(perc_pop_ext_poverty[:n_panels], gdp_per_capita[:n_panels])
for i, (poverty_trace, gdp_trace) in enumerate(panel_traces):
    col = i%2 + 1
    row = i//2 + 1
    if i == 0:
        # A single legend entry per series is enough for all panels.
        poverty_trace['showlegend'] = True
        gdp_trace['showlegend'] = True

    fig.append_trace(poverty_trace, row, col)
    fig.append_trace(gdp_trace, row, col)
    # Move the GDP trace just added onto the panel's overlaid right-hand axis.
    fig['data'][-1].update(yaxis='y{one}'.format(one=n_panels + 1 + i))

py(fig, filename='Small Multiples: GDP per capita and Extreme Poverty Pop.(%)')

<h3>2.1 - GDP expenditure on education and health by region</h3>
<ul>
    <li>How large are the investments on health and education by region?</li>
</ul>

In [15]:
# Columns identifying one country-year observation in every indicator frame.
country_year_keys = ['Year', 'Region', 'Country Code', 'Country Name']

ed_expenditure = education_by_country.loc[:, country_year_keys + ['Expenditure on Education (% of GDP)']]
health_expenditure = health_by_country.loc[:, country_year_keys + ['Expenditure on Health(% of GDP)']]
econ_growth = economics_by_country.loc[:, country_year_keys + ['GDP Growth(%)']]

# Inner joins: only country-years present in all three frames survive.
grouped_ed_health_exp = ed_expenditure.merge(health_expenditure, on=country_year_keys)
grouped_ed_health_econ_exp = grouped_ed_health_exp.merge(econ_growth, on=country_year_keys)

# Regional yearly averages, restricted to years after 1999 ('Year' is text).
regional_means = {
    'Expenditure on Education (% of GDP)': 'mean',
    'Expenditure on Health(% of GDP)': 'mean',
    'GDP Growth(%)': 'mean',
}
grouped_ed_health_econ_exp = grouped_ed_health_econ_exp.groupby(['Year', 'Region'], as_index=False).agg(regional_means)
grouped_ed_health_econ_exp = grouped_ed_health_econ_exp[grouped_ed_health_econ_exp['Year'] > '1999']

In [16]:
# Numeric id per region; the parallel-coordinates plots use it as the position
# on their colour scale.
id_region = {'South Asia': 0, 'Sub-Saharan Africa': 0.5, 'Latin America & Caribbean': 1}

comparison_years = ['2000', '2016']
compared_regions = ['Latin America & Caribbean', 'South Asia', 'Sub-Saharan Africa']

# Education indicators for the two comparison years in the three regions.
education_rows = education_by_country['Year'].isin(comparison_years) & education_by_country['Region'].isin(compared_regions)
education_by_country_2000_2016 = education_by_country[education_rows]

# Health indicators, same selection.
health_rows = health_by_country['Year'].isin(comparison_years) & health_by_country['Region'].isin(compared_regions)
health_by_country_2000_2016 = health_by_country[health_rows]

education_by_country_2000_2016['id_region'] = [id_region[name] for name in education_by_country_2000_2016['Region']]
health_by_country_2000_2016['id_region'] = [id_region[name] for name in health_by_country_2000_2016['Region']]

In [17]:
regions = list(grouped_ed_health_econ_exp['Region'].unique())

# Small multiples on a 4x2 grid; the layout shows at most seven region panels.
max_panels = 7

fig = tools.make_subplots(
    rows=4,
    cols=2,
    subplot_titles=regions,
    vertical_spacing=0.15,
    print_grid=False)

exp_education = []
exp_health = []
gdp_growth = []
for position, reg in enumerate(regions):
    reg_grouped_ed_health_econ_exp = grouped_ed_health_econ_exp[grouped_ed_health_econ_exp['Region'] == reg]
    # Only the first panel contributes legend entries; the series are the same in every panel.
    in_legend = (position == 0)

    exp_education.append(
        go.Bar(
            x=reg_grouped_ed_health_econ_exp['Year'],
            y=reg_grouped_ed_health_econ_exp['Expenditure on Education (% of GDP)'],
            name='Education (% of GDP)',
            showlegend=in_legend,
            marker=go.bar.Marker(color='Blue')
        )
    )

    exp_health.append(
        go.Bar(
            x=reg_grouped_ed_health_econ_exp['Year'],
            y=reg_grouped_ed_health_econ_exp['Expenditure on Health(% of GDP)'],
            name='Health (% of GDP)',
            showlegend=in_legend,
            marker=go.bar.Marker(color='Orange')
        )
    )

    gdp_growth.append(
        go.Scatter(
            x=reg_grouped_ed_health_econ_exp['Year'],
            y=reg_grouped_ed_health_econ_exp['GDP Growth(%)'],
            name='GDP Growth (%)',
            showlegend=in_legend,
            mode='lines',
            marker=go.scatter.Marker(color='Green')
        )
    )

# Fill the grid row by row. Bounding the loop by the traces actually built
# means fewer than seven regions no longer raises IndexError.
for i in range(min(len(regions), max_panels)):
    col = i%2 + 1
    row = i//2 + 1

    fig.append_trace(exp_education[i], row, col)
    fig.append_trace(exp_health[i], row, col)
    fig.append_trace(gdp_growth[i], row, col)

year_axes = {
    'xaxis{}'.format(k): go.layout.XAxis(title='Year')
    for k in range(1, max_panels + 1)
}

fig['layout'].update(
    go.Layout(
        title='Health and education expenditures compared to GDP growth',
        height=1100,
        width=950,
        barmode='stack',
        margin=dict(t=200),
        legend=dict(x=0.38, y=1.12),
        **year_axes
    )
)

py(fig)

<h3>2.2 - Health and education related issues on poorest regions</h3>
<h4>2.2.1 - Evolution of educational indicators (2000 vs 2016)</h4>
<ul>
    <li>Does comparing the indicators reveal improvements over the last fifteen years?</li>
</ul>

In [18]:
education_by_country_2000 = education_by_country_2000_2016[education_by_country_2000_2016['Year'] == '2000']

# Parallel axes: (axis label, source column, axis range, rounding decimals).
education_axes = [
    ('Primary Completion Rate(%)', 'Primary Completion Rate (% of relevant age group)', [0, 150], 2),
    ('Children out of School(%)', 'Children out of School(% primary school)', [0, 100], 2),
    ('Pupil Teacher Ratio', 'Pupil Teacher ratio', [0, 150], 0),
]
dimensions = [
    dict(range=axis_range,
         label=axis_label,
         values=np.round(education_by_country_2000[column], decimals))
    for axis_label, column, axis_range, decimals in education_axes
]

# One line per country, coloured by its region id.
data = [
    go.Parcoords(
        name='2000 indicators',
        ids=education_by_country_2000['Country Name'],
        line=dict(color=education_by_country_2000['id_region'],
                  colorscale=[[0, '#D7C16B'], [0.5, '#23D8C3'], [1, '#F3F10F']]),
        dimensions=dimensions
    )
]

layout = go.Layout(
    title='Educational indicators (2000)',
    height=500,
    width=950,
    margin=dict(t=150)
)

fig = go.Figure(data=data, layout=layout)

py(fig)

In [19]:
education_by_country_2016 = education_by_country_2000_2016[education_by_country_2000_2016['Year'] == '2016']

# Parallel axes: (axis label, source column, axis range, rounding decimals).
education_axes = [
    ('Primary Completion Rate(%)', 'Primary Completion Rate (% of relevant age group)', [0, 150], 2),
    ('Children out of School(%)', 'Children out of School(% primary school)', [0, 100], 2),
    ('Pupil Teacher Ratio', 'Pupil Teacher ratio', [0, 150], 0),
]
dimensions = [
    dict(range=axis_range,
         label=axis_label,
         values=np.round(education_by_country_2016[column], decimals))
    for axis_label, column, axis_range, decimals in education_axes
]

# One line per country, coloured by its region id.
data = [
    go.Parcoords(
        name='2016 indicators',
        ids=education_by_country_2016['Country Name'],
        line=dict(color=education_by_country_2016['id_region'],
                  colorscale=[[0, '#D7C16B'], [0.5, '#23D8C3'], [1, '#F3F10F']]),
        dimensions=dimensions
    )
]

layout = go.Layout(
    title='Educational indicators (2016)',
    height=500,
    width=950,
    margin=dict(t=150)
)

fig = go.Figure(data=data, layout=layout)

py(fig)

<h4>2.2.2 - Evolution of health indicators (2000 vs 2016)</h4>
<ul>
    <li>Does comparing the indicators reveal improvements over the last fifteen years?</li>
</ul>

In [20]:
health_by_country_2000 = health_by_country_2000_2016[health_by_country_2000_2016['Year'] == '2000']

# Parallel axes: (axis label, source column, axis range, rounding decimals).
# None as decimals means the column is plotted unrounded.
health_axes = [
    ('Mortality Rate Under 5(per 1.000 births)', 'Mortality Rate Under 5(per 1.000 births)', [0, 300], None),
    ('Undernourishment(%)', 'Undernourishment(% pop)', [0, 100], 2),
    ('HIV - 15 to 49 (%)', 'HIV - 15 to 49 (%)', [0, 100], 2),
    ('Inc. Tuberculosis(per 100.000)', 'Inc. Tuberculosis(per 100.000)', [0, 1000], None),
]
dimensions = []
for axis_label, column, axis_range, decimals in health_axes:
    axis_values = health_by_country_2000[column]
    if decimals is not None:
        axis_values = np.round(axis_values, decimals)
    dimensions.append(dict(range=axis_range, label=axis_label, values=axis_values))

# One line per country, coloured by its region id.
data = [
    go.Parcoords(
        name='2000 indicators',
        ids=health_by_country_2000['Country Name'],
        line=dict(color=health_by_country_2000['id_region'],
                  colorscale=[[0, '#D7C16B'], [0.5, '#23D8C3'], [1, '#F3F10F']]),
        dimensions=dimensions
    )
]

layout = go.Layout(
    title='Health indicators (2000)',
    height=500,
    width=950,
    margin=dict(t=150)
)

fig = go.Figure(data=data, layout=layout)

py(fig)

In [21]:
health_by_country_2016 = health_by_country_2000_2016[health_by_country_2000_2016['Year'] == '2016']

# Parallel axes: (axis label, source column, axis range, rounding decimals).
# None as decimals means the column is plotted unrounded.
health_axes = [
    ('Mortality Rate Under 5(per 1.000 births)', 'Mortality Rate Under 5(per 1.000 births)', [0, 300], None),
    ('Undernourishment(%)', 'Undernourishment(% pop)', [0, 100], 2),
    ('HIV - 15 to 49 (%)', 'HIV - 15 to 49 (%)', [0, 100], 2),
    ('Inc. Tuberculosis(per 100.000)', 'Inc. Tuberculosis(per 100.000)', [0, 1000], None),
]
dimensions = []
for axis_label, column, axis_range, decimals in health_axes:
    axis_values = health_by_country_2016[column]
    if decimals is not None:
        axis_values = np.round(axis_values, decimals)
    dimensions.append(dict(range=axis_range, label=axis_label, values=axis_values))

# One line per country, coloured by its region id.
data = [
    go.Parcoords(
        name='2016 indicators',
        ids=health_by_country_2016['Country Name'],
        line=dict(color=health_by_country_2016['id_region'],
                  colorscale=[[0, '#D7C16B'], [0.5, '#23D8C3'], [1, '#F3F10F']]),
        dimensions=dimensions
    )
]

layout = go.Layout(
    title='Health indicators (2016)',
    height=500,
    width=950,
    margin=dict(t=150)
)

fig = go.Figure(data=data, layout=layout)

py(fig)

<h3>2.3 - Official development assistance (ODA) and extreme poverty</h3>
<ul>
    <li>Have the most critical countries received the largest shares of financial assistance?</li>
</ul>

In [22]:
# Country-years that have both economic and poverty figures; rows with any
# missing value are discarded.
oda_gdp_ext_poverty = econ_per_capita.merge(
    extreme_poverty_by_country,
    how='inner',
    on=['Country Name', 'Country Code', 'Region', 'Year']
).dropna()

# Hover text for the bubble charts.
hover_template = "{one}<br>Extreme Poverty: {two} %<br>Extreme Poverty: {three} millions<br>GDP per Capita: US${four}<br>ODA per Capita: US${five}<br>Last obtained: {six}"
hover_texts = []
for index, row in oda_gdp_ext_poverty.iterrows():
    hover_texts.append(
        hover_template.format(
            one=row['Country Name'],
            two=row['Extreme Poverty(% pop)'],
            three=np.round(row['Total Pop. in Extreme Poverty']/10**6, 3),
            four=np.round(row['GDP per Capita(US$)'], 2),
            five=np.round(row['ODA per capita(US$)'], 2),
            six=row['last_obs_year']
        )
    )
oda_gdp_ext_poverty['text'] = hover_texts

# Keep rows with ODA per capita above 1, in chronological order.
received_oda = oda_gdp_ext_poverty['ODA per capita(US$)'] > 1
oda_gdp_ext_poverty = oda_gdp_ext_poverty[received_oda].sort_values(by=['Year'])

# Snapshot years shown in the small multiples.
snapshot_years = ['2000', '2005', '2010', '2015']
oda_gdp_ext_poverty_2000_2015 = oda_gdp_ext_poverty[oda_gdp_ext_poverty['Year'].isin(snapshot_years)]

In [23]:
# One bubble chart per snapshot year on a 2x2 grid: x is the poverty share,
# y is GDP per capita, and the bubble size is taken from ODA per capita.
max_panels = 4
panel_years = list(oda_gdp_ext_poverty_2000_2015['Year'].unique())[:max_panels]

fig = tools.make_subplots(
    rows=2,
    cols=2,
    subplot_titles=panel_years,  # plain list rather than a numpy array
    vertical_spacing=0.15,
    print_grid=False)

bubbles = []
for position, year in enumerate(panel_years):
    yearly_oda_gdp_ext_pov = oda_gdp_ext_poverty_2000_2015[oda_gdp_ext_poverty_2000_2015['Year'] == year]

    bubbles.append(
        go.Scatter(
            x=yearly_oda_gdp_ext_pov['Extreme Poverty(% pop)'],
            y=yearly_oda_gdp_ext_pov['GDP per Capita(US$)'],
            mode='markers',
            text=yearly_oda_gdp_ext_pov['text'],
            marker=dict(
                symbol='circle',
                sizemode='area',
                size=yearly_oda_gdp_ext_pov['ODA per capita(US$)'],
                line=dict(width=2),
            ),
            # Only the first panel adds a legend entry.
            showlegend=(position == 0),
            name='ODA per capita (US$)'
        )
    )

# Fill the grid row by row. Iterating over the traces actually built means a
# missing snapshot year no longer raises IndexError.
for i, year_bubble in enumerate(bubbles):
    col = i%2 + 1
    row = i//2 + 1
    fig.append_trace(year_bubble, row, col)

# Every panel uses the same axis titles and limits so the years are comparable.
panel_axes = {}
for k in range(1, max_panels + 1):
    panel_axes['xaxis{}'.format(k)] = go.layout.XAxis(
        title='Pop. in Extreme Poverty (%)', range=[0, 100], zerolinewidth=1, ticklen=5, gridwidth=2)
    panel_axes['yaxis{}'.format(k)] = go.layout.YAxis(
        title='GDP per Capita (US$)', range=[0, 16000], ticklen=5, gridwidth=2)

fig['layout'].update(
    go.Layout(
        title='ODA per capita by extreme poverty population and GDP per capita',
        width=950,
        height=1000,
        margin=dict(t=200),
        legend=dict(x=0.38, y=1.12),
        **panel_axes
    )
)

py(fig)

<h2>3 - South American countries and extreme poverty eradication</h2>
<p>We chose four countries in South America to compare indicators related to educational investments and mortality rates with extreme poverty</p>

<h3>3.1 - GDP expenditure in education</h3>
<ul>
  <li>How have these countries been investing in education?</li>
</ul>

In [24]:
# South American countries; these names are matched against the
# 'Country Name' column when filtering the World Bank tables.
sa_countries = [
    'Argentina',
    'Bolivia',
    'Brazil',
    'Chile',
    'Colombia',
    'Ecuador',
    'Guyana',
    'Paraguay',
    'Peru',
    'Uruguay',
    'Venezuela, RB',
    'Suriname',
]

In [25]:
# NOTE(review): the following cell repeats this load and rebinds the same
# name, so one of the two cells is likely redundant.

# Load the World Bank environment indicators and reshape them with the
# notebook's helpers.
wb_env_csv = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
extreme_poverty_by_country = tideWBDataFrame(createWBDataFrame(wb_env_csv))

# Keep the years 2000-2017 (Year is compared as a string) and only the
# poverty-headcount and total-population indicators.
year_col = extreme_poverty_by_country['Year']
in_period = (year_col >= '2000') & (year_col < '2018')
extreme_poverty_by_country = keepMetrics(
    extreme_poverty_by_country[in_period],
    ['SI.POV.DDAY', 'SP.POP.TOTL']
)

# Give the indicator columns readable names.
extreme_poverty_by_country = extreme_poverty_by_country.rename(
    columns={'SI.POV.DDAY': 'Extreme Poverty(% pop)', 'SP.POP.TOTL': 'Total Pop.'}
)

# Restrict to the South American countries of the Latin America & Caribbean region.
in_region = extreme_poverty_by_country['Region'] == 'Latin America & Caribbean'
in_south_america = extreme_poverty_by_country['Country Name'].isin(sa_countries)
extreme_poverty_by_country = extreme_poverty_by_country[in_region & in_south_america]

In [26]:
# Extreme-poverty headcount and total population for South American countries,
# read from the World Bank environment indicators file.
extreme_poverty_by_country = createWBDataFrame(path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv'))
extreme_poverty_by_country = tideWBDataFrame(extreme_poverty_by_country)

# Limiting data to 2000-2017 (Year is compared as a string)
extreme_poverty_by_country = extreme_poverty_by_country[(extreme_poverty_by_country['Year'] >= '2000') & (extreme_poverty_by_country['Year'] < '2018')]
extreme_poverty_by_country = keepMetrics(extreme_poverty_by_country, ['SI.POV.DDAY','SP.POP.TOTL'])

# Renaming observation columns
extreme_poverty_by_country = extreme_poverty_by_country.rename(columns={'SI.POV.DDAY': 'Extreme Poverty(% pop)','SP.POP.TOTL': 'Total Pop.'})

# Region filter: this statement had been fused with a stray "## Education"
# heading and therefore never ran; it is restored as executable code.
extreme_poverty_by_country = extreme_poverty_by_country[extreme_poverty_by_country['Region'] == 'Latin America & Caribbean']
extreme_poverty_by_country = extreme_poverty_by_country[extreme_poverty_by_country['Country Name'].isin(sa_countries)]

In [27]:
## Education
edbc_path = path.join(social_env_path, 'Education', 'API_4_DS2_en_csv_v2_10577018.csv')

# (World Bank indicator code, readable column label) pairs, listed in the
# order in which the codes are handed to keepMetrics.
education_metrics = [
    ('SE.PRM.CMPT.ZS', 'Primary Completion Rate (% of relevant age group)'),
    ('SE.XPD.TOTL.GD.ZS', 'Expenditure on Education (% of GDP)'),
    ('SE.XPD.PRIM.PC.ZS', 'Government expenditure per student, primary (% of GDP)'),
    ('SE.XPD.SECO.PC.ZS', 'Government expenditure per student, secondary (% of GDP)'),
    ('SE.XPD.TERT.PC.ZS', 'Government expenditure per student, tertiary (% of GDP)'),
    ('SE.PRM.UNER.ZS', 'Children out of School(% primary school)'),
    ('SE.PRM.ENRL.TC.ZS', 'Pupil Teacher ratio'),
    ('SE.ADT.1524.LT.ZS', 'Youth Literacy Rate 15-24 (% pop)'),
]

# Load, reshape and keep only the indicators listed above.
education_by_country = tideWBDataFrame(createWBDataFrame(edbc_path))
education_by_country = keepMetrics(
    education_by_country,
    [code for code, _ in education_metrics]
)

# Swap indicator codes for their readable labels.
education_by_country = education_by_country.rename(columns=dict(education_metrics))

# Keep the years 2000-2017 (Year is compared as a string).
edu_year = education_by_country['Year']
education_by_country = education_by_country[(edu_year >= '2000') & (edu_year < '2018')]

In [28]:
# The four South American countries compared in this section.
selected_sa_countries = ['Argentina', 'Brazil', 'Chile', 'Colombia']

is_selected = education_by_country['Country Name'].isin(selected_sa_countries)
education_by_country_selected = education_by_country[is_selected]

In [29]:
# Line chart: expenditure on education as a share of GDP, one line per
# selected country.
layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(2000, 2018, step=1),
        tickangle=45,
        title='Year',
        ticklen=5
    ),
    yaxis=go.layout.YAxis(
        ticksuffix='% ',
        title='% of GDP',
        range=[0, 7]
    ),
    title='Expenditure in education (2000-2017)'
)

fig = go.Figure(
    layout=layout
)

for country in education_by_country_selected['Country Name'].unique():
    # The name `ext_pov_region` is kept as-is: later cells in this notebook
    # read it after this loop has finished.
    ext_pov_region = education_by_country_selected[education_by_country_selected['Country Name'] == country]
    x = ext_pov_region['Year']
    # Carry the last reported value forward over years with no observation.
    # `.ffill()` replaces the deprecated `fillna(method='ffill')` spelling.
    y = ext_pov_region['Expenditure on Education (% of GDP)'].ffill()
    fig.add_scatter(x=x, y=y, name=country, mode='lines')

py(fig, filename='gdp-edu-line')

In [30]:
# Dual-axis line charts, one subplot per selected country: extreme poverty on
# the left axis against expenditure on education on the right axis.
countries = education_by_country_selected['Country Name'].unique()

fig = tools.make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[str(c) for c in countries],
        vertical_spacing=0.35,
        horizontal_spacing=0.20,
        print_grid=False)

poverty_traces = []
education_traces = []

for country_name in countries:

    edu_by_region = education_by_country_selected[education_by_country_selected['Country Name'] == country_name]
    ext_pov_by_region = extreme_poverty_by_country[extreme_poverty_by_country['Country Name'] == country_name]

    # Each series is plotted against the Year column of its own frame rather
    # than against a variable left over from an earlier cell.
    poverty_traces.append(go.Scatter(
        x=ext_pov_by_region['Year'],
        y=ext_pov_by_region['Extreme Poverty(% pop)'].ffill(),
        name='Extreme poverty',
        mode='lines',
        showlegend=False,
        marker=go.scatter.Marker(color='darkorange'),
        hoverinfo='y',
    ))

    education_traces.append(go.Scatter(
        x=edu_by_region['Year'],
        y=edu_by_region['Expenditure on Education (% of GDP)'].ffill(),
        name='Expenditure in education',
        mode='lines',
        showlegend=False,
        marker=go.scatter.Marker(color='Blue'),
        hoverinfo='y'
    ))

# One legend entry per series is enough; the remaining subplots repeat them.
if poverty_traces:
    poverty_traces[0]['showlegend'] = True
    education_traces[0]['showlegend'] = True

for i, (poverty_trace, education_trace) in enumerate(zip(poverty_traces, education_traces)):
    col = i % 2 + 1
    row = i // 2 + 1

    fig.append_trace(poverty_trace, row, col)
    fig.append_trace(education_trace, row, col)

    # append_trace binds both traces to the subplot's primary y axis; move the
    # education trace (second of the pair) to the overlaid right-hand axis
    # y5..y8 declared in the layout below.
    fig['data'][2*i+1].update(yaxis='y{one}'.format(one=5+i))

# Year axes are identical across subplots; the right-hand axes overlay the
# primary axis of the same subplot.
shared_axes = {}
for n in range(1, 5):
    shared_axes['xaxis{}'.format(n)] = go.layout.XAxis(
        title='Year', tickmode='array', tickvals=np.arange(2000, 2018, step=5), ticklen=5)
    shared_axes['yaxis{}'.format(n + 4)] = go.layout.YAxis(
        title='GDP (%)', overlaying='y{}'.format(n), anchor='x{}'.format(n),
        side='right', showgrid=False, range=[0, 8])

fig['layout'].update(go.Layout(
                        title='Expenditure in education by extreme poverty',
                        height=650,
                        width=950,

                        yaxis1 = go.layout.YAxis(title='Pop. in extreme poverty (%)', range=[0, 15]),
                        yaxis2 = go.layout.YAxis(title='Pop. in extreme poverty (%)', range=[0, 30]),
                        yaxis3 = go.layout.YAxis(title='Pop. in extreme poverty (%)', range=[0, 15]),
                        yaxis4 = go.layout.YAxis(title='Pop. in extreme poverty (%)', range=[0, 20]),

                        margin=dict(t=200),
                        legend=dict(x=0.35, y=1.25),

                        **shared_axes
                        )
                    )

py(fig)

<h3> 3.2 - Segmentation of educational resources</h3>
<ul>
  <li>How are the educational resources distributed from primary to tertiary?</li>
</ul>

In [31]:
# Snapshot years kept for the per-level expenditure comparison.
x_years = ['2009', '2011', '2013', '2015']

in_snapshot_years = education_by_country_selected['Year'].isin(x_years)
education_by_country_selected = education_by_country_selected[in_snapshot_years]

In [32]:
# Brazil's education indicators for the years 2009, 2011, 2013 and 2015.
brazil_rows = education_by_country['Country Name'] == 'Brazil'
snapshot_rows = education_by_country['Year'].isin(['2009', '2011', '2013', '2015'])
resources_dist_brazil = education_by_country[brazil_rows & snapshot_rows]

In [33]:
# Grouped bars per selected country: government expenditure per student at the
# primary, secondary and tertiary levels for the snapshot years.
countries = education_by_country_selected['Country Name'].unique()

fig = tools.make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[str(c) for c in countries],
        vertical_spacing=0.25,
        horizontal_spacing=0.20,
        print_grid=False)

# (legend name, source column, bar colour) for each educational level.
education_levels = [
    ('Primary', 'Government expenditure per student, primary (% of GDP)', 'Blue'),
    ('Secondary', 'Government expenditure per student, secondary (% of GDP)', 'darkorange'),
    ('Tertiary', 'Government expenditure per student, tertiary (% of GDP)', 'Green'),
]

for i, country_name in enumerate(countries):
    col = i % 2 + 1
    row = i // 2 + 1

    edu_by_region = education_by_country_selected[education_by_country_selected['Country Name'] == country_name]

    for level_name, level_column, level_color in education_levels:
        fig.append_trace(
            go.Bar(
                # Use each row's own year so a bar can never be paired with the
                # wrong year when a row is missing or the frame is not sorted.
                x=edu_by_region['Year'],
                y=edu_by_region[level_column],
                name=level_name,
                # Only the first subplot contributes legend entries.
                showlegend=(i == 0),
                marker=go.bar.Marker(color=level_color),
            ),
            row, col)

# All four subplots share the same axis configuration.
bar_axes = {}
for n in range(1, 5):
    bar_axes['xaxis{}'.format(n)] = go.layout.XAxis(title='Year', tickmode='array', ticklen=5)
    bar_axes['yaxis{}'.format(n)] = go.layout.YAxis(title='Investment (%)', range=[0, 35])

fig['layout'].update(go.Layout(
                        title='Investment by educational level',
                        height=600,
                        width=950,
                        **bar_axes
                        )
                    )

py(fig)

<h3>3.3 - Infant mortality rate (under-5 years per 1,000 live births)</h3>
<ul>
  <li>What is the impact of extreme poverty on the infant mortality rate?</li>
</ul>

In [34]:
# Electricity access and under-5 mortality for the selected countries, read
# from the World Bank environment indicators file.
env_path = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
access_to_eletricity_by_country = tideWBDataFrame(createWBDataFrame(env_path))

# First pass: keep observations after 1980 and only the two indicators needed.
after_1980 = access_to_eletricity_by_country['Year'] > '1980'
access_to_eletricity_by_country = keepMetrics(
    access_to_eletricity_by_country[after_1980],
    ['EG.ELC.ACCS.ZS', 'SH.DYN.MORT']
)

# Give the indicator columns readable names.
access_to_eletricity_by_country = access_to_eletricity_by_country.rename(columns={
    'EG.ELC.ACCS.ZS': 'Access to electricity (% of pop)',
    'SH.DYN.MORT': 'Mortality rate, under-5 (per 1,000 live births)',
})

# Second pass: years 2000-2017 (Year is compared as a string), Latin America &
# Caribbean region, selected countries only.
obs_year = access_to_eletricity_by_country['Year']
keep_rows = (
    (obs_year >= '2000')
    & (obs_year < '2018')
    & (access_to_eletricity_by_country['Region'] == 'Latin America & Caribbean')
    & access_to_eletricity_by_country['Country Name'].isin(selected_sa_countries)
)
access_to_eletricity_by_country = access_to_eletricity_by_country[keep_rows]

In [35]:
# Line chart: under-5 mortality rate, one line per selected country.
layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        # Ticks start at 2000 to match the data kept for this section and the
        # period stated in the chart title.
        tickvals=np.arange(2000, 2018, step=1),
        tickangle=45,
        title='Year',
        ticklen=5
    ),
    yaxis=go.layout.YAxis(
        range=[0, 40],
        # The plotted column is expressed per 1,000 live births, not in
        # percent, so the axis carries a unit title instead of a '%' suffix.
        title='Deaths per 1,000 live births'
    ),
    title='Infant mortality rate (2000-2017)'
)

fig = go.Figure(
    layout=layout
)

for country in access_to_eletricity_by_country['Country Name'].unique():
    ate_region = access_to_eletricity_by_country[access_to_eletricity_by_country['Country Name'] == country]
    x = ate_region['Year']
    # Carry the last reported value forward over years with no observation.
    # `.ffill()` replaces the deprecated `fillna(method='ffill')` spelling.
    y = ate_region['Mortality rate, under-5 (per 1,000 live births)'].ffill()
    fig.add_scatter(x=x, y=y, name=country, mode='lines')

# Own filename instead of the one copied from the education chart.
py(fig, filename='mortality-rate-line')

In [36]:
# Dual-axis line charts, one subplot per selected country: extreme poverty on
# the left axis against under-5 mortality on the right axis.
countries = access_to_eletricity_by_country['Country Name'].unique()

fig = tools.make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[str(c) for c in countries],
        vertical_spacing=0.35,
        horizontal_spacing=0.20,
        print_grid=False)

poverty_traces = []
mortality_traces = []

for country_name in countries:

    mortal_by_region = access_to_eletricity_by_country[access_to_eletricity_by_country['Country Name'] == country_name]
    ext_pov_by_region = extreme_poverty_by_country[extreme_poverty_by_country['Country Name'] == country_name]

    # Each series is plotted against the Year column of its own frame rather
    # than against a variable left over from an earlier cell.
    poverty_traces.append(go.Scatter(
        x=ext_pov_by_region['Year'],
        y=ext_pov_by_region['Extreme Poverty(% pop)'].ffill(),
        name='Extreme poverty',
        mode='lines',
        showlegend=False,
        marker=go.scatter.Marker(color='darkorange'),
        hoverinfo='y',
    ))

    mortality_traces.append(go.Scatter(
        x=mortal_by_region['Year'],
        y=mortal_by_region['Mortality rate, under-5 (per 1,000 live births)'].ffill(),
        name='Mortality rate',
        mode='lines',
        showlegend=False,
        marker=go.scatter.Marker(color='Blue'),
        hoverinfo='y'
    ))

# One legend entry per series is enough; the remaining subplots repeat them.
if poverty_traces:
    poverty_traces[0]['showlegend'] = True
    mortality_traces[0]['showlegend'] = True

for i, (poverty_trace, mortality_trace) in enumerate(zip(poverty_traces, mortality_traces)):
    col = i % 2 + 1
    row = i // 2 + 1

    fig.append_trace(poverty_trace, row, col)
    fig.append_trace(mortality_trace, row, col)

    # append_trace binds both traces to the subplot's primary y axis; move the
    # mortality trace (second of the pair) to the overlaid right-hand axis
    # y5..y8 declared in the layout below.
    fig['data'][2*i+1].update(yaxis='y{one}'.format(one=5+i))

# Axis ranges per subplot: (extreme poverty, mortality).
axis_ranges = [([0, 15], [0, 20]), ([0, 30], [0, 40]), ([0, 15], [0, 15]), ([0, 20], [0, 40])]

subplot_axes = {}
for n, (poverty_range, mortality_range) in enumerate(axis_ranges, start=1):
    subplot_axes['xaxis{}'.format(n)] = go.layout.XAxis(
        title='Year', tickmode='array', tickvals=np.arange(2000, 2018, step=5), ticklen=5)
    subplot_axes['yaxis{}'.format(n)] = go.layout.YAxis(
        title='Pop. in extreme poverty (%)', range=poverty_range)
    # The mortality column is expressed per 1,000 live births, not in percent.
    subplot_axes['yaxis{}'.format(n + 4)] = go.layout.YAxis(
        title='Mortality rate (per 1,000 live births)', overlaying='y{}'.format(n),
        anchor='x{}'.format(n), side='right', showgrid=False, range=mortality_range)

fig['layout'].update(go.Layout(
                        title='Infant mortality rate and extreme poverty',
                        height=650,
                        width=950,
                        margin=dict(t=200),
                        legend=dict(x=0.41, y=1.25),
                        **subplot_axes
                        )
                    )

py(fig)

<h2>4 - The future of extreme poverty</h2>
<h4>How realistic is the goal of ending global extreme poverty by 2030?</h4><br>
<div>The following visualization shows the historical decline in global extreme poverty rates, from 1981 to 2015, and adds a projection for 2030 assuming: 
    <ol>
        <li> that the average growth rates of consumption in all countries will be the same as the average growth rates from the past 10 years in each country
        <li> that the dispersion in the distribution of consumption in each country will remain unchanged.
    </ol>
</div>

In [37]:
# Share of the world population living in extreme poverty, read from a CSV
# in the data directory.
poverty_share_csv = path.join(data_path, 'share-of-the-world-population-living-in-extreme-poverty.csv')
data_poverty_final = pd.read_csv(poverty_share_csv, encoding='utf-8')

In [38]:
# Line chart of the share of the world population living in extreme poverty.
trace = go.Scatter(
    x=data_poverty_final['Year'],
    y=data_poverty_final['Share of World Population living in Absolute Poverty'],
    # The series is a percentage share, not a head count, so it is labelled
    # accordingly.
    name='Share of population',
)

data = [trace]

layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        # 1981 first, then every decade from 1990 to 2030.
        tickvals=[1981] + list(range(1990, 2031, 10)),
        title='Year',
    ),
    yaxis=go.layout.YAxis(
        title='Population (%)',
    ),
    title='World population living in extreme poverty (1981-2030)'
)

fig = go.Figure(
    data=data,
    layout=layout
)

py(fig)