# Worldwide Analysis on Extreme Poverty  

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import math as math

import plotly.plotly as py
# import plotly 
# plotly.tools.set_credentials_file(username='jmattos', api_key='b5w4hhi6KXLdApb07r9s')

import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import tools

from os import path

init_notebook_mode(connected=True)
%matplotlib inline
sns.set()

data_path = 'data'

### 1.1 - Total Population by Extreme Poverty Population

In [2]:
path.join(data_path, 'OUR_WORLD_IN_DATA', 'asd')

'data\\OUR_WORLD_IN_DATA\\asd'

In [3]:
data_poverty = pd.read_csv(path.join(data_path, 'OUR_WORLD_IN_DATA', 'world-population-in-extreme-poverty-absolute.csv'), encoding='utf-8')
data_poverty[['Year', 'Number of people not in extreme poverty (people)', 'Number of people living in extreme poverty (people)']] = data_poverty[['Year', 'Number of people not in extreme poverty (people)', 'Number of people living in extreme poverty (people)']].apply(pd.to_numeric)
data_poverty = data_poverty.sort_values(by=['Year'])
data_poverty = data_poverty[data_poverty['Year'] > 1980]
data_poverty['Number of people'] = data_poverty['Number of people not in extreme poverty (people)'] + data_poverty['Number of people living in extreme poverty (people)']
data_poverty['Number of people not in extreme poverty (people)'] = data_poverty['Number of people not in extreme poverty (people)'] / 10**9
data_poverty['Number of people living in extreme poverty (people)'] = data_poverty['Number of people living in extreme poverty (people)'] / 10**9
data_poverty['Number of people'] = data_poverty['Number of people'] / 10**9
data_poverty = data_poverty.round(2)
data_poverty.head()

Unnamed: 0,Entity,Code,Year,Number of people not in extreme poverty (people),Number of people living in extreme poverty (people),Number of people
10,World,OWID_WRL,1981,2.61,1.91,4.52
11,World,OWID_WRL,1984,2.9,1.87,4.77
12,World,OWID_WRL,1987,3.25,1.78,5.03
13,World,OWID_WRL,1990,3.41,1.9,5.31
14,World,OWID_WRL,1993,3.68,1.89,5.57


In [4]:
trace = go.Scatter(
    x=data_poverty['Year'],
    y=data_poverty['Number of people'],
    name='Number of people',
    hoverinfo='y'
)

trace1 = go.Scatter(
    x = data_poverty['Year'],
    y = data_poverty['Number of people not in extreme poverty (people)'],
    name='Not in extreme poverty',
    hoverinfo='y'
)

trace2 = go.Scatter(
    x = data_poverty['Year'],
    y = data_poverty['Number of people living in extreme poverty (people)'],
    name='Living in extreme poverty',
    hoverinfo='y'
)

data = [trace, trace1, trace2]

layout = go.Layout(
    xaxis = go.layout.XAxis(
        tickmode='array',
        tickvals=np.concatenate((np.arange(1981, 2012, step=6), np.array([2015])), axis=0),
        title='Year',
    ),
    yaxis = go.layout.YAxis(
        title='Population (billion)',
        ticksuffix='B ',       
    ),
    title='World population living in extreme poverty, 1981-2015',
        annotations=[
            dict(
            text='Extreme poverty is defined as living on less than $1.90 international per day',
            font=dict(size=13),
            showarrow=False,
            x=0.60,
            y=1.1,
            xref='paper',
            yref='paper'
            )
        ]
)

fig = go.Figure(
    data = data,
    layout = layout
)

py.iplot(fig, filename='basic-line')


Consider using IPython.display.IFrame instead



In [5]:
trace1 = go.Scatter(
    x=data_poverty['Year'],
    y=data_poverty['Number of people living in extreme poverty (people)'],
    stackgroup='one',
    mode='none',
    name='Living in extreme poverty',
    hoverinfo='y'
)

trace2 = go.Scatter(
    x=data_poverty['Year'],
    y=data_poverty['Number of people not in extreme poverty (people)'],
    stackgroup='one',
    mode='none',
    name='Not in extreme poverty ',
    hoverinfo='y'
)

data = [trace1, trace2]

layout = go.Layout(
    xaxis = go.layout.XAxis(
        tickmode='array',
        tickvals=np.concatenate((np.arange(1981, 2012, step=6), np.array([2015])), axis=0),
        title='Years',
        ticklen=5
    ),
    yaxis = go.layout.YAxis(
        title='Population (billion)',
        ticksuffix='B ',       
    ),
    title='World population living in extreme poverty, 1981-2015',
    annotations=[
        dict(
        text='Extreme poverty is defined as living on less than $1.90 international per day',
        font=dict(size=13),
        showarrow=False,
        x=0.6,
        y=1.1,
        xref='paper',
        yref='paper'

        )
    ]
)

fig = go.Figure(
    data = data,
    layout = layout
)

py.iplot(fig, filename='stacked-area-char')


Consider using IPython.display.IFrame instead



In [6]:
# relative data to build 100% stacked 
data_relative = data_poverty[['Number of people not in extreme poverty (people)', 'Number of people living in extreme poverty (people)']]
data_relative = data_relative.divide(data_relative.sum(axis=1), axis=0)

In [7]:
trace1 = go.Scatter(
    x=data_poverty['Year'],
    y=data_relative['Number of people living in extreme poverty (people)'],
    name='Living in extreme poverty',
    stackgroup='one',
    mode='none',
    hoverinfo='y'
)

trace2 = go.Scatter(
    x=data_poverty['Year'],
    y=data_relative['Number of people not in extreme poverty (people)'],
    name='Not in extreme poverty',
    stackgroup='one',
    mode='none',
    hoverinfo='y'
)

data = [trace1, trace2]

layout = go.Layout(
    xaxis = go.layout.XAxis(
        tickmode='array',
        tickvals=np.concatenate((np.arange(1981, 2012, step=6), np.array([2015])), axis=0),
        title='Year',
        ticklen=5,
    ),
    yaxis=go.layout.YAxis(
        tickformat=',.0%',
        title='Population (%)',
    ),
    title='World population living in extreme poverty, 1981-2015 (relative)',
    annotations=[
        dict(
            text='Extreme poverty is defined as living on less than $1.90 international per day',
            font=dict(size=13),
            showarrow=False,
            x=0.6,
            y=1.1,
            xref='paper',
            yref='paper'
            
        )
    ]
)

fig = go.Figure(
    data = data,
    layout = layout,
)

py.iplot(fig, filename='stacked-relative-area-char')


Consider using IPython.display.IFrame instead



In [8]:
def createWBDataFrame(path):
    df = pd.read_csv(path, header=2, encoding='utf-8')
    df = pd.merge(df, country_metadata, how='left', on=['Country Code'])
    return df.drop(['Indicator Name', 'Unnamed: 63'], axis=1)

def tideWBDataFrame(df):
    df = pd.melt(df, id_vars=['Country Name', 'Country Code', 'Region', 'Indicator Code'], var_name='Year', value_name='observation')
    df = pd.pivot_table(df, columns=['Indicator Code'], index=['Country Name', 'Country Code', 'Region', 'Year'], values='observation', aggfunc=np.mean)
    return df.reset_index()

def keepMetrics(df, metrics):
    standard_columns = ['Country Name', 'Country Code', 'Region', 'Year']
    return df.loc[:, standard_columns + metrics]

In [9]:
country_metadata = pd.read_csv(path.join(data_path, 'GDP_BY_COUNTRY', 'Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10515210.csv'))
country_metadata.drop(['IncomeGroup', 'SpecialNotes', 'TableName', 'Unnamed: 5'], axis=1, inplace=True)

social_env_path = path.join(data_path, 'SOCIAL_ENV_ECON_FACTORS_BY_COUNTRY')
extreme_poverty_by_country = createWBDataFrame(path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv'))
extreme_poverty_by_country = tideWBDataFrame(extreme_poverty_by_country)

# Limiting data for the last 30 years
extreme_poverty_by_country = extreme_poverty_by_country[extreme_poverty_by_country['Year'] > '1980']
extreme_poverty_by_country = keepMetrics(extreme_poverty_by_country, ['SI.POV.DDAY','SP.POP.TOTL'])

# Renaming observation columns
extreme_poverty_by_country.rename(columns={'SI.POV.DDAY': 'Extreme Poverty(% pop)','SP.POP.TOTL': 'Total Pop.'}, inplace=True)

### 1.2 - Extreme Poverty Distribution

In [10]:
# Manipulating data to bring a better understanding of the situation.
extreme_poverty_by_country['last_obs_year'] = np.nan
extreme_poverty_by_country['last_obs_year'] = extreme_poverty_by_country[extreme_poverty_by_country['Extreme Poverty(% pop)'].notnull()]['Year']
extreme_poverty_by_country.sort_values(['Country Code', 'Year'], inplace=True)

aux = pd.DataFrame(columns=extreme_poverty_by_country.columns)
for code in extreme_poverty_by_country['Country Code'].unique():
    aux = pd.concat([aux, extreme_poverty_by_country[extreme_poverty_by_country['Country Code'] == code].fillna(method='ffill')])
extreme_poverty_by_country = aux

extreme_poverty_by_country['Total Pop. in Extreme Poverty'] = extreme_poverty_by_country['Total Pop.']*extreme_poverty_by_country['Extreme Poverty(% pop)']/100
extreme_poverty_by_country['text'] = ["{one}<br>Extreme Poverty: {two} % <br>Extreme Poverty: {three} millions<br>Last obtained: {four}".format(one=row['Country Name'], two=row['Extreme Poverty(% pop)'], three=np.round(row['Total Pop. in Extreme Poverty']/10**6, 3), four=row['last_obs_year']) for index, row in extreme_poverty_by_country.iterrows()]     

#### First Visualization

In [11]:
data_slider = []
for year in extreme_poverty_by_country['Year'].unique():
    extreme_poverty_year = extreme_poverty_by_country[extreme_poverty_by_country['Year'] == year]
    
    data = go.Choropleth(
                locationmode='ISO-3',
                locations=extreme_poverty_year['Country Code'],
                text=extreme_poverty_year['text'],
                z=extreme_poverty_year['Extreme Poverty(% pop)'],
                colorbar={'title':'Extreme Poverty (% of Population)','ticksuffix': '%'} 
        )
    data_slider.append(data)

steps = []
for i in range(len(data_slider)):
    correct_value = [False] * len(data_slider)
    correct_value[i] = True
    
    step = go.layout.slider.Step(
        method='restyle',
        args=['visible', correct_value],
        label="{}".format(1981+i)
    )
    steps.append(step)

layout = go.Layout(
            title = go.layout.Title(
                text = 'Evolution of Extreme Poverty (From 1981 to 2017)'
            ),
            geo = go.layout.Geo(
                scope='world',
                showframe = False,
                coastlinewidth = 0,
                projection = go.layout.geo.Projection(
                    type = 'equirectangular'
                )
            ),
            sliders = [go.layout.Slider(
                steps = steps,
                active = 0,
                currentvalue = {"prefix": "Year: "},
                pad = {"t": 1}
            )]
        )
fig = go.Figure(data=data_slider, layout=layout)
iplot(fig)

#### Second Visualization

In [12]:
extreme_poverty_by_region = extreme_poverty_by_country.loc[:, ['Region', 'Year', 'Total Pop. in Extreme Poverty', 'Total Pop.']]
extreme_poverty_by_region.dropna(inplace=True)

extreme_poverty_by_region = extreme_poverty_by_region.groupby(['Region', 'Year'], as_index=False).agg({'Total Pop. in Extreme Poverty': 'sum', 'Total Pop.': 'sum'})
extreme_poverty_by_region['Mean Pop. in Extreme Poverty'] = np.round(100*extreme_poverty_by_region['Total Pop. in Extreme Poverty']/extreme_poverty_by_region['Total Pop.'], 2)

extreme_poverty_by_region = extreme_poverty_by_region[extreme_poverty_by_region['Year'] >= '1990']
extreme_poverty_by_region.sort_values(by=['Year', 'Region'], inplace=True)

In [13]:
layout = go.Layout( 
    title = go.layout.Title(
        text = 'Evolution of Extreme Poverty by Region (From 1981 to 2017)'
    ),
    xaxis = go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(1990, 2018, step=1),
        tickangle=45,
        title='Years'
    ),
    yaxis = go.layout.YAxis(
        autorange=True,
        title='Pop. in Extreme Poverty(%)'
    )
)

fig = go.Figure(
    layout=layout
)
        
for region in extreme_poverty_by_region['Region'].unique():
    ext_pov_region = extreme_poverty_by_region[extreme_poverty_by_region['Region'] == region]
    fig.add_scatter(x=ext_pov_region['Year'], y=ext_pov_region['Mean Pop. in Extreme Poverty'], name=region, mode='lines')
    
iplot(fig, filename='axes-lines')

### 1.3 - GDP and Extreme Poverty

#### 1.3.1 - Extreme Poverty by GDP growth

In [14]:
hbc_path = path.join(social_env_path, 'Health', 'API_8_DS2_en_csv_v2_10576655.csv')
health_by_country = createWBDataFrame(hbc_path)
health_by_country = tideWBDataFrame(health_by_country)
health_by_country = keepMetrics(health_by_country, ['SH.DYN.MORT', 'SH.DYN.AIDS.ZS', 'SH.TBS.INCD', 'SH.DTH.COMM.ZS', 'SN.ITK.DEFC.ZS', 'SP.DYN.TFRT.IN', 'SH.XPD.CHEX.GD.ZS'])
health_by_country.rename(columns={'SH.DYN.MORT': 'Mortality Rate Under 5(per 1.000 births)', 'SH.DYN.AIDS.ZS':'HIV - 15 to 49 (%)', 'SH.TBS.INCD': 'Inc. Tuberculosis(per 100.000)', 'SH.DTH.COMM.ZS': 'Death by Infectious Disease(5 of total)', 'SN.ITK.DEFC.ZS':'Undernourishment(% pop)', 'SP.DYN.TFRT.IN': 'Total Births per Women', 'SH.XPD.CHEX.GD.ZS': 'Expenditure on Health(% of GDP)'}, inplace=True)
health_by_country = health_by_country[health_by_country['Year'] >= '1990']

ulbc_path = path.join(social_env_path, 'Labour', 'API_10_DS2_en_csv_v2_10597117.csv')
underage_labour_by_country = createWBDataFrame(ulbc_path)
underage_labour_by_country = tideWBDataFrame(underage_labour_by_country)
underage_labour_by_country = keepMetrics(underage_labour_by_country, ['SL.TLF.0714.ZS', 'SL.TLF.0714.WK.ZS', 'SL.TLF.0714.WK.TM', 'SL.UEM.NEET.ZS'])
underage_labour_by_country.rename(columns={'SL.TLF.0714.ZS': 'Children in employment 7-14 (% of children)', 'SL.TLF.0714.WK.ZS': 'Children in employment(Work only) - 7 to 14(% of chd in emp.)', 'SL.TLF.0714.WK.TM': 'Avg. Working Hours of work only children', 'SL.UEM.NEET.ZS': 'Not studying, not working (% of youth)'}, inplace=True)
underage_labour_by_country = underage_labour_by_country[underage_labour_by_country['Year'] >= '1990']

edbc_path = path.join(social_env_path, 'Education', 'API_4_DS2_en_csv_v2_10577018.csv')
education_by_country = createWBDataFrame(edbc_path)
education_by_country = tideWBDataFrame(education_by_country)
education_by_country = keepMetrics(education_by_country, ['SE.PRM.CMPT.ZS', 'SE.PRM.UNER.ZS', 'SE.PRM.ENRL.TC.ZS', 'SE.ADT.1524.LT.ZS', 'SE.XPD.TOTL.GD.ZS'])
education_by_country.rename(columns={'SE.PRM.CMPT.ZS': 'Primary Completion Rate (% of relevant age group)', 'SE.PRM.UNER.ZS': 'Children out of School(% primary school)',  'SE.PRM.ENRL.TC.ZS': 'Pupil Teacher ratio', 'SE.ADT.1524.LT.ZS': 'Youth Literacy Rate 15-24 (% pop)', 'SE.XPD.TOTL.GD.ZS': 'Expenditure on Education (% of GDP)'}, inplace=True)
education_by_country = education_by_country[education_by_country['Year'] >= '1990']

ecbc_path = path.join(social_env_path, 'Economics', 'API_3_DS2_en_csv_v2_10577958.csv')
economics_by_country = createWBDataFrame(ecbc_path)
economics_by_country = tideWBDataFrame(economics_by_country)
economics_by_country = keepMetrics(economics_by_country, ['NY.GDP.PCAP.CD', 'NY.GDP.MKTP.KD.ZG', 'FP.CPI.TOTL.ZG', 'DT.ODA.ODAT.PC.ZS'])
economics_by_country.rename(columns={'NY.GDP.PCAP.CD': 'GDP per Capita(US$)', 'NY.GDP.MKTP.KD.ZG': 'GDP Growth(%)', 'FP.CPI.TOTL.ZG': 'Inflation Rate(%)', 'DT.ODA.ODAT.PC.ZS': 'ODA per capita(US$)'}, inplace=True)
economics_by_country = economics_by_country[economics_by_country['Year'] >= '1990']
economics_by_country['text'] = ["{one}<br>GDP Growth: {two}% <br>GDP per capita: US${three} <br>Inflation Rate: {four}% <br>ODA per capita: US${five}".format(one=row['Country Name'], two=np.round(row['GDP Growth(%)'],2), three=np.round(row['GDP per Capita(US$)'],2), four=np.round(row['Inflation Rate(%)'],2), five=np.round(row['ODA per capita(US$)'],2)) for index, row in economics_by_country.iterrows()] 

envbc_path = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
environment_by_country = createWBDataFrame(envbc_path)
environment_by_country = tideWBDataFrame(environment_by_country)
environment_by_country = keepMetrics(environment_by_country, ['AG.LND.PRCP.MM','EN.CLC.MDAT.ZS','AG.LND.ARBL.ZS','AG.LND.IRIG.AG.ZS','NV.AGR.TOTL.ZS','AG.YLD.CREL.KG', 'EG.ELC.ACCS.ZS'])
environment_by_country.rename(columns={'AG.LND.PRCP.MM': 'Avg precipitation(mm)','EN.CLC.MDAT.ZS': '% Pop affected by extreme conditions(Droughts, Flood, high temps)','AG.LND.ARBL.ZS': '% Arable Land','AG.LND.IRIG.AG.ZS': '% Agric. Irrigated Land','NV.AGR.TOTL.ZS': 'Agric., Florestry and Fishing(% of GDP)','AG.YLD.CREL.KG': 'Cereal Yield(Kg/hectare)','EG.ELC.ACCS.ZS': 'Access Electricity(% pop)'}, inplace=True)
environment_by_country = environment_by_country[environment_by_country['Year'] >= '1990']

In [15]:
econ_per_capita = economics_by_country.loc[:, ['Year', 'Region', 'Country Code', 'Country Name', 'GDP per Capita(US$)', 'ODA per capita(US$)']]
econ_per_capita_by_region = econ_per_capita.groupby(['Year', 'Region'], as_index=False).agg({'GDP per Capita(US$)': np.mean, 'ODA per capita(US$)':np.mean})
econ_per_capita_by_region['GDP per Capita(US$)'] = np.round(econ_per_capita_by_region['GDP per Capita(US$)'], 2)
econ_per_capita_by_region['ODA per capita(US$)'] = np.round(econ_per_capita_by_region['ODA per capita(US$)'], 2)

grouped_econ_ext_poverty = pd.merge(extreme_poverty_by_region, econ_per_capita_by_region, on=['Year', 'Region'])

#### First Visualization

In [16]:
fig = go.Figure()
fig = tools.make_subplots(
        rows=4, 
        cols=2, 
        subplot_titles=[reg for reg in grouped_econ_ext_poverty['Region'].unique()],
        vertical_spacing=0.10,
        horizontal_spacing=0.20)

perc_pop_ext_poverty = []
gdp_per_capita = []
for region in grouped_econ_ext_poverty['Region'].unique():
    g_econ_ext_pov_region = grouped_econ_ext_poverty[grouped_econ_ext_poverty['Region'] == region]
    gdp_per_capita.append(
        go.Scatter(
            x=g_econ_ext_pov_region['Year'], 
            y=g_econ_ext_pov_region['GDP per Capita(US$)'], 
            name='GDP per Capita(US$)', 
            mode='lines',
            showlegend=False,
            marker=go.scatter.Marker(color='Orange')
        )
    )
    
    perc_pop_ext_poverty.append(
        go.Scatter(
            x=g_econ_ext_pov_region['Year'], 
            y=g_econ_ext_pov_region['Mean Pop. in Extreme Poverty'], 
            name='Extreme Poverty Pop.(%)', 
            mode='lines',
            showlegend=False,
            marker=go.scatter.Marker(color='Blue')
        )
    )

fig['layout'].update(
    go.Layout(
        height=1000,
        width=1000,
        title='GDP per Capita by Population Living in Extreme Poverty',
        xaxis1= go.layout.XAxis(ticklen=5),
        xaxis2= go.layout.XAxis(ticklen=5),
        xaxis3= go.layout.XAxis(ticklen=5),
        xaxis4= go.layout.XAxis(ticklen=5),
        xaxis5= go.layout.XAxis(ticklen=5),
        xaxis6= go.layout.XAxis(ticklen=5),
        xaxis7= go.layout.XAxis(ticklen=5),
        yaxis1= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,60]),
        yaxis2= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,10]),
        yaxis3= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,15]),
        yaxis4= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,10]),
        yaxis5= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,2]),
        yaxis6= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,60]),
        yaxis7= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,60]),
        yaxis8= go.layout.YAxis(title='GDP per Capita', overlaying='y1', anchor='x1', side='right', range=[0, 20000], showgrid=False),
        yaxis9= go.layout.YAxis(title='GDP per Capita', overlaying='y2', anchor='x2', side='right', range=[0, 40000], showgrid=False),
        yaxis10=go.layout.YAxis(title='GDP per Capita', overlaying='y3', anchor='x3', side='right', range=[0, 15000], showgrid=False),
        yaxis11=go.layout.YAxis(title='GDP per Capita', overlaying='y4', anchor='x4', side='right', range=[0, 20000], showgrid=False),
        yaxis12=go.layout.YAxis(title='GDP per Capita', overlaying='y5', anchor='x5', side='right', range=[0, 70000], showgrid=False),
        yaxis13=go.layout.YAxis(title='GDP per Capita', overlaying='y6', anchor='x6', side='right', range=[0, 3000], showgrid=False),
        yaxis14=go.layout.YAxis(title='GDP per Capita', overlaying='y7', anchor='x7', side='right', range=[0, 3000], showgrid=False),
    
        legend= go.Legend(
            x=1.0,
            y=1.05
        ),
        #annotations=[
        #    dict(
        #        text='Pay Attention to the y axis scale on each chart',
        #        font=dict(size=13),
        #        showarrow=False,
        #        x=0.6,
        #        y=1.03,
        #        xref='paper',
        #        yref='paper'
        #   )
        #]
    )
)

for i in range(7):
    col = i%2 + 1
    row = i//2 + 1
    if i == 0:
        perc_pop_ext_poverty[i]['showlegend'] = True
        gdp_per_capita[i]['showlegend'] = True
        
    fig.append_trace(perc_pop_ext_poverty[i], row, col)
    fig.append_trace(gdp_per_capita[i], row, col)

for i in range(7):
    fig['data'][2*i+1].update(yaxis='y{one}'.format(one=8+i))

iplot(fig, filename='Small Multiples: GDP per capita and Extreme Poverty Pop.(%)')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]
[ (3,1) x5,y5 ]  [ (3,2) x6,y6 ]
[ (4,1) x7,y7 ]  [ (4,2) x8,y8 ]




plotly.graph_objs.Legend is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Legend




#### Second Visualization

#### Small Multiples Scatter Plot showing correlation between GDP per Capita and Pop. in Extreme Poverty

In [17]:
fig = go.Figure()
fig = tools.make_subplots(
        rows=4, 
        cols=2, 
        subplot_titles=[reg for reg in grouped_econ_ext_poverty['Region'].unique()],
        vertical_spacing=0.10,
        horizontal_spacing=0.20)

pop_ext_pov_by_gdp_per_capita = []
for region in grouped_econ_ext_poverty['Region'].unique():
    g_econ_ext_pov_region = grouped_econ_ext_poverty[grouped_econ_ext_poverty['Region'] == region]
    pop_ext_pov_by_gdp_per_capita.append(
        go.Scatter(
            y=g_econ_ext_pov_region['Mean Pop. in Extreme Poverty'], 
            x=g_econ_ext_pov_region['GDP per Capita(US$)'], 
            name='GDP per Capita(US$)', 
            mode='markers',
            showlegend=False,
            marker=go.scatter.Marker(color='Blue')
        )
    )

fig['layout'].update(
    go.Layout(
        height=1000,
        width=1000,
        title='GDP per Capita by Population Living in Extreme Poverty',
        xaxis1= go.layout.XAxis(title='GDP per Capita', autorange=True),
        xaxis2= go.layout.XAxis(title='GDP per Capita', range=[0, 40000]),
        xaxis3= go.layout.XAxis(title='GDP per Capita', range=[0, 15000]),
        xaxis4= go.layout.XAxis(title='GDP per Capita', range=[0, 20000]),
        xaxis5= go.layout.XAxis(title='GDP per Capita', range=[0, 70000]),
        xaxis6= go.layout.XAxis(title='GDP per Capita', range=[0, 3000]),
        xaxis7= go.layout.XAxis(title='GDP per Capita', range=[0, 3000]),
        
        yaxis1= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,60], showgrid=False),
        yaxis2= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,10], showgrid=False),
        yaxis3= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,15], showgrid=False),
        yaxis4= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,10], showgrid=False),
        yaxis5= go.layout.YAxis(title='Pop. in Extreme Poverty(%)',  range=[0,2], showgrid=False),
        yaxis6= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,60], showgrid=False),
        yaxis7= go.layout.YAxis(title='Pop. in Extreme Poverty(%)', range=[0,60], showgrid=False),
    )
)

for i in range(7):
    col = i%2 + 1
    row = i//2 + 1
        
    fig.append_trace(pop_ext_pov_by_gdp_per_capita[i], row, col)

iplot(fig, filename='Scatter-Small Multiples GDP per capita and Extreme Poverty Pop.(%)')

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]
[ (3,1) x5,y5 ]  [ (3,2) x6,y6 ]
[ (4,1) x7,y7 ]  [ (4,2) x8,y8 ]



### 1.4 - Educational and Health Standards on Poorest Regions
#### Poorest regions: Sub-Saharan Africa, South Asia and Latin America and Caribbean.
###### The most efficient way to face Extreme Poverty is by improving educational and health standards.

In [18]:
id_region = dict({'South Asia': 0, 'Sub-Saharan Africa': 0.5, 'Latin America & Caribbean': 1})

education_by_country_2000_2016 = education_by_country[(education_by_country['Year'].isin(['2000','2016'])) & (education_by_country['Region'].isin(['Latin America & Caribbean', 'South Asia', 'Sub-Saharan Africa']))]
#education_by_country_2016.dropna(subset=['Primary Completion Rate (% of relevant age group)', 'Children out of School(% primary school)', 'Pupil Teacher ratio'], inplace=True)

health_by_country_2000_2016 = health_by_country[(health_by_country['Year'].isin(['2000','2016'])) & (health_by_country['Region'].isin(['Latin America & Caribbean', 'South Asia', 'Sub-Saharan Africa']))]
#health_by_country_2016.dropna(subset=['Mortality Rate Under 5(per 1.000 births)', 'HIV - 15 to 49 (%)', 'Inc. Tuberculosis(per 100.000)', 'Undernourishment(% pop)'], inplace=True)

education_by_country_2000_2016['id_region'] = [id_region[region] for region in education_by_country_2000_2016['Region']]
health_by_country_2000_2016['id_region'] = [id_region[region] for region in health_by_country_2000_2016['Region']]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



#### 1.4.1 - Health and Education Expenditures of GDP

#### First Visualization

In [19]:
ed_expenditure = education_by_country.loc[:, ['Year', 'Region', 'Country Code', 'Country Name', 'Expenditure on Education (% of GDP)']]
health_expenditure = health_by_country.loc[:, ['Year', 'Region', 'Country Code', 'Country Name', 'Expenditure on Health(% of GDP)']]
econ_growth = economics_by_country.loc[:, ['Year', 'Region', 'Country Code', 'Country Name', 'GDP Growth(%)']]

grouped_ed_health_exp = pd.merge(ed_expenditure, health_expenditure, on=['Year', 'Region', 'Country Code', 'Country Name'])
grouped_ed_health_econ_exp = pd.merge(grouped_ed_health_exp, econ_growth, on=['Year', 'Region', 'Country Code', 'Country Name'])

grouped_ed_health_econ_exp = grouped_ed_health_econ_exp.groupby(['Year', 'Region'], as_index=False).agg({'Expenditure on Education (% of GDP)':'mean', 'Expenditure on Health(% of GDP)': 'mean', 'GDP Growth(%)': 'mean'})
grouped_ed_health_econ_exp = grouped_ed_health_econ_exp[grouped_ed_health_econ_exp['Year']>'1999']

In [21]:
fig = go.Figure()
fig = tools.make_subplots(
        rows=4, 
        cols=2, 
        subplot_titles=[reg for reg in grouped_ed_health_econ_exp['Region'].unique()],
        vertical_spacing=0.10)

exp_education = []
exp_health = []
gdp_growth = []
for reg in grouped_ed_health_econ_exp['Region'].unique():
    reg_grouped_ed_health_econ_exp = grouped_ed_health_econ_exp[grouped_ed_health_econ_exp['Region'] == reg]

    reg_exp_education = go.Bar(
            x=reg_grouped_ed_health_econ_exp['Year'], 
            y=reg_grouped_ed_health_econ_exp['Expenditure on Education (% of GDP)'], 
            name='Education (% of GDP)',
            showlegend=False,
            marker=go.bar.Marker(
                color='Blue'))

    reg_exp_health = go.Bar(
            x=reg_grouped_ed_health_econ_exp['Year'], 
            y=reg_grouped_ed_health_econ_exp['Expenditure on Health(% of GDP)'], 
            name='Health (% of GDP)',
            showlegend=False,
            marker=go.bar.Marker(
                color='Orange'))

    reg_gdp_growth = go.Scatter(
            x=reg_grouped_ed_health_econ_exp['Year'], 
            y=reg_grouped_ed_health_econ_exp['GDP Growth(%)'], 
            name='GDP Growth(%)', 
            showlegend=False,
            mode='lines',
            marker=go.scatter.Marker(
                color='Green'))

    exp_education.append(reg_exp_education)
    exp_health.append(reg_exp_health)
    gdp_growth.append(reg_gdp_growth)

for i in range(8):
    col = i%2 + 1
    row = i//2 + 1
    
    exp_education[0]['showlegend'] = True
    exp_health[0]['showlegend'] = True
    gdp_growth[0]['showlegend'] = True
    if i < 7:
        fig.append_trace(exp_education[i], row, col)
        fig.append_trace(exp_health[i], row, col)
        fig.append_trace(gdp_growth[i], row, col)

fig['layout'].update({'title': 'Health and Education Expenditures compared to GDP Growth', 
                      'height': 1000, 
                      'width': 1000})        
py.iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]
[ (3,1) x5,y5 ]  [ (3,2) x6,y6 ]
[ (4,1) x7,y7 ]  [ (4,2) x8,y8 ]




Consider using IPython.display.IFrame instead



#### Second Visualization

In [22]:
fig = go.Figure()
fig = tools.make_subplots(
        rows=4, 
        cols=2, 
        subplot_titles=[reg for reg in grouped_ed_health_econ_exp['Region'].unique()],
        vertical_spacing=0.10)

exp_education = []
exp_health = []
gdp_growth = []
for reg in grouped_ed_health_econ_exp['Region'].unique():
    reg_grouped_ed_health_econ_exp = grouped_ed_health_econ_exp[grouped_ed_health_econ_exp['Region'] == reg]

    reg_exp_education = go.Bar(
            x=reg_grouped_ed_health_econ_exp['Year'], 
            y=reg_grouped_ed_health_econ_exp['Expenditure on Education (% of GDP)'], 
            name='Education (% of GDP)',
            showlegend=False,
            marker=go.bar.Marker(
                color='Blue'))

    reg_exp_health = go.Bar(
            x=reg_grouped_ed_health_econ_exp['Year'], 
            y=reg_grouped_ed_health_econ_exp['Expenditure on Health(% of GDP)'], 
            name='Health (% of GDP)',
            showlegend=False,
            marker=go.bar.Marker(
                color='Orange'))

    reg_gdp_growth = go.Scatter(
            x=reg_grouped_ed_health_econ_exp['Year'], 
            y=reg_grouped_ed_health_econ_exp['GDP Growth(%)'], 
            name='GDP Growth(%)', 
            showlegend=False,
            mode='lines',
            marker=go.scatter.Marker(
                color='Green'))

    exp_education.append(reg_exp_education)
    exp_health.append(reg_exp_health)
    gdp_growth.append(reg_gdp_growth)

for i in range(8):
    col = i%2 + 1
    row = i//2 + 1
    
    exp_education[0]['showlegend'] = True
    exp_health[0]['showlegend'] = True
    gdp_growth[0]['showlegend'] = True
    if i < 7:
        fig.append_trace(exp_education[i], row, col)
        fig.append_trace(exp_health[i], row, col)
        fig.append_trace(gdp_growth[i], row, col)

fig['layout'].update({'title': 'Health and Education Expenditures compared to GDP Growth', 
                      'height': 1000, 
                      'width': 1000,
                      'barmode': 'stack'})        
py.iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]
[ (3,1) x5,y5 ]  [ (3,2) x6,y6 ]
[ (4,1) x7,y7 ]  [ (4,2) x8,y8 ]




Consider using IPython.display.IFrame instead



#### 1.4.3 - Basic Educational Metrics Comparison (2000 by 2016)

In [23]:
education_by_country_2000 = education_by_country_2000_2016[education_by_country_2000_2016['Year'] == '2000']

data = [
    go.Parcoords(
        name='2000 indicators',
                
        ids=education_by_country_2000['Country Name'],
        line=dict(color=education_by_country_2000['id_region'],
                 colorscale=[[0,'#D7C16B'],[0.5,'#23D8C3'],[1,'#F3F10F']]),
        
        dimensions = list([
            dict(range=[0,150],
                 label='Primary Completion Rate(%)', 
                 values=np.round(education_by_country_2000['Primary Completion Rate (% of relevant age group)'], 2)),
            
            dict(range=[0,100],
                 label='Children out of School(%)', 
                 values=np.round(education_by_country_2000['Children out of School(% primary school)'], 2)),
            
            dict(range=[0,150],
                 label='Pupil Teacher Ratio', 
                 values=np.round(education_by_country_2000['Pupil Teacher ratio']))
        ])
    )
]

layout = {
    'title': '2000', 
    'height': 500, 
    'width': 1000,
    'paper_bgcolor':'#ffffff'
}      

fig = go.Figure(
    data=data,
    layout=layout
)

py.iplot(fig)


Consider using IPython.display.IFrame instead



In [24]:
education_by_country_2016 = education_by_country_2000_2016[education_by_country_2000_2016['Year'] == '2016']

data = [
    go.Parcoords(
        name='2016 indicators',        
        ids=education_by_country_2016['Country Name'],
        line=dict(color=education_by_country_2016['id_region'],
                  colorscale=[[0,'#D7C16B'],[0.5,'#23D8C3'],[1,'#F3F10F']]),
        
        dimensions = list([
            dict(range=[0,150],
                 label='Primary Completion Rate(%)', 
                 values=np.round(education_by_country_2016['Primary Completion Rate (% of relevant age group)'], 2)),
            
            dict(range=[0,100],
                 label='Children out of School(%)', 
                 values=np.round(education_by_country_2016['Children out of School(% primary school)'], 2)),
            
            dict(range=[0,150],
                 label='Pupil Teacher Ratio', 
                 values=np.round(education_by_country_2016['Pupil Teacher ratio']))
        ])
    )
]
    
layout = {
    'title': '2016', 
    'height': 500, 
    'width': 1000, 
    'paper_bgcolor':'#ffffff'
}      

fig = go.Figure(
    data=data,
    layout=layout
)

py.iplot(fig)


Consider using IPython.display.IFrame instead



#### 1.4.4 - Basic Health Metrics Comparison (2000 by 2016)

In [27]:
health_by_country_2000 = health_by_country_2000_2016[health_by_country_2000_2016['Year'] == '2000']

data = [
    go.Parcoords(
        name='2000 indicators',
        ids=health_by_country_2000['Country Name'],
        line=dict(color=health_by_country_2000['id_region'],
                 colorscale=[[0,'#D7C16B'],[0.5,'#23D8C3'],[1,'#F3F10F']]),
        
        dimensions = list([
            dict(range=[0,300],
                 label='Mortality Rate Under 5(per 1.000 births)', 
                 values=health_by_country_2000['Mortality Rate Under 5(per 1.000 births)']),
            
            dict(range=[0,100],
                 label='Undernourishment(%)', 
                 values=np.round(health_by_country_2000['Undernourishment(% pop)'], 2)),
            
            dict(range=[0,100],
                 label='HIV - 15 to 49 (%)', 
                 values=np.round(health_by_country_2000['HIV - 15 to 49 (%)'], 2)),
            
            dict(range=[0,1000],
                 label='Inc. Tuberculosis(per 100.000)', 
                 values=health_by_country_2000['Inc. Tuberculosis(per 100.000)'])
        ])
    )
]

layout = {
    'title': '2000', 
    'height': 500, 
    'width': 1000,
    'paper_bgcolor':'#ffffff'
}      

fig = go.Figure(
    data=data,
    layout=layout
)

py.iplot(fig)


Consider using IPython.display.IFrame instead



In [28]:
health_by_country_2016 = health_by_country_2000_2016[health_by_country_2000_2016['Year'] == '2016']  

data = [
    go.Parcoords(
        name='2016 indicators',        
        ids=health_by_country_2016['Country Name'],
        line=dict(color=health_by_country_2016['id_region'],
                  colorscale=[[0,'#D7C16B'],[0.5,'#23D8C3'],[1,'#F3F10F']]),
        
        dimensions = list([
            dict(range=[0,300],
                 label='Mortality Rate Under 5(per 1.000 births)', 
                 values=health_by_country_2016['Mortality Rate Under 5(per 1.000 births)']),
            
            dict(range=[0,100],
                 label='Undernourishment(%)', 
                 values=np.round(health_by_country_2016['Undernourishment(% pop)'], 2)),
            
            dict(range=[0,100],
                 label='HIV - 15 to 49 (%)', 
                 values=np.round(health_by_country_2016['HIV - 15 to 49 (%)'], 2)),
            
            dict(range=[0,1000],
                 label='Inc. Tuberculosis(per 100.000)', 
                 values=health_by_country_2016['Inc. Tuberculosis(per 100.000)'])
        ])
    )
]

layout = {
    'title': '2016', 
    'height': 500, 
    'width': 1000,
    'paper_bgcolor':'#ffffff'
}      

fig = go.Figure(
    data=data,
    layout=layout
)

py.iplot(fig)


Consider using IPython.display.IFrame instead



 ### 1.5 - ODA by GDP and Extreme Poverty 
 ##### Are the poorest regions getting most assistance?

In [226]:
oda_gdp_ext_poverty = pd.merge(econ_per_capita, extreme_poverty_by_country, how='inner', on=['Country Name', 'Country Code', 'Region', 'Year'])
oda_gdp_ext_poverty.dropna(inplace=True)

oda_gdp_ext_poverty['text'] = [
    "{one}<br>Extreme Poverty: {two} %<br>Extreme Poverty: {three} millions<br>GDP per Capita: US${four}<br>ODA per Capita: US${five}<br>Last obtained: {six}"
    .format(one=row['Country Name'], 
            two=row['Extreme Poverty(% pop)'], 
            three=np.round(row['Total Pop. in Extreme Poverty']/10**6, 3),
            four=np.round(row['GDP per Capita(US$)'], 2),
            five=np.round(row['ODA per capita(US$)'], 2),
            six=row['last_obs_year']) for index, row in oda_gdp_ext_poverty.iterrows()]

oda_gdp_ext_poverty = oda_gdp_ext_poverty[oda_gdp_ext_poverty['ODA per capita(US$)']>1]
oda_gdp_ext_poverty.sort_values(by=['Year'], inplace=True)

oda_gdp_ext_poverty_2000_2015 = oda_gdp_ext_poverty[oda_gdp_ext_poverty['Year'].isin(['2000', '2005', '2010', '2015'])]

In [227]:
fig = go.Figure()
fig = tools.make_subplots(
        rows=2, 
        cols=2, 
        subplot_titles=oda_gdp_ext_poverty_2000_2015['Year'].unique(),
        vertical_spacing=0.15)

bubbles = []
for year in oda_gdp_ext_poverty_2000_2015['Year'].unique():
    yearly_oda_gdp_ext_pov = oda_gdp_ext_poverty_2000_2015[oda_gdp_ext_poverty_2000_2015['Year'] == year]
    
    year_bubble = go.Scatter(
        x=yearly_oda_gdp_ext_pov['Extreme Poverty(% pop)'],
        y=yearly_oda_gdp_ext_pov['GDP per Capita(US$)'],
        mode='markers',
        text=yearly_oda_gdp_ext_pov['text'],
        marker=dict(
            symbol='circle',
            sizemode='area',
            size=yearly_oda_gdp_ext_pov['ODA per capita(US$)'],
            line=dict(
                width=2
            ),
        )
    )
    bubbles.append(year_bubble)

for i in range(4):
    col = i%2 + 1
    row = i//2 + 1
    fig.append_trace(bubbles[i], row, col)

    
fig['layout'].update(
    go.Layout(
        title='ODA per Capita by Extreme Poverty Population and GDP per Capita',
        width=1000,
        height=1000,
        xaxis1=go.layout.XAxis(title='Pop. in Extreme Poverty(%)', range=[0, 100], zerolinewidth=1, ticklen=5, gridwidth=2),
        xaxis2=go.layout.XAxis(title='Pop. in Extreme Poverty(%)', range=[0, 100], zerolinewidth=1, ticklen=5, gridwidth=2),
        xaxis3=go.layout.XAxis(title='Pop. in Extreme Poverty(%)', range=[0, 100], zerolinewidth=1, ticklen=5, gridwidth=2),
        xaxis4=go.layout.XAxis(title='Pop. in Extreme Poverty(%)', range=[0, 100], zerolinewidth=1, ticklen=5, gridwidth=2),

        yaxis1=go.layout.YAxis(title='GDP per Capita(in US$)', range=[0, 16000], ticklen=5,gridwidth=2),
        yaxis2=go.layout.YAxis(title='GDP per Capita(in US$)', range=[0, 16000], ticklen=5,gridwidth=2),
        yaxis3=go.layout.YAxis(title='GDP per Capita(in US$)', range=[0, 16000], ticklen=5,gridwidth=2),
        yaxis4=go.layout.YAxis(title='GDP per Capita(in US$)', range=[0, 16000], ticklen=5,gridwidth=2)
    )
)

iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]

