In [1]:
import numpy as np
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go 
# from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot as py
import matplotlib.pyplot as plt

from os import path

data_path = 'data'

In [2]:
# Initialise plotly's offline notebook mode before any figure is rendered.
# NOTE(review): connected=True presumably makes the notebook load plotly.js from a CDN
# instead of embedding it in the notebook file — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

In [3]:
# South American countries to analyse. The spellings must match the values of the
# 'Country Name' column they are filtered against (e.g. 'Venezuela, RB').
sa_countries = [
    'Argentina',
    'Bolivia',
    'Brazil',
    'Chile',
    'Colombia',
    'Ecuador',
    'Guyana',
    'Paraguay',
    'Peru',
    'Uruguay',
    'Venezuela, RB',
    'Suriname',
]

In [4]:
def createWBDataFrame(path):
    """Load one indicator CSV and attach the country metadata to every row.

    The file is read with its third line (row index 2) as the header, left-joined
    with the module-level ``country_metadata`` frame on 'Country Code', and
    returned without the 'Indicator Name' and 'Unnamed: 63' columns.

    Note: inside this function the ``path`` argument shadows the ``path`` module
    imported from ``os``; only the argument is used here.
    """
    raw = pd.read_csv(path, header=2, encoding='utf-8')
    with_metadata = raw.merge(country_metadata, how='left', on=['Country Code'])
    return with_metadata.drop(columns=['Indicator Name', 'Unnamed: 63'])

def tideWBDataFrame(df):
    """Reshape a wide indicator table into one row per country and year.

    Every column other than 'Country Name', 'Country Code', 'Region' and
    'Indicator Code' is treated as a year column. The frame is melted to long
    form and then pivoted so that each indicator code becomes its own column.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide frame with the four identifier columns plus one column per year.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Name', 'Country Code', 'Region', 'Year' followed by one
        column per indicator code. 'Year' keeps the original column labels.
    """
    id_columns = ['Country Name', 'Country Code', 'Region', 'Indicator Code']
    long_form = pd.melt(df, id_vars=id_columns, var_name='Year', value_name='observation')
    tidy = pd.pivot_table(
        long_form,
        index=['Country Name', 'Country Code', 'Region', 'Year'],
        columns=['Indicator Code'],
        values='observation',
        # The string alias is used instead of the np.mean callable: same result,
        # but passing the callable is deprecated in newer pandas releases.
        aggfunc='mean',
    )
    return tidy.reset_index()

def keepMetrics(df, metrics):
    """Return ``df`` reduced to the identifier columns plus the requested metrics.

    ``metrics`` is a list of column names; the result keeps 'Country Name',
    'Country Code', 'Region' and 'Year' first, followed by ``metrics`` in the
    order given.
    """
    id_columns = ['Country Name', 'Country Code', 'Region', 'Year']
    selection = id_columns + metrics
    return df.loc[:, selection]

In [5]:
# Country metadata, read once and merged into every indicator table by createWBDataFrame.
# The columns dropped here are not used anywhere in the notebook.
country_metadata_path = path.join(
    data_path,
    'GDP_BY_COUNTRY',
    'Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10515210.csv',
)
country_metadata = pd.read_csv(country_metadata_path).drop(
    columns=['IncomeGroup', 'SpecialNotes', 'TableName', 'Unnamed: 5']
)

# Root folder of the social / environmental / economic indicator files.
social_env_path = path.join(data_path, 'SOCIAL_ENV_ECON_FACTORS_BY_COUNTRY')

# Extreme poverty

In [6]:
# Extreme poverty and total population for the South American countries, 1990 onwards.
# Both indicators are read from the 'Environment' API_19 export.
extreme_poverty_path = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
extreme_poverty_by_country = tideWBDataFrame(createWBDataFrame(extreme_poverty_path))

# Keep the two indicators of interest and give them readable names.
# rename() returns a new frame, so no filtered slice is modified in place.
extreme_poverty_by_country = keepMetrics(
    extreme_poverty_by_country, ['SI.POV.DDAY', 'SP.POP.TOTL']
).rename(columns={'SI.POV.DDAY': 'Extreme Poverty(% pop)', 'SP.POP.TOTL': 'Total Pop.'})

# One combined filter replaces the earlier "> '1980'" step, which the ">= '1990'"
# cut-off made redundant. 'Year' holds four-character strings, so string
# comparison orders the years correctly.
extreme_poverty_by_country = extreme_poverty_by_country[
    (extreme_poverty_by_country['Year'] >= '1990')
    & (extreme_poverty_by_country['Region'] == 'Latin America & Caribbean')
    & extreme_poverty_by_country['Country Name'].isin(sa_countries)
]

In [7]:
# Preview the first rows of the filtered poverty / population table.
extreme_poverty_by_country.head()

Indicator Code,Country Name,Country Code,Region,Year,Extreme Poverty(% pop),Total Pop.
441,Argentina,ARG,Latin America & Caribbean,1990,,32729739.0
442,Argentina,ARG,Latin America & Caribbean,1991,1.1,33193918.0
443,Argentina,ARG,Latin America & Caribbean,1992,2.1,33655151.0
444,Argentina,ARG,Latin America & Caribbean,1993,2.4,34110917.0
445,Argentina,ARG,Latin America & Caribbean,1994,2.3,34558115.0


## Education

In [8]:
# (indicator code, readable column label) pairs, listed in the column order of the final table.
# NOTE(review): the three "per student" indicators are labelled '(% of GDP)'; the World Bank
# presumably defines them as % of GDP per capita — confirm before quoting these labels.
education_metrics = [
    ('SE.PRM.CMPT.ZS', 'Primary Completion Rate (% of relevant age group)'),
    ('SE.XPD.TOTL.GD.ZS', 'Expenditure on Education (% of GDP)'),
    ('SE.XPD.PRIM.PC.ZS', 'Government expenditure per student, primary (% of GDP)'),
    ('SE.XPD.SECO.PC.ZS', 'Government expenditure per student, secondary (% of GDP)'),
    ('SE.XPD.TERT.PC.ZS', 'Government expenditure per student, tertiary (% of GDP)'),
    ('SE.PRM.UNER.ZS', 'Children out of School(% primary school)'),
    ('SE.PRM.ENRL.TC.ZS', 'Pupil Teacher ratio'),
    ('SE.ADT.1524.LT.ZS', 'Youth Literacy Rate 15-24 (% pop)'),
]

edbc_path = path.join(social_env_path, 'Education', 'API_4_DS2_en_csv_v2_10577018.csv')
education_by_country = tideWBDataFrame(createWBDataFrame(edbc_path))
education_by_country = keepMetrics(education_by_country, [code for code, label in education_metrics])
education_by_country = education_by_country.rename(columns=dict(education_metrics))

# 'Year' values are strings, so the cut-off is a string comparison.
education_by_country = education_by_country[education_by_country['Year'] >= '1990']

In [9]:
# Restrict the education table to the Latin America & Caribbean region, then preview it.
in_latin_america = education_by_country['Region'].eq('Latin America & Caribbean')
education_by_country = education_by_country[in_latin_america]
education_by_country.head()

Indicator Code,Country Name,Country Code,Region,Year,Primary Completion Rate (% of relevant age group),Expenditure on Education (% of GDP),"Government expenditure per student, primary (% of GDP)","Government expenditure per student, secondary (% of GDP)","Government expenditure per student, tertiary (% of GDP)",Children out of School(% primary school),Pupil Teacher ratio,Youth Literacy Rate 15-24 (% pop)
363,Antigua and Barbuda,ATG,Latin America & Caribbean,1990,,,,,,,,
364,Antigua and Barbuda,ATG,Latin America & Caribbean,1991,,,,,,,,
365,Antigua and Barbuda,ATG,Latin America & Caribbean,1992,83.785881,,,,,,,
366,Antigua and Barbuda,ATG,Latin America & Caribbean,1993,,,,,,,,
367,Antigua and Barbuda,ATG,Latin America & Caribbean,1994,,,,,,,,


In [10]:
# Narrow the education table to the South American countries and report how
# many non-null observations each metric has.
is_south_american = education_by_country['Country Name'].isin(sa_countries)
education_by_country = education_by_country[is_south_american]
education_by_country.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 348 entries, 422 to 11849
Data columns (total 12 columns):
Country Name                                                348 non-null object
Country Code                                                348 non-null object
Region                                                      348 non-null object
Year                                                        348 non-null object
Primary Completion Rate (% of relevant age group)           212 non-null float64
Expenditure on Education (% of GDP)                         184 non-null float64
Government expenditure per student, primary (% of GDP)      140 non-null float64
Government expenditure per student, secondary (% of GDP)    134 non-null float64
Government expenditure per student, tertiary (% of GDP)     102 non-null float64
Children out of School(% primary school)                    215 non-null float64
Pupil Teacher ratio                                         211 non-null float64
Yout

In [11]:
# Preview Brazil's first rows; the earliest years shown in the output have no observations.
education_by_country[education_by_country['Country Name'] == 'Brazil'].head()

Indicator Code,Country Name,Country Code,Region,Year,Primary Completion Rate (% of relevant age group),Expenditure on Education (% of GDP),"Government expenditure per student, primary (% of GDP)","Government expenditure per student, secondary (% of GDP)","Government expenditure per student, tertiary (% of GDP)",Children out of School(% primary school),Pupil Teacher ratio,Youth Literacy Rate 15-24 (% pop)
1533,Brazil,BRA,Latin America & Caribbean,1990,,,,,,,,
1534,Brazil,BRA,Latin America & Caribbean,1991,,,,,,,,
1535,Brazil,BRA,Latin America & Caribbean,1992,,,,,,,,
1536,Brazil,BRA,Latin America & Caribbean,1993,,,,,,,,
1537,Brazil,BRA,Latin America & Caribbean,1994,,,,,,,,


In [12]:
# Line chart: education spending as a share of GDP, one line per South American country.
# The tick positions and the year range in the title are derived from the data so the
# two can no longer disagree with each other or with the year filter applied earlier.
plot_years = education_by_country['Year'].astype(int)
first_year, last_year = plot_years.min(), plot_years.max()

layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(first_year, last_year + 1),
        tickangle=45,
        title='Year',
    ),
    yaxis=go.layout.YAxis(
        autorange=True,
        ticksuffix='%',
    ),
    title='Expenditure on Education (% of GDP), {}-{}'.format(first_year, last_year),
)

fig = go.Figure(layout=layout)

for country in education_by_country['Country Name'].unique():
    country_education = education_by_country[education_by_country['Country Name'] == country]
    fig.add_scatter(
        x=country_education['Year'],
        # Carry the last reported value forward over years without an observation;
        # years before the first observation stay empty.
        y=country_education['Expenditure on Education (% of GDP)'].ffill(),
        name=country,
        mode='lines',
    )

# `py` is plotly.offline.iplot here: the import cell binds it under that alias.
py(fig, filename='gdp-edu-line')

In [13]:
# Subset of countries used for the detailed comparisons that follow.
selected_sa_countries = ['Argentina', 'Brazil', 'Chile', 'Uruguay']
is_selected = education_by_country['Country Name'].isin(selected_sa_countries)
education_by_country_selected = education_by_country[is_selected]

In [14]:
# Line chart: education spending as a share of GDP for the selected countries only.
# The tick positions and the year range in the title are derived from the data so the
# two can no longer disagree with each other or with the year filter applied earlier.
plot_years = education_by_country_selected['Year'].astype(int)
first_year, last_year = plot_years.min(), plot_years.max()

layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(first_year, last_year + 1),
        tickangle=45,
        title='Year',
    ),
    yaxis=go.layout.YAxis(
        autorange=True,
        ticksuffix='%',
    ),
    title='Expenditure on Education (% of GDP), {}-{}'.format(first_year, last_year),
)

fig = go.Figure(layout=layout)

for country in education_by_country_selected['Country Name'].unique():
    # NOTE: despite its name this holds education rows. The name `ext_pov_region` is kept
    # because later cells in this notebook read this loop variable as a global.
    ext_pov_region = education_by_country_selected[education_by_country_selected['Country Name'] == country]
    fig.add_scatter(
        x=ext_pov_region['Year'],
        # Carry the last reported value forward over years without an observation;
        # years before the first observation stay empty.
        y=ext_pov_region['Expenditure on Education (% of GDP)'].ffill(),
        name=country,
        mode='lines',
    )

py(fig, filename='gdp-edu-line')

### Multiple axes

In [33]:
def expenditure_edu_x_ext_poverty(country_name):
    """Build a dual-axis figure of education spending vs. extreme poverty for one country.

    Parameters
    ----------
    country_name : str
        Value matched against the 'Country Name' column of
        ``education_by_country_selected`` and ``extreme_poverty_by_country``.

    Returns
    -------
    plotly.graph_objs.Figure
        Two line traces: education expenditure on the left y axis and extreme
        poverty on a right-hand y axis ('y2') overlaying the first.
    """
    edu_by_region = education_by_country_selected[
        education_by_country_selected['Country Name'] == country_name
    ]
    ext_pov_by_region = extreme_poverty_by_country[
        extreme_poverty_by_country['Country Name'] == country_name
    ]

    # Each trace takes its x values from the same frame as its y values. Previously
    # both read a leftover global loop variable, which tied the function to hidden
    # notebook state and could pair values with the wrong years.
    education_trace = go.Scatter(
        x=edu_by_region['Year'],
        y=edu_by_region['Expenditure on Education (% of GDP)'].ffill(),
        name='Expenditure on Education',
    )
    poverty_trace = go.Scatter(
        x=ext_pov_by_region['Year'],
        y=ext_pov_by_region['Extreme Poverty(% pop)'].ffill(),
        name='Extreme Poverty',
        yaxis='y2',
    )

    layout = go.Layout(
        title='Expenditure on Education x Extreme Poverty on {}'.format(country_name),
        yaxis=dict(
            title='% GDP',
        ),
        # Second y axis drawn over the first one, with its scale on the right.
        yaxis2=dict(
            title='% Population',
            overlaying='y',
            side='right',
        ),
        xaxis=go.layout.XAxis(
            title='Year',
        ),
    )

    return go.Figure(data=[education_trace, poverty_trace], layout=layout)

### Brazil

In [34]:
# Build and render the education-spending vs. extreme-poverty figure for Brazil.
fig_brazil = expenditure_edu_x_ext_poverty('Brazil')
py(fig_brazil)

### Argentina

In [36]:
# Build and render the education-spending vs. extreme-poverty figure for Argentina.
fig_argentina = expenditure_edu_x_ext_poverty('Argentina')
py(fig_argentina)

In [42]:
# Combine the Brazil and Argentina figures into one figure with two vertically stacked
# panels: Brazil's y axis takes the upper part of the canvas (domain .55-1) and
# Argentina's the lower part (domain 0-.45).
# NOTE: fig_brazil and fig_argentina are modified in place by this cell, so displaying
# them afterwards shows the modified figures, not the originals.

# Point every Brazil trace at the first x/y axis pair.
# NOTE(review): this also moves the poverty trace off the 'y2' axis it was created on,
# so both Brazil series presumably share one scale afterwards — confirm this is intended.
for i in range(len(fig_brazil.data)):
    fig_brazil.data[i].xaxis='x1'
    fig_brazil.data[i].yaxis='y1'

# NOTE(review): xaxis1 / yaxis1 are presumably aliases of the layout's xaxis / yaxis — confirm.
fig_brazil.layout.xaxis1.update({'anchor': 'y1'})
fig_brazil.layout.yaxis1.update({'anchor': 'x1', 'domain': [.55, 1]})

# Point every Argentina trace at the second x/y axis pair.
for i in range(len(fig_argentina.data)):
    fig_argentina.data[i].xaxis='x2'
    fig_argentina.data[i].yaxis='y2'

# Initialize xaxis2 and yaxis2 on the Argentina layout.
# NOTE(review): assigning {} to yaxis2 presumably discards the title / overlaying / side
# settings the figure was built with — confirm.
fig_argentina['layout']['xaxis2'] = {}
fig_argentina['layout']['yaxis2'] = {}

fig_argentina.layout.xaxis2.update({'anchor': 'y2'})
fig_argentina.layout.yaxis2.update({'anchor': 'x2', 'domain': [0, .45]})

# Collect the four traces in a new figure: first the two education traces, then the
# two poverty traces.
fig = go.Figure()
fig.add_traces([fig_brazil.data[0], fig_argentina.data[0]])
fig.add_traces([fig_brazil.data[1], fig_argentina.data[1]])

# Apply both layouts; Argentina's is applied last.
# NOTE(review): where both layouts set the same key (e.g. the title), the Argentina value
# presumably wins — confirm the combined title and axis settings are what is wanted.
fig.layout.update(fig_brazil.layout)
fig.layout.update(fig_argentina.layout)

py(fig, filename='figure_factory_subplot')

In [44]:
# Re-render the Brazil figure. The subplot-combining cell above changes fig_brazil's
# trace axes and layout in place, so this shows the modified figure.
py(fig_brazil)

In [43]:
# Re-render the Argentina figure. The subplot-combining cell above changes fig_argentina's
# trace axes and layout in place, so this shows the modified figure.
py(fig_argentina)

In [40]:
# Debug aid: inspect the first Brazil trace (education expenditure) to see its x / y arrays.
fig_brazil.data[0]

Scatter({
    'name': 'Expenditure on Education',
    'uid': '84b8fa9a-067a-4756-97d6-84c2d949bece',
    'x': array(['1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998',
                '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
                '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
                '2017', '2018'], dtype=object),
    'y': array([       nan,        nan,        nan,        nan,        nan, 4.56816006,
                4.56816006, 4.56816006, 4.75665998, 3.80062008, 3.94893003, 3.84468007,
                3.75037003, 3.75037003, 3.97447991, 4.4790802 , 4.87060022, 4.97425985,
                5.26883984, 5.46355009, 5.6487999 , 5.73741007, 5.85510015, 5.83885002,
                5.94848013, 6.24105978, 6.24105978, 6.24105978, 6.24105978])
})

In [39]:
# Debug aid: inspect the second Brazil trace (extreme poverty) and its axis assignment.
fig_brazil.data[1]

Scatter({
    'name': 'Extreme Poverty',
    'uid': 'faee57a8-1341-41e0-a6b8-4a1a2fabd4ac',
    'x': array(['1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998',
                '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
                '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
                '2017', '2018'], dtype=object),
    'y': array([21.6, 21.6, 20.9, 19.9, 19.9, 13. , 14.2, 14. , 12.7, 13.4, 13.4, 11.6,
                10.3, 11.1,  9.7,  8.6,  7.2,  6.8,  5.6,  5.4,  5.4,  4.7,  3.8,  3.8,
                 2.8,  3.4,  4.3,  4.8,  4.8]),
    'yaxis': 'y2'
})

## Resource distribution in Brazil

In [18]:
# years = 2011, 2013, 2015
resources_dist_brazil = education_by_country[education_by_country['Country Name'] == 'Brazil']
resources_dist_brazil = resources_dist_brazil[(resources_dist_brazil['Year'] == '2009') | (resources_dist_brazil['Year'] == '2011') | (resources_dist_brazil['Year'] == '2013') | (resources_dist_brazil['Year'] == '2015')]
resources_dist_brazil.head()

Indicator Code,Country Name,Country Code,Region,Year,Primary Completion Rate (% of relevant age group),Expenditure on Education (% of GDP),"Government expenditure per student, primary (% of GDP)","Government expenditure per student, secondary (% of GDP)","Government expenditure per student, tertiary (% of GDP)",Children out of School(% primary school),Pupil Teacher ratio,Youth Literacy Rate 15-24 (% pop)
1552,Brazil,BRA,Latin America & Caribbean,2009,,5.46355,19.755871,20.112749,27.840969,3.41492,22.64411,98.065536
1554,Brazil,BRA,Latin America & Caribbean,2011,,5.73741,20.24881,21.98007,27.577471,4.30282,21.28507,98.486923
1556,Brazil,BRA,Latin America & Caribbean,2013,,5.83885,19.79739,20.71352,30.020969,2.67611,21.227489,98.736679
1558,Brazil,BRA,Latin America & Caribbean,2015,,6.24106,20.201139,21.683451,33.284069,3.14769,20.585291,98.963753


In [19]:
# Grouped bar chart: government expenditure per student in Brazil, by education level.
# The x values come from the filtered frame itself, so every bar is paired with the
# year of its own row even if one of the expected years is missing from the data.
x_years = resources_dist_brazil['Year'].tolist()

# (legend label, source column) for each education level, in plotting order.
expenditure_levels = [
    ('Primary', 'Government expenditure per student, primary (% of GDP)'),
    ('Secondary', 'Government expenditure per student, secondary (% of GDP)'),
    ('Tertiary', 'Government expenditure per student, tertiary (% of GDP)'),
]

data = [
    go.Bar(x=x_years, y=resources_dist_brazil[column], name=level)
    for level, column in expenditure_levels
]

layout = go.Layout(
    title='Government expenditure per student by education level in Brazil',
    barmode='group',
    yaxis=go.layout.YAxis(
        # These World Bank indicators (SE.XPD.*.PC.ZS) express spending per student
        # as a share of GDP per capita, not of total GDP.
        title='% of GDP per capita',
        ticksuffix='%',
    ),
    xaxis=go.layout.XAxis(
        title='Year',
    ),
)

fig = go.Figure(data=data, layout=layout)
py(fig, filename='resources-dist-brazil-edu')

# Climate change

In [20]:
# Access to electricity and under-5 mortality for the selected countries, 1990 onwards.
env_path = path.join(social_env_path, 'Environment', 'API_19_DS2_en_csv_v2_10515758.csv')
access_to_eletricity_by_country = tideWBDataFrame(createWBDataFrame(env_path))

# Keep the two indicators of interest and give them readable names.
# rename() returns a new frame, so no filtered slice is modified in place.
access_to_eletricity_by_country = keepMetrics(
    access_to_eletricity_by_country, ['EG.ELC.ACCS.ZS', 'SH.DYN.MORT']
).rename(columns={
    'EG.ELC.ACCS.ZS': 'Access to electricity (% of pop)',
    'SH.DYN.MORT': 'Mortality rate, under-5 (per 1,000 live births)',
})

# One combined filter replaces the earlier "> '1980'" step, which the ">= '1990'"
# cut-off made redundant. 'Year' values are strings, so the comparison is a string comparison.
access_to_eletricity_by_country = access_to_eletricity_by_country[
    (access_to_eletricity_by_country['Year'] >= '1990')
    & (access_to_eletricity_by_country['Region'] == 'Latin America & Caribbean')
    & access_to_eletricity_by_country['Country Name'].isin(selected_sa_countries)
]

In [21]:
# Preview the first rows of the electricity-access / under-5 mortality table.
access_to_eletricity_by_country.head()

Indicator Code,Country Name,Country Code,Region,Year,Access to electricity (% of pop),"Mortality rate, under-5 (per 1,000 live births)"
441,Argentina,ARG,Latin America & Caribbean,1990,90.640823,28.8
442,Argentina,ARG,Latin America & Caribbean,1991,91.123672,28.3
443,Argentina,ARG,Latin America & Caribbean,1992,91.606018,27.6
444,Argentina,ARG,Latin America & Caribbean,1993,92.085304,26.5
445,Argentina,ARG,Latin America & Caribbean,1994,92.558472,25.5


In [23]:
# Dual-axis chart: access to electricity vs. extreme poverty for a single country.
# The country is named once so the data filters and the chart title cannot disagree
# (the title previously said Brazil while the data plotted was Chile's).
country_name = 'Chile'
access_by_region = access_to_eletricity_by_country[access_to_eletricity_by_country['Country Name'] == country_name]
ext_pov_by_region = extreme_poverty_by_country[extreme_poverty_by_country['Country Name'] == country_name]

# Each trace takes its x values from the same frame as its y values, instead of
# from a loop variable left behind by an earlier cell.
trace1 = go.Scatter(
    x=access_by_region['Year'],
    y=access_by_region['Access to electricity (% of pop)'].ffill(),
    name='Access to electricity',
)
trace2 = go.Scatter(
    x=ext_pov_by_region['Year'],
    y=ext_pov_by_region['Extreme Poverty(% pop)'].ffill(),
    name='Extreme Poverty',
    yaxis='y2',
)

data = [trace1, trace2]

layout = go.Layout(
    title='Access to electricity x Extreme Poverty on {}'.format(country_name),
    yaxis=dict(
        title='% Population',
    ),
    # Second y axis drawn over the first one, with its scale on the right.
    yaxis2=dict(
        title='% Population',
        overlaying='y',
        side='right',
    ),
    xaxis=go.layout.XAxis(
        title='Year',
    ),
)
# NOTE: the variable name `fig_brazil` is kept for compatibility with the rest of the
# notebook even though the figure shows `country_name`.
fig_brazil = go.Figure(data=data, layout=layout)
py(fig_brazil, filename='electricity-poverty-line')

# Mortality rate

In [24]:
# Scratch check of the tick values produced by np.arange: the stop value (2017) is
# excluded, so the last tick is 2016.
np.arange(1990, 2017, step=1)

array([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
       2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
       2012, 2013, 2014, 2015, 2016])

In [25]:
# Line chart: under-5 mortality rate, one line per selected country.
mortality_column = 'Mortality rate, under-5 (per 1,000 live births)'

# The tick positions and the year range in the title are derived from the data so the
# two can no longer disagree with each other or with the year filter applied earlier.
plot_years = access_to_eletricity_by_country['Year'].astype(int)
first_year, last_year = plot_years.min(), plot_years.max()

layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(first_year, last_year + 1),
        tickangle=45,
        title='Year',
    ),
    yaxis=go.layout.YAxis(
        autorange=True,
        # No '%' tick suffix: the indicator is a rate per 1,000 live births, not a percentage.
        title='Deaths per 1,000 live births',
    ),
    title='{}, {}-{}'.format(mortality_column, first_year, last_year),
)

fig = go.Figure(layout=layout)

for country in access_to_eletricity_by_country['Country Name'].unique():
    ate_region = access_to_eletricity_by_country[access_to_eletricity_by_country['Country Name'] == country]
    fig.add_scatter(
        x=ate_region['Year'],
        # Carry the last reported value forward over years without an observation.
        y=ate_region[mortality_column].ffill(),
        name=country,
        mode='lines',
    )

py(fig, filename='mortality-rate-line')

In [26]:
# Dual-axis chart: under-5 mortality vs. extreme poverty for Brazil.
access_by_region = access_to_eletricity_by_country[access_to_eletricity_by_country['Country Name'] == 'Brazil']
ext_pov_by_region = extreme_poverty_by_country[extreme_poverty_by_country['Country Name'] == 'Brazil']

# Each trace takes its x values from the same frame as its y values, instead of
# from a loop variable left behind by an earlier cell.
trace1 = go.Scatter(
    x=access_by_region['Year'],
    y=access_by_region['Mortality rate, under-5 (per 1,000 live births)'].ffill(),
    name='Mortality Rate',
)
trace2 = go.Scatter(
    x=ext_pov_by_region['Year'],
    y=ext_pov_by_region['Extreme Poverty(% pop)'].ffill(),
    name='Extreme Poverty',
    yaxis='y2',
)

data = [trace1, trace2]

layout = go.Layout(
    title='Mortality rate (under-5) x Extreme Poverty on Brazil',
    yaxis=dict(
        # States the unit of the left axis; the indicator is a rate per 1,000 live births.
        title='Under-5 deaths per 1,000 live births',
    ),
    # Second y axis drawn over the first one, with its scale on the right.
    yaxis2=dict(
        title='% Population',
        overlaying='y',
        side='right',
    ),
    xaxis=go.layout.XAxis(
        title='Year',
    ),
)
fig_brazil = go.Figure(data=data, layout=layout)
py(fig_brazil, filename='mortality-poverty-line')