In [1]:
import numpy as np
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot as pyof
import matplotlib.pyplot as plt

from os import path

# Root folder for the CSV inputs; the file paths in the cells below are joined onto it.
data_path = 'data'

In [2]:
# Switch on plotly's notebook mode (init_notebook_mode is imported from plotly.offline).
init_notebook_mode(connected=True)

In [3]:
def createWBDataFrame(path):
    """Load a World Bank indicator CSV and attach the country metadata.

    Parameters
    ----------
    path : str
        Location of the CSV file. The column names are taken from the row
        at index 2 (``header=2``) and the file is decoded as UTF-8.

    Returns
    -------
    pandas.DataFrame
        The CSV contents left-joined with the module-level
        ``country_metadata`` frame on ``'Country Code'``, without the
        ``'Indicator Name'`` column and without any ``'Unnamed: N'``
        placeholder column.

    Raises
    ------
    KeyError
        If the file has no ``'Indicator Name'`` column.

    Notes
    -----
    ``country_metadata`` is a global: it has to be defined before this
    function is called.
    """
    df = pd.read_csv(path, header=2, encoding='utf-8')
    df = pd.merge(df, country_metadata, how='left', on=['Country Code'])
    # pandas names header-less columns 'Unnamed: <position>', so the label
    # depends on how many columns the file has. Match on the prefix instead
    # of hard-coding 'Unnamed: 63', which breaks for files with a different
    # number of year columns.
    placeholder_columns = [name for name in df.columns if str(name).startswith('Unnamed:')]
    return df.drop(['Indicator Name'] + placeholder_columns, axis=1)

def tideWBDataFrame(df):
    """Reshape a wide World Bank frame into one row per country and year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``'Country Name'``, ``'Country Code'``,
        ``'Region'`` and ``'Indicator Code'``; every remaining column is
        treated as a year holding that indicator's observation.

    Returns
    -------
    pandas.DataFrame
        One row per (``'Country Name'``, ``'Country Code'``, ``'Region'``,
        ``'Year'``) and one column per indicator code. ``'Year'`` keeps the
        original column labels; duplicate observations are averaged.
    """
    id_columns = ['Country Name', 'Country Code', 'Region']

    # Wide -> long: every year column becomes a (Year, observation) pair.
    long_df = pd.melt(
        df,
        id_vars=id_columns + ['Indicator Code'],
        var_name='Year',
        value_name='observation',
    )

    # Long -> tidy: spread the indicator codes back out into columns.
    # The string alias 'mean' is used instead of np.mean: recent pandas
    # releases emit a FutureWarning when handed the numpy callable, and the
    # alias yields the same aggregation on every version.
    tidy_df = pd.pivot_table(
        long_df,
        columns=['Indicator Code'],
        index=id_columns + ['Year'],
        values='observation',
        aggfunc='mean',
    )
    return tidy_df.reset_index()

def keepMetrics(df, metrics):
    """Return the identifying columns plus the requested indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``'Country Name'``, ``'Country Code'``,
        ``'Region'``, ``'Year'`` and the requested metric columns.
    metrics : str or iterable of str
        Column name(s) to keep, in the order they should appear. A list,
        tuple or other iterable is accepted, as is a single name.

    Returns
    -------
    pandas.DataFrame
        All rows of ``df``, restricted to the four identifying columns
        followed by ``metrics``.
    """
    standard_columns = ['Country Name', 'Country Code', 'Region', 'Year']
    # A bare string would otherwise be split into characters by list().
    if isinstance(metrics, str):
        metrics = [metrics]
    # list() lets tuples / generators through; `list + tuple` raises TypeError.
    return df.loc[:, standard_columns + list(metrics)]

In [4]:
# Country lookup table that createWBDataFrame merges onto each data file
# via 'Country Code'. The columns listed in the drop are removed right away.
country_metadata = (
    pd.read_csv(path.join(data_path, 'GDP_BY_COUNTRY', 'Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10515210.csv'))
    .drop(columns=['IncomeGroup', 'SpecialNotes', 'TableName', 'Unnamed: 5'])
)

## Education

In [53]:
# Locations of the social / environmental / economic indicator files.
social_env_path = path.join(data_path, 'SOCIAL_ENV_ECON_FACTORS_BY_COUNTRY')
edbc_path = path.join(social_env_path, 'Education', 'API_4_DS2_en_csv_v2_10577018.csv')

# Indicator code -> readable column label. The key order is also the column
# order of the resulting frame.
# NOTE(review): the World Bank publishes the SE.XPD.*.PC.ZS series as
# "% of GDP per capita", while the labels below say "% of GDP". They are left
# as they are because later cells look the columns up by these exact names.
education_labels = {
    'SE.PRM.CMPT.ZS': 'Primary Completion Rate (% of relevant age group)',
    'SE.XPD.TOTL.GD.ZS': 'Expenditure on Education (% of GDP)',
    'SE.XPD.PRIM.PC.ZS': 'Government expenditure per student, primary (% of GDP)',
    'SE.XPD.SECO.PC.ZS': 'Government expenditure per student, secondary (% of GDP)',
    'SE.XPD.TERT.PC.ZS': 'Government expenditure per student, tertiary (% of GDP)',
    'SE.PRM.UNER.ZS': 'Children out of School(% primary school)',
    'SE.PRM.ENRL.TC.ZS': 'Pupil Teacher ratio',
    'SE.ADT.1524.LT.ZS': 'Youth Literacy Rate 15-24 (% pop)',
}

# Load -> tidy -> keep the selected indicators -> give them readable names.
education_by_country = tideWBDataFrame(createWBDataFrame(edbc_path))
education_by_country = keepMetrics(education_by_country, list(education_labels))
education_by_country = education_by_country.rename(columns=education_labels)

# 'Year' holds strings, so this is a string comparison against '1990'.
education_by_country = education_by_country.loc[education_by_country['Year'] >= '1990']

In [54]:
# Keep only the rows whose region is Latin America & Caribbean, then preview them.
education_by_country = education_by_country.loc[
    education_by_country['Region'].eq('Latin America & Caribbean')
]
education_by_country.head()

Indicator Code,Country Name,Country Code,Region,Year,Primary Completion Rate (% of relevant age group),Expenditure on Education (% of GDP),"Government expenditure per student, primary (% of GDP)","Government expenditure per student, secondary (% of GDP)","Government expenditure per student, tertiary (% of GDP)",Children out of School(% primary school),Pupil Teacher ratio,Youth Literacy Rate 15-24 (% pop)
363,Antigua and Barbuda,ATG,Latin America & Caribbean,1990,,,,,,,,
364,Antigua and Barbuda,ATG,Latin America & Caribbean,1991,,,,,,,,
365,Antigua and Barbuda,ATG,Latin America & Caribbean,1992,83.785881,,,,,,,
366,Antigua and Barbuda,ATG,Latin America & Caribbean,1993,,,,,,,,
367,Antigua and Barbuda,ATG,Latin America & Caribbean,1994,,,,,,,,


In [7]:
# Countries to keep; the names are matched exactly against 'Country Name'.
sa_countries = [
    'Argentina', 'Bolivia', 'Brazil', 'Chile',
    'Colombia', 'Ecuador', 'Guyana', 'Paraguay',
    'Peru', 'Uruguay', 'Venezuela, RB', 'Suriname',
]
education_by_country = education_by_country.loc[
    education_by_country['Country Name'].isin(sa_countries)
]
education_by_country.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 348 entries, 422 to 11849
Data columns (total 12 columns):
Country Name                                                348 non-null object
Country Code                                                348 non-null object
Region                                                      348 non-null object
Year                                                        348 non-null object
Primary Completion Rate (% of relevant age group)           212 non-null float64
Expenditure on Education (% of GDP)                         184 non-null float64
Government expenditure per student, primary (% of GDP)      140 non-null float64
Government expenditure per student, secondary (% of GDP)    134 non-null float64
Government expenditure per student, tertiary (% of GDP)     102 non-null float64
Children out of School(% primary school)                    215 non-null float64
Pupil Teacher ratio                                         211 non-null float64
Yout

In [56]:
# Preview the first rows for Brazil.
education_by_country.loc[education_by_country['Country Name'].eq('Brazil')].head()

Indicator Code,Country Name,Country Code,Region,Year,Primary Completion Rate (% of relevant age group),Expenditure on Education (% of GDP),"Government expenditure per student, primary (% of GDP)","Government expenditure per student, secondary (% of GDP)","Government expenditure per student, tertiary (% of GDP)",Children out of School(% primary school),Pupil Teacher ratio,Youth Literacy Rate 15-24 (% pop)
1533,Brazil,BRA,Latin America & Caribbean,1990,,,,,,,,
1534,Brazil,BRA,Latin America & Caribbean,1991,,,,,,,,
1535,Brazil,BRA,Latin America & Caribbean,1992,,,,,,,,
1536,Brazil,BRA,Latin America & Caribbean,1993,,,,,,,,
1537,Brazil,BRA,Latin America & Caribbean,1994,,,,,,,,


In [22]:
# Year range actually present in the frame. It drives both the tick marks and
# the title so the two cannot drift apart (the hard-coded title used to say
# 1991-2017 while the ticks covered 1990-2016).
plot_years = education_by_country['Year'].astype(int)
first_year, last_year = plot_years.min(), plot_years.max()

layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(first_year, last_year + 1, step=1),
        tickangle=45,
        title='Year',
    ),
    yaxis=go.layout.YAxis(
        autorange=True,
        ticksuffix='%'
    ),
    title='Expenditure on Education (% of GDP), {}-{}'.format(first_year, last_year)
)

fig = go.Figure(
    layout=layout
)

# One line per country.
for country in education_by_country['Country Name'].unique():
    country_rows = education_by_country[education_by_country['Country Name'] == country]
    x = country_rows['Year']
    # Carry the last reported value forward over years without data.
    # .ffill() replaces fillna(method='ffill'), which newer pandas deprecates.
    y = country_rows['Expenditure on Education (% of GDP)'].ffill()
    fig.add_scatter(x=x, y=y, name=country, mode='lines')

py.iplot(fig, filename='gdp-edu-line')

In [62]:
# Subset of countries compared in the next chart.
selected_sa_countries = ['Argentina', 'Brazil', 'Chile', 'Uruguay']
education_by_country_selected = education_by_country.loc[
    education_by_country['Country Name'].isin(selected_sa_countries)
]

In [63]:
# Year range actually present in the selected rows. It drives both the tick
# marks and the title so the two cannot drift apart (the hard-coded title used
# to say 1991-2017 while the ticks covered 1990-2016).
plot_years = education_by_country_selected['Year'].astype(int)
first_year, last_year = plot_years.min(), plot_years.max()

layout = go.Layout(
    xaxis=go.layout.XAxis(
        tickmode='array',
        tickvals=np.arange(first_year, last_year + 1, step=1),
        tickangle=45,
        title='Year',
    ),
    yaxis=go.layout.YAxis(
        autorange=True,
        ticksuffix='%'
    ),
    title='Expenditure on Education (% of GDP), {}-{}'.format(first_year, last_year)
)

fig = go.Figure(
    layout=layout
)

# One line per selected country.
for country in education_by_country_selected['Country Name'].unique():
    country_rows = education_by_country_selected[education_by_country_selected['Country Name'] == country]
    x = country_rows['Year']
    # Carry the last reported value forward over years without data.
    # .ffill() replaces fillna(method='ffill'), which newer pandas deprecates.
    y = country_rows['Expenditure on Education (% of GDP)'].ffill()
    fig.add_scatter(x=x, y=y, name=country, mode='lines')

# Distinct filename: the all-countries chart is uploaded as 'gdp-edu-line', and
# uploading this figure under the same name would replace that hosted chart.
py.iplot(fig, filename='gdp-edu-line-selected')

## Resource distribution in Brazil

In [46]:
# years = 2011, 2013, 2015
resources_dist_brazil = education_by_country[education_by_country['Country Name'] == 'Brazil']
resources_dist_brazil = resources_dist_brazil[(resources_dist_brazil['Year'] == '2015') | (resources_dist_brazil['Year'] == '2013') | (resources_dist_brazil['Year'] == '2011') | (resources_dist_brazil['Year'] == '2009')]
resources_dist_brazil.head()

Indicator Code,Country Name,Country Code,Region,Year,Primary Completion Rate (% of relevant age group),Expenditure on Education (% of GDP),"Government expenditure per student, primary (% of GDP)","Government expenditure per student, secondary (% of GDP)","Government expenditure per student, tertiary (% of GDP)",Children out of School(% primary school),Pupil Teacher ratio,Youth Literacy Rate 15-24 (% pop)
1552,Brazil,BRA,Latin America & Caribbean,2009,,5.46355,19.755871,20.112749,27.840969,3.41492,22.64411,98.065536
1554,Brazil,BRA,Latin America & Caribbean,2011,,5.73741,20.24881,21.98007,27.577471,4.30282,21.28507,98.486923
1556,Brazil,BRA,Latin America & Caribbean,2013,,5.83885,19.79739,20.71352,30.020969,2.67611,21.227489,98.736679
1558,Brazil,BRA,Latin America & Caribbean,2015,,6.24106,20.201139,21.683451,33.284069,3.14769,20.585291,98.963753


In [47]:
# Take the x labels from the filtered frame itself so they always line up with
# the bar heights (they used to be a separately hard-coded list of years).
x_years = resources_dist_brazil['Year'].tolist()

# (column in resources_dist_brazil, legend entry) - one bar series per level.
level_columns = [
    ('Government expenditure per student, primary (% of GDP)', 'Primary'),
    ('Government expenditure per student, secondary (% of GDP)', 'Secondary'),
    ('Government expenditure per student, tertiary (% of GDP)', 'Tertiary'),
]

data = [
    go.Bar(x=x_years, y=resources_dist_brazil[column], name=level)
    for column, level in level_columns
]
layout = go.Layout(
    title='Government expenditure per student by education level in Brazil',
    barmode='group',
    yaxis=go.layout.YAxis(
        # The per-student indicators (SE.XPD.*.PC.ZS) are expressed relative
        # to GDP per capita, not to GDP, so the axis has to say so.
        title='% of GDP per capita',
        ticksuffix='%'
    ),
    xaxis=go.layout.XAxis(
        title='Year'
    ),
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='resources-dist-brazil-edu')