### Lucas Leal Vale

### Pergunta: Como o acesso à internet influenciou o desenvolvimento do IDH dos BRICS?

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import math as math
from scipy import stats
import matplotlib.pyplot as plt

#### Lendo os dataframes

In [2]:
# Load the four Excel workbooks used by the notebook. Each call reads the
# workbook's first sheet (the pandas default) into its own DataFrame.
Cel = pd.read_excel(io='cell_phones_total.xlsx')        # cell phone totals
Pc = pd.read_excel(io='personal_computers_total.xlsx')  # personal computer totals
Idh = pd.read_excel(io='Indicator_HDI.xlsx')            # HDI indicator
Int = pd.read_excel(io='internet_users.xlsx')           # internet users

#### Visualizando os dataframes que serão utilizados

In [3]:
# Preview the first five rows of the cell phone table.
Cel.head(n=5)

Unnamed: 0,geo,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Afghanistan,0.0,,,,,0.0,,,,...,4670000.0,7900000.0,10500000.0,10200000.0,13800000.0,15300000.0,16800000.0,18400000.0,19700000,21600000.0
1,Albania,0.0,,,,,0.0,,,,...,2320000.0,1860000.0,2460000.0,2690000.0,3100000.0,3500000.0,3690000.0,3360000.0,3400000,3370000.0
2,Algeria,0.0,,,,,0.0,,,,...,27600000.0,27000000.0,32700000.0,32800000.0,35600000.0,37500000.0,39500000.0,43300000.0,43200000,47000000.0
3,Andorra,0.0,,,,,0.0,,,,...,63500.0,64200.0,64500.0,65500.0,65000.0,63900.0,63900.0,66200.0,71300,71100.0
4,Angola,0.0,,,,,0.0,,,,...,4960000.0,6770000.0,8110000.0,9400000.0,12100000.0,12800000.0,13300000.0,14100000.0,13900000,13000000.0


In [4]:
# Preview the first five rows of the personal computer table.
Pc.head(n=5)

Unnamed: 0,geo,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
0,Afghanistan,,,,,,,,,,,,,,,,85000.0,100000.0
1,Albania,,,,,,,5000.0,10000.0,15000.0,20000.0,25000.0,30000.0,36000.0,42000.0,48000.0,54000.0,120000.0
2,Algeria,25000.0,39000.0,48000.0,60000.0,72000.0,85000.0,130000.0,140000.0,160000.0,180000.0,200000.0,220000.0,242000.0,265000.0,290000.0,350000.0,
3,Angola,,,,,,,,8000.0,10000.0,12000.0,15000.0,17000.0,27000.0,,,93400.0,111000.0
4,Antigua and Barbuda,,,,,,,,,,,,8000.0,9000.0,10000.0,11000.0,12100.0,17500.0


In [5]:
# Preview the first five rows of the internet users table.
Int.head(n=5)

Unnamed: 0,geo,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Afghanistan,,,,,,,,,,...,1.9,1.84,3.55,4.0,5.0,5.45,5.9,7.0,8.26,10.6
1,Albania,,,,,,,,,,...,15.0,23.9,41.2,45.0,49.0,54.7,57.2,60.1,63.3,66.4
2,Algeria,,,,,,,,,,...,9.45,10.2,11.2,12.5,14.9,18.2,22.5,29.5,38.2,42.9
3,Andorra,,,,,,,,,,...,70.9,70.0,78.5,81.0,81.0,86.4,94.0,95.9,96.9,97.9
4,Angola,,,,,,,,,,...,1.7,1.9,2.3,2.8,3.1,6.5,8.9,10.2,12.4,13.0


#### Reindexando os dataframes para evitar erros devido ao índice numérico

In [6]:
# Replace the default integer index with the country-name column, so that
# index-based joins align rows by country rather than by row position.
Cel_idx = Cel.set_index(keys='geo')
Pc_idx = Pc.set_index(keys='geo')
Int_idx = Int.set_index(keys='geo')
# The HDI workbook keeps its country names under the 'HDI' column instead.
Idh_idx = Idh.set_index(keys='HDI')

#### Adicionando sufixo nas colunas para que não haja colisão durante a junção dos dataframes

In [7]:
# Tag every column with the dataset it came from, so that year columns shared
# by several tables (e.g. 2005) stay distinguishable once the frames are joined.
# NOTE: each name is rebound to its own suffixed copy, so re-running this cell
# without first re-running the indexing cell appends the suffix a second time.
Idh_idx = Idh_idx.add_suffix(suffix='_IDH')
Cel_idx = Cel_idx.add_suffix(suffix='_Cel')
Pc_idx = Pc_idx.add_suffix(suffix='_Pc')
Int_idx = Int_idx.add_suffix(suffix='_Int')


#### Juntando os dataframes em um único. Note que juntei os três usando o dataframe de IDH como base.

In [8]:
# Attach the cell phone columns to the HDI table, matching rows on the index.
# The inner join keeps only index labels present in both frames.
# NOTE: Idh_idx is rebound to the joined result, so this cell is meant to run
# once; a second run would try to add the same '_Cel' columns again.
Idh_idx = Idh_idx.join(other=Cel_idx, how="inner")

In [9]:
# Attach the internet users columns to Idh_idx, matching rows on the index.
# The inner join keeps only index labels present in both frames.
# NOTE: Idh_idx is rebound to the joined result, so this cell is meant to run
# once; a second run would try to add the same '_Int' columns again.
Idh_idx = Idh_idx.join(other=Int_idx, how="inner")

In [10]:
# Build the final analysis frame: Idh_idx plus the personal computer columns,
# matched on the index. The inner join keeps only labels present in both frames.
df = Idh_idx.join(other=Pc_idx, how="inner")

In [11]:
# Preview the first five rows of the combined frame.
df.head(n=5)

Unnamed: 0,1980_IDH,1990_IDH,2000_IDH,2005_IDH,2006_IDH,2007_IDH,2008_IDH,2009_IDH,2011_IDH,1960_Cel,...,1997_Pc,1998_Pc,1999_Pc,2000_Pc,2001_Pc,2002_Pc,2003_Pc,2004_Pc,2005_Pc,2006_Pc
Afghanistan,0.198,0.246,0.23,0.34,0.354,0.363,0.37,0.387,0.398,0.0,...,,,,,,,,,85000.0,100000.0
Albania,,0.656,0.691,0.721,0.724,0.729,0.733,0.734,0.739,0.0,...,10000.0,15000.0,20000.0,25000.0,30000.0,36000.0,42000.0,48000.0,54000.0,120000.0
Algeria,0.454,0.551,0.624,0.667,0.673,0.68,0.686,0.691,0.698,0.0,...,140000.0,160000.0,180000.0,200000.0,220000.0,242000.0,265000.0,290000.0,350000.0,
Angola,,,0.384,0.445,0.459,0.471,0.476,0.481,0.486,0.0,...,8000.0,10000.0,12000.0,15000.0,17000.0,27000.0,,,93400.0,111000.0
Antigua and Barbuda,,,,,,,,,0.764,0.0,...,,,,,8000.0,9000.0,10000.0,11000.0,12100.0,17500.0
