## GDP and Internet Usage Project

#### Import Pandas, Numpy, Matploytlib, Seaborn

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

#### Read CSVs In Pandas

In [2]:
gdp_df = pd.read_csv("data/gdp_percapita.csv")
gdp_df.head(6)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
0,Afghanistan,2018,2190.240321,
1,Afghanistan,2017,2202.570851,
2,Afghanistan,2016,2200.764487,
3,Afghanistan,2015,2212.750899,
4,Afghanistan,2014,2249.253738,
5,Afghanistan,2013,2264.317575,


In [3]:
internet_df = pd.read_csv("data/internet_use.csv")
internet_df.head(6)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
0,Afghanistan,2014,6.39,
1,Afghanistan,2013,5.9,
2,Afghanistan,2012,5.45454545454545,
3,Afghanistan,2011,5.0,
4,Afghanistan,2010,4.0,
5,Afghanistan,2009,3.55,


#### Check Shapes of DataFrames

In [4]:
gdp_df.shape

(6608, 4)

In [5]:
internet_df.shape

(4676, 4)

#### Check DataFrame Types

In [6]:
gdp_df.dtypes

Country or Area     object
Year                object
Value              float64
Value Footnotes    float64
dtype: object

In [7]:
internet_df.dtypes

Country or Area    object
Year               object
Value              object
Value Footnotes    object
dtype: object

#### Check Tails

In [8]:
gdp_df.tail(10)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
6598,Zimbabwe,1997,3580.048793,
6599,Zimbabwe,1996,3520.430146,
6600,Zimbabwe,1995,3226.41393,
6601,Zimbabwe,1994,3263.934978,
6602,Zimbabwe,1993,3033.504852,
6603,Zimbabwe,1992,3054.889178,
6604,Zimbabwe,1991,3426.598094,
6605,Zimbabwe,1990,3324.348171,
6606,footnoteSeqID,Footnote,,
6607,2,"Excludes South Sudan after July 9, 2011.",,


In [9]:
internet_df.tail(10)

Unnamed: 0,Country or Area,Year,Value,Value Footnotes
4666,171,Internet Dial-up customers.,,
4667,172,Population age 16+ using the Internet in the l...,,
4668,173,Population age 16+ using internet in the last ...,,
4669,174,"U.S. Census Bureau, Table 2. Reported Internet...",,
4670,175,Includes individuals 3 years and older,,
4671,176,NTIA/CPS survey.,,
4672,177,"Pew Internet Project, Internet",Broadband and cell phone statistics,” January 5 2010 http://www.pewinternet.org/~/...
4673,178,Estimated based on Survey's results. Populatio...,,
4674,179,Preliminary. Country estimate.,,
4675,180,The methodology used to estimated the figure f...,,


#### Remove "Value Footnotes" Column

In [10]:
gdp_df = gdp_df.drop('Value Footnotes', 1)

In [11]:
internet_df = internet_df.drop('Value Footnotes', 1)

#### Change Column Names

In [12]:
gdp_df.columns = ['Country', 'Year', 'GDP_Per_Capita']

In [13]:
internet_df.columns = ['Country', 'Year', 'Internet_Users_Pct']

#### Merge ALL ROWS from DataFrames

In [14]:
gdp_and_internet_use = pd.merge(gdp_df, internet_df)

In [15]:
gdp_and_internet_use.tail(10)

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
3428,Zimbabwe,2001,3510.893992,0.799846045633137
3429,Zimbabwe,2000,3473.429801,0.401433535211582
3430,Zimbabwe,1999,3600.849291,0.161675527563414
3431,Zimbabwe,1998,3653.920016,0.0816484762026535
3432,Zimbabwe,1997,3580.048793,0.033080326724463
3433,Zimbabwe,1996,3520.430146,0.016790483892511
3434,Zimbabwe,1995,3226.41393,0.0076835399195328
3435,Zimbabwe,1994,3263.934978,0.0017392795590995
3436,Zimbabwe,1990,3324.348171,0.0
3437,footnoteSeqID,Footnote,,


###### Dropped Last Row

In [16]:
gdp_and_internet_use = gdp_and_internet_use.drop(3437,0)

#### Subset Data to Only Include 2004, 2009, 2014

In [26]:
gdp_and_internet_use = gdp_and_internet_use[(gdp_and_internet_use.Year == '2004') | (gdp_and_internet_use.Year == '2009') | (gdp_and_internet_use.Year == '2014')]

In [27]:
gdp_and_internet_use

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
0,Afghanistan,2014,2249.253738,6.39
5,Afghanistan,2009,1881.777626,3.55
10,Afghanistan,2004,1284.127338,0.105809030021958
13,Albania,2014,11828.232264,60.1
18,Albania,2009,10528.778102,41.2
...,...,...,...,...
3398,Zambia,2009,2917.522504,6.31
3403,Zambia,2004,2261.771409,2.01354953218533
3415,Zimbabwe,2014,2955.240287,19.89
3420,Zimbabwe,2009,1925.364381,11.36


#### Create 3 New DataFrame for 2004, 2009, 2014

In [37]:
gdp_and_internet_use_2004= gdp_and_internet_use[gdp_and_internet_use.Year == '2004'] 
gdp_and_internet_use_2009 = gdp_and_internet_use[gdp_and_internet_use.Year == '2009'] 
gdp_and_internet_use_2014 = gdp_and_internet_use[gdp_and_internet_use.Year == '2014'] 

In [38]:
gdp_and_internet_use_2004.groupby[.Internet_Users_Pct.max()

'9.8'

In [50]:
gdp_and_internet_use_2009.iloc[gdp_and_internet_use_2009['Internet_Users_Pct'].idxmax()]

TypeError: reduction operation 'argmax' not allowed for this dtype

In [40]:
gdp_and_internet_use_2014.Internet_Users_Pct.max()

'98.16'

In [42]:
gdp_and_internet_use_2004.sort_values("Internet_Users_Pct", ascending=False)

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
343,Belize,2004,7327.474691,9.8
691,Colombia,2004,9814.268838,9.1186903029666
1699,Lebanon,2004,14283.959712,9
2964,Sweden,2004,45029.814344,83.89
1370,Iceland,2004,45380.965464,83.88
...,...,...,...,...
998,Ethiopia,2004,793.093786,0.155334520794722
10,Afghanistan,2004,1284.127338,0.105809030021958
3014,Tajikistan,2004,1737.743630,0.0774799071356825
1738,Liberia,2004,1181.546158,0.0310111848040233


In [43]:
gdp_and_internet_use_2004.groupby(['Country','Year'])['Internet_Users_Pct'].max()

Country              Year
Afghanistan          2004    0.105809030021958
Albania              2004     2.42038779776014
Algeria              2004     4.63447508776537
Angola               2004    0.464814617985909
Antigua and Barbuda  2004     24.2665437161785
                                   ...        
Uruguay              2004     17.0630983403661
Uzbekistan           2004     2.59372542084924
Vanuatu              2004     4.74660261917533
Zambia               2004     2.01354953218533
Zimbabwe             2004      6.5640450271075
Name: Internet_Users_Pct, Length: 156, dtype: object

In [51]:
# Ian's Code
gdp_and_internet_use_2004[gdp_and_internet_use_2004['Internet_Users_Pct']==gdp_and_internet_use_2004['Internet_Users_Pct'].max()]

Unnamed: 0,Country,Year,GDP_Per_Capita,Internet_Users_Pct
343,Belize,2004,7327.474691,9.8
