In [1]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

# Silence pandas' chained-assignment warning (the default setting is 'warn').
pd.set_option("mode.chained_assignment", None)

In [2]:
# Load the GDP-per-capita CSV into a DataFrame and preview the first rows.
gdp_file = "Resources/World_GDP/gdp_per_capita.csv"
gdp_df = pd.read_csv(filepath_or_buffer=gdp_file)
gdp_df.head(n=5)

Unnamed: 0,Country,Subject Descriptor,Units,Scale,Country/Series-specific Notes,2015,Estimates Start After
0,Afghanistan,"Gross domestic product per capita, current prices",U.S. dollars,Units,"See notes for: Gross domestic product, curren...",599.99,2013.0
1,Albania,"Gross domestic product per capita, current prices",U.S. dollars,Units,"See notes for: Gross domestic product, curren...",3995.38,2010.0
2,Algeria,"Gross domestic product per capita, current prices",U.S. dollars,Units,"See notes for: Gross domestic product, curren...",4318.14,2014.0
3,Angola,"Gross domestic product per capita, current prices",U.S. dollars,Units,"See notes for: Gross domestic product, curren...",4100.32,2014.0
4,Antigua and Barbuda,"Gross domestic product per capita, current prices",U.S. dollars,Units,"See notes for: Gross domestic product, curren...",14414.3,2011.0


In [3]:
# Reduce the GDP table to the country name and the '2015' column,
# renaming both to the snake_case headers used by the rest of the notebook.
gdp_cols = ["Country", "2015"]
gdp_trans = (
    gdp_df.loc[:, gdp_cols]
    .copy()
    .rename(columns={"Country": "country", "2015": "gdp"})
)

gdp_trans.head(n=5)

Unnamed: 0,country,gdp
0,Afghanistan,599.99
1,Albania,3995.38
2,Algeria,4318.14
3,Angola,4100.32
4,Antigua and Barbuda,14414.3


In [4]:
# Load the 2015 World Happiness CSV into a DataFrame and preview the first rows.
world_happiness = "./Resources/World_Happiness_Data/2015.csv"
world_happiness_df = pd.read_csv(filepath_or_buffer=world_happiness)
world_happiness_df.head(n=5)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176


In [5]:
# Alphabetical list of the happiness dataset's countries, used to compare
# against the country names in the GDP dataset.
wh_country = ["Country"]
wh_trans_df = world_happiness_df.loc[:, wh_country].copy()
result_hap = (
    wh_trans_df
    .sort_values(by=["Country"])
    .rename(columns={"Country": "country"})
    .reset_index(drop=True)  # fresh 0..n-1 index; the old one is discarded
)
result_hap.head(n=5)

Unnamed: 0,country
0,Afghanistan
1,Albania
2,Algeria
3,Angola
4,Argentina


In [6]:
# Country-name column of the cleaned GDP table, as its own one-column frame.
gdp_count = ["country"]
result_gdp_df = gdp_trans.loc[:, gdp_count].copy()
result_gdp_df.head(n=5)

Unnamed: 0,country
0,Afghanistan
1,Albania
2,Algeria
3,Angola
4,Antigua and Barbuda


In [7]:
# Stack both country lists, then discard every name that occurs more than
# once (keep=False removes all copies), leaving only the names that appear
# a single time across the two frames.
merge_df = (
    pd.concat(objs=[result_hap, result_gdp_df])
    .drop_duplicates(keep=False)
)
merge_df.head(n=5)

Unnamed: 0,country
31,Congo (Brazzaville)
32,Congo (Kinshasa)
56,Hong Kong
61,Iran
66,Ivory Coast


In [8]:
# Summarise the country names left over after removing every duplicated name.
merge_df.describe()

Unnamed: 0,country
count,59
unique,59
top,Guyana
freq,1


In [9]:
# Keep only the country name and its happiness rank, renaming both columns
# to snake_case headers.
happiness_2015 = ["Country", "Happiness Rank"]
happiness_transformed = (
    world_happiness_df.loc[:, happiness_2015]
    .copy()
    .rename(columns={"Country": "country", "Happiness Rank": "happiness_rank"})
)

happiness_transformed.head(n=5)

Unnamed: 0,country,happiness_rank
0,Switzerland,1
1,Iceland,2
2,Denmark,3
3,Norway,4
4,Canada,5


In [10]:
# Summary statistics for the numeric column(s) of the happiness subset.
happiness_transformed.describe()

Unnamed: 0,happiness_rank
count,158.0
mean,79.493671
std,45.754363
min,1.0
25%,40.25
50%,79.5
75%,118.75
max,158.0


In [11]:
# Join happiness ranks to GDP on the shared country name.  This is an inner
# join, so countries whose names do not match in both frames are dropped.
merged_df = happiness_transformed.merge(gdp_trans, how="inner", on="country")
merged_df.head(n=5)

Unnamed: 0,country,happiness_rank,gdp
0,Switzerland,1,80675.31
1,Iceland,2,50854.58
2,Denmark,3,52114.17
3,Norway,4,74822.11
4,Canada,5,43331.96


In [12]:
# Show the rows of the merged frame whose GDP value is missing.
check = merged_df["gdp"].isna()
check_df = merged_df.loc[check]
check_df

Unnamed: 0,country,happiness_rank,gdp
64,Kosovo,69,
141,Syria,156,


In [13]:
# Drop every row that contains a missing value, then renumber the index from 0.
cleaned_gdp_df = (
    merged_df
    .dropna(axis="index", how="any")
    .reset_index(drop=True)
)
cleaned_gdp_df.head(n=5)

Unnamed: 0,country,happiness_rank,gdp
0,Switzerland,1,80675.31
1,Iceland,2,50854.58
2,Denmark,3,52114.17
3,Norway,4,74822.11
4,Canada,5,43331.96


In [14]:
# Summary statistics for the numeric columns once rows with missing values are gone.
cleaned_gdp_df.describe()

Unnamed: 0,happiness_rank,gdp
count,142.0,142.0
mean,78.021127,13691.567676
std,46.544186,18866.58265
min,1.0,305.78
25%,36.25,1491.8125
50%,78.5,5740.855
75%,118.75,16879.7375
max,158.0,101994.09


In [15]:
# Re-number happiness_rank so it runs 1..n without gaps (rows were removed
# by the merge and by dropna, leaving holes in the published ranks).
#
# The new rank is derived from the existing happiness_rank values rather than
# from the row position, so it stays correct even if the frame is not already
# sorted by rank.  method="first" breaks ties by order of appearance, which
# yields exactly position + 1 when the rows are already in rank order.
original_order = (
    cleaned_gdp_df
    .reset_index(drop=True)
    .assign(
        happiness_rank=lambda frame: (
            frame["happiness_rank"].rank(method="first").astype("int64")
        )
    )
    .loc[:, ["country", "happiness_rank", "gdp"]]
)
original_order

Unnamed: 0,country,happiness_rank,gdp
0,Switzerland,1,80675.31
1,Iceland,2,50854.58
2,Denmark,3,52114.17
3,Norway,4,74822.11
4,Canada,5,43331.96
...,...,...,...
137,Afghanistan,138,599.99
138,Rwanda,139,731.51
139,Benin,140,780.06
140,Burundi,141,305.78


In [16]:
# One-column frame holding just the happiness ranks from original_order.
original_happiness_rank = original_order.loc[:, ["happiness_rank"]]
original_happiness_rank

Unnamed: 0,happiness_rank
0,1
1,2
2,3
3,4
4,5
...,...
137,138
138,139
139,140
140,141


In [17]:
# Inspect the column dtypes of the cleaned frame.
cleaned_gdp_df.dtypes

country            object
happiness_rank      int64
gdp               float64
dtype: object

In [18]:
# Order the countries from highest to lowest GDP and renumber the index
# from 0 so that row position reflects GDP order.
gdp_sort = (
    original_order
    .sort_values(by=["gdp"], ascending=False)
    .reset_index(drop=True)
)
gdp_sort

Unnamed: 0,country,happiness_rank,gdp
0,Luxembourg,17,101994.09
1,Switzerland,1,80675.31
2,Qatar,28,76576.08
3,Norway,4,74822.11
4,United States,15,55805.20
...,...,...,...
137,Niger,130,405.21
138,Madagascar,133,401.77
139,Malawi,118,354.28
140,Central African Republic,134,334.87


In [19]:
# Add gdp_rank (1 = highest GDP).
# gdp_sort is already in descending-GDP order with a 0..n-1 index, so the
# rank is simply index + 1.  `assign` returns a new frame; a plain
# `gdp_rank = gdp_sort` would only alias the same object, and adding the
# column would then silently modify gdp_sort as well.
gdp_rank = gdp_sort.assign(gdp_rank=lambda frame: frame.index + 1)
gdp_rank

Unnamed: 0,country,happiness_rank,gdp,gdp_rank
0,Luxembourg,17,101994.09,1
1,Switzerland,1,80675.31,2
2,Qatar,28,76576.08,3
3,Norway,4,74822.11,4
4,United States,15,55805.20,5
...,...,...,...,...
137,Niger,130,405.21,138
138,Madagascar,133,401.77,139
139,Malawi,118,354.28,140
140,Central African Republic,134,334.87,141


In [20]:
# Format gdp as a currency string (e.g. "$101,994.09") for display.
# Build a new frame instead of aliasing gdp_rank, so the numeric gdp column
# in gdp_rank is not overwritten with strings.  Series.map is used because
# DataFrame.applymap is deprecated in recent pandas releases.
gdp_mon = gdp_rank.assign(gdp=gdp_rank["gdp"].map("${:,.2f}".format))
gdp_mon

Unnamed: 0,country,happiness_rank,gdp,gdp_rank
0,Luxembourg,17,"$101,994.09",1
1,Switzerland,1,"$80,675.31",2
2,Qatar,28,"$76,576.08",3
3,Norway,4,"$74,822.11",4
4,United States,15,"$55,805.20",5
...,...,...,...,...
137,Niger,130,$405.21,138
138,Madagascar,133,$401.77,139
139,Malawi,118,$354.28,140
140,Central African Republic,134,$334.87,141
