In [2]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as sts
import numpy as np

In [47]:
## 2015 Data Cleaning
# Import 2015 World Happiness Data
data_2015 = pd.read_csv('./Resources/2015_data.csv')

# Add column indicating the year
data_2015['Year'] = '2015'

# Drop non-predictive variables
data_2015_drop = data_2015.drop(columns='Standard Error')

# Rearrange the data fields
column_order = ['Year','Country','Region','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2015_order = data_2015_drop.reindex(columns = column_order)

# Trim the data set to only show top 20 countries
data_2015_clean = data_2015_order.drop(range(20,len(data_2015)))

data_2015_clean

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2015,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2015,Iceland,Western Europe,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
2,2015,Denmark,Western Europe,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2015,Norway,Western Europe,4,7.522,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2015,Canada,North America,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
5,2015,Finland,Western Europe,6,7.406,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351,2.61955
6,2015,Netherlands,Western Europe,7,7.378,1.32944,1.28017,0.89284,0.61576,0.31814,0.4761,2.4657
7,2015,Sweden,Western Europe,8,7.364,1.33171,1.28907,0.91087,0.6598,0.43844,0.36262,2.37119
8,2015,New Zealand,Australia and New Zealand,9,7.286,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501,2.26425
9,2015,Australia,Australia and New Zealand,10,7.284,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562,2.26646


In [49]:
## 2016 Data Cleaning
# Import 2016 World Happiness Data
data_2016 = pd.read_csv('./Resources/2016_data.csv')

# Add column indicating the year
data_2016['Year'] = '2016'

# Drop non-predictive variables
data_2016_drop = data_2016.drop(columns=['Lower Confidence Interval','Upper Confidence Interval'])

# Rearrange the data fields
column_order = ['Year','Country','Region','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2016_order = data_2016_drop.reindex(columns = column_order)

# Trim the data set to only show top 20 countries
data_2016_clean = data_2016_order.drop(range(20,len(data_2016)))

data_2016_clean

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2016,Denmark,Western Europe,1,7.526,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939
1,2016,Switzerland,Western Europe,2,7.509,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463
2,2016,Iceland,Western Europe,3,7.501,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137
3,2016,Norway,Western Europe,4,7.498,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465
4,2016,Finland,Western Europe,5,7.413,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596
5,2016,Canada,North America,6,7.404,1.44015,1.0961,0.8276,0.5737,0.31329,0.44834,2.70485
6,2016,Netherlands,Western Europe,7,7.339,1.46468,1.02912,0.81231,0.55211,0.29927,0.47416,2.70749
7,2016,New Zealand,Australia and New Zealand,8,7.334,1.36066,1.17278,0.83096,0.58147,0.41904,0.49401,2.47553
8,2016,Australia,Australia and New Zealand,9,7.313,1.44443,1.10476,0.8512,0.56837,0.32331,0.47407,2.5465
9,2016,Sweden,Western Europe,10,7.291,1.45181,1.08764,0.83121,0.58218,0.40867,0.38254,2.54734


In [58]:
## 2017 Data Cleaning
# Import 2017 World Happiness Data
data_2017 = pd.read_csv('./Resources/2017_data.csv')

# Add column indicating the year
data_2017['Year'] = '2017'

# Rename data fields to match prior years
data_2017_rename_1 = data_2017.rename(columns={'Happiness.Rank': 'Happiness Rank'})
data_2017_rename_2 = data_2017_rename_1.rename(columns={'Happiness.Score': 'Happiness Score'})
data_2017_rename_3 = data_2017_rename_2.rename(columns={'Economy..GDP.per.Capita.': 'Economy (GDP per Capita)'})
data_2017_rename_4 = data_2017_rename_3.rename(columns={'Health..Life.Expectancy.': 'Health (Life Expectancy)'})
data_2017_rename_5 = data_2017_rename_4.rename(columns={'Trust..Government.Corruption.': 'Dystopia Residual'})
data_2017_rename_6 = data_2017_rename_5.rename(columns={'Dystopia.Residual': 'Trust (Government Corruption)'})
                            

# Drop non-predictive variables
data_2017_drop = data_2017_rename_6.drop(columns=['Whisker.high','Whisker.low'])

# Rearrange the data fields
column_order = ['Year','Country','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2017_order = data_2017_drop.reindex(columns = column_order)

# Trim the data set to only show top 20 countries
data_2017_clean = data_2017_order.drop(range(20,len(data_2017)))

data_2017_clean

Unnamed: 0,Year,Country,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2017,Norway,1,7.537,1.616463,1.533524,0.796667,0.635423,2.277027,0.362012,0.315964
1,2017,Denmark,2,7.522,1.482383,1.551122,0.792566,0.626007,2.313707,0.35528,0.40077
2,2017,Iceland,3,7.504,1.480633,1.610574,0.833552,0.627163,2.322715,0.47554,0.153527
3,2017,Switzerland,4,7.494,1.56498,1.516912,0.858131,0.620071,2.276716,0.290549,0.367007
4,2017,Finland,5,7.469,1.443572,1.540247,0.809158,0.617951,2.430182,0.245483,0.382612
5,2017,Netherlands,6,7.377,1.503945,1.428939,0.810696,0.585384,2.294804,0.47049,0.282662
6,2017,Canada,7,7.316,1.479204,1.481349,0.834558,0.611101,2.187264,0.43554,0.287372
7,2017,New Zealand,8,7.314,1.405706,1.548195,0.81676,0.614062,2.046456,0.500005,0.382817
8,2017,Sweden,9,7.284,1.494387,1.478162,0.830875,0.612924,2.097538,0.385399,0.384399
9,2017,Australia,10,7.284,1.484415,1.510042,0.843887,0.601607,2.065211,0.477699,0.301184
