In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as sts
import numpy as np

In [2]:
## 2015 Data Cleaning
# Import 2015 World Happiness Data
data_2015 = pd.read_csv('happines')

# Add column indicating the year
data_2015['Year'] = '2015'

# Drop non-predictive variables
data_2015_drop = data_2015.drop(columns='Standard Error')

# Rearrange the data fields
column_order = ['Year','Country','Region','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2015_clean = data_2015_drop.reindex(columns = column_order)

data_2015_clean

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2015,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2015,Iceland,Western Europe,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,2015,Denmark,Western Europe,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2015,Norway,Western Europe,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2015,Canada,North America,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...,...
153,2015,Rwanda,Sub-Saharan Africa,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,2015,Benin,Sub-Saharan Africa,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,2015,Syria,Middle East and Northern Africa,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,2015,Burundi,Sub-Saharan Africa,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [8]:
## 2016 Data Cleaning
# Import 2016 World Happiness Data
data_2016 = pd.read_csv('happines')

# Add column indicating the year
data_2016['Year'] = '2016'

# Drop non-predictive variables
data_2016_drop = data_2016.drop(columns=['Standard Error'])

# Rearrange the data fields
column_order = ['Year','Country','Region','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2016_clean = data_2016_drop.reindex(columns = column_order)

data_2016_clean

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2016,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2016,Iceland,Western Europe,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,2016,Denmark,Western Europe,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2016,Norway,Western Europe,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2016,Canada,North America,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...,...
153,2016,Rwanda,Sub-Saharan Africa,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,2016,Benin,Sub-Saharan Africa,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,2016,Syria,Middle East and Northern Africa,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,2016,Burundi,Sub-Saharan Africa,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [9]:
## 2017 Data Cleaning
# Import 2017 World Happiness Data
data_2017 = pd.read_csv('happines')

# Add column indicating the year
data_2017['Year'] = '2017'

# Rename data fields to match prior years
data_2017_rename = data_2017.rename(columns={'Happiness.Rank': 'Happiness Rank',
                                             'Happiness.Score': 'Happiness Score',
                                             'Economy..GDP.per.Capita.': 'Economy (GDP per Capita)',
                                             'Health..Life.Expectancy.': 'Health (Life Expectancy)',
                                             'Trust..Government.Corruption.': 'Trust (Government Corruption)',
                                             'Dystopia.Residual': 'Dystopia Residual'})                           

# Drop non-predictive variables
data_2017_drop = data_2017_rename.drop(columns=['Standard Error'])

# Rearrange the data fields
column_order = ['Year','Country','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2017_clean = data_2017_drop.reindex(columns = column_order)

data_2017_clean

Unnamed: 0,Year,Country,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2017,Switzerland,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2017,Iceland,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,2017,Denmark,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2017,Norway,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2017,Canada,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...
153,2017,Rwanda,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,2017,Benin,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,2017,Syria,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,2017,Burundi,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [10]:
## 2018 Data Cleaning
# Import 2018 World Happiness Data
data_2018 = pd.read_csv('happines')

# Add column indicating the year
data_2018['Year'] = '2018'

# Rename data fields to match prior years
data_2018_rename = data_2018.rename(columns={'Overall rank': 'Happiness Rank',
                                             'Country or region': 'Country',
                                             'Score': 'Happiness Score',
                                             'GDP per capita': 'Economy (GDP per Capita)',
                                             'Social support': 'Family',
                                             'Healthy life expectancy': 'Health (Life Expectancy)',
                                             'Freedom to make life choices': 'Freedom',
                                             'Perceptions of corruption': 'Trust (Government Corruption)'})
                            
# Rearrange the data fields
column_order = ['Year','Country','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2018_clean = data_2018_rename.reindex(columns = column_order)

data_2018_clean

Unnamed: 0,Year,Country,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2018,Switzerland,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2018,Iceland,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,2018,Denmark,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2018,Norway,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2018,Canada,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...
153,2018,Rwanda,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,2018,Benin,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,2018,Syria,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,2018,Burundi,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [12]:
## 2019 Data Cleaning
# Import 2019 World Happiness Data
data_2019 = pd.read_csv('happines')

# Add column indicating the year
data_2019['Year'] = '2019'

# Rename data fields to match prior years
data_2019_rename = data_2019.rename(columns={'Overall rank': 'Happiness Rank',
                                             'Country or region': 'Country',
                                             'Score': 'Happiness Score',
                                             'GDP per capita': 'Economy (GDP per Capita)',
                                             'Social support': 'Family',
                                             'Healthy life expectancy': 'Health (Life Expectancy)',
                                             'Freedom to make life choices': 'Freedom',
                                             'Perceptions of corruption': 'Trust (Government Corruption)'})
                            
# Rearrange the data fields
column_order = ['Year','Country','Happiness Rank','Happiness Score','Economy (GDP per Capita)','Family',
                'Health (Life Expectancy)','Freedom','Trust (Government Corruption)','Generosity','Dystopia Residual']
data_2019_clean = data_2019_rename.reindex(columns = column_order)

data_2019_clean

Unnamed: 0,Year,Country,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2019,Switzerland,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2019,Iceland,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,2019,Denmark,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2019,Norway,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2019,Canada,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...
153,2019,Rwanda,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,2019,Benin,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,2019,Syria,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,2019,Burundi,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [13]:
# Aggregate cleaned data sets from each year into one file
data_list = [data_2015_clean, data_2016_clean, data_2017_clean, data_2018_clean, data_2019_clean]
multi_year_data = pd.concat(data_list)
multi_year_data

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,2015,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,2015,Iceland,Western Europe,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,2015,Denmark,Western Europe,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,2015,Norway,Western Europe,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,2015,Canada,North America,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...,...
153,2019,Rwanda,,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,2019,Benin,,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,2019,Syria,,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,2019,Burundi,,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [14]:
# Group the data sets by country
total_country_rank = multi_year_data.groupby('Country')['Happiness Rank'].sum()

# Calculate the average happiness rank for each country over the 5 year period
avg_country_rank = total_country_rank/5
data_rank = pd.merge(left=multi_year_data, right=avg_country_rank, on='Country')
data_rank

Unnamed: 0,Year,Country,Region,Happiness Rank_x,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,Happiness Rank_y
0,2015,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
1,2016,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
2,2017,Switzerland,,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
3,2018,Switzerland,,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
4,2019,Switzerland,,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
785,2015,Togo,Sub-Saharan Africa,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0
786,2016,Togo,Sub-Saharan Africa,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0
787,2017,Togo,,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0
788,2018,Togo,,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0


In [15]:
# Rename columns and fill nulls
data_rank_all_rename = data_rank.rename(columns={'Happiness Rank_x': 'Happiness Rank',
                                                 'Happiness Rank_y': 'Avg Happiness Rank'})
data_rank_all = data_rank_all_rename.ffill(axis=0)

data_rank_all

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,Avg Happiness Rank
0,2015,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
1,2016,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
2,2017,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
3,2018,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
4,2019,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
785,2015,Togo,Sub-Saharan Africa,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0
786,2016,Togo,Sub-Saharan Africa,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0
787,2017,Togo,Sub-Saharan Africa,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0
788,2018,Togo,Sub-Saharan Africa,158,2.839,0.20868,0.13995,0.28443,0.36453,0.10731,0.16681,1.56726,158.0


In [16]:
# Export data for all years
data_rank_all.to_csv('happines', index=False)

In [17]:
# Sort the data set and drop values below top 20
data_rank_all_sort = data_rank_all.sort_values('Avg Happiness Rank')
data_rank_top = data_rank_all_sort.drop(range(100,len(data_rank_all_sort)))
data_rank_top

Unnamed: 0,Year,Country,Region,Happiness Rank,Happiness Score,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,Avg Happiness Rank
0,2015,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
1,2016,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
2,2017,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
3,2018,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
4,2019,Switzerland,Western Europe,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,2019,United Arab Emirates,Middle East and Northern Africa,20,6.901,1.42727,1.12575,0.80925,0.64157,0.38583,0.26428,2.24743,20.0
97,2017,United Arab Emirates,Middle East and Northern Africa,20,6.901,1.42727,1.12575,0.80925,0.64157,0.38583,0.26428,2.24743,20.0
98,2018,United Arab Emirates,Middle East and Northern Africa,20,6.901,1.42727,1.12575,0.80925,0.64157,0.38583,0.26428,2.24743,20.0
96,2016,United Arab Emirates,Middle East and Northern Africa,20,6.901,1.42727,1.12575,0.80925,0.64157,0.38583,0.26428,2.24743,20.0


In [18]:
data_rank_top.to_csv('happines', index=False)