In [11]:
import numpy as np
import pandas as pd
import matplotlib 
import matplotlib.pyplot as plt
%matplotlib inline
from functools import reduce

In [12]:
# import data
h_2015 = pd.read_csv('../data/happiness_score_2015.csv')
h_2016 = pd.read_csv('../data/happiness_score_2016.csv')
h_2017 = pd.read_csv('../data/happiness_score_2017.csv')
h_2018 = pd.read_csv('../data/happiness_score_2018.csv')
h_2019 = pd.read_csv('../data/happiness_score_2019.csv')
h_2020 = pd.read_csv('../data/happiness_score_2020.csv')

In [13]:
# keep relevant columns in each dataset
h_2015 = h_2015[['Country', 'Happiness Score']]
h_2016 = h_2016[['Country', 'Happiness Score']]
h_2017 = h_2017[['Country', 'Happiness.Score']]
h_2018 = h_2018[['Country or region', 'Score']]
h_2019 = h_2019[['Country or region', 'Score']]
h_2020 = h_2020[['Country name', 'Ladder score']]

In [14]:
# rename the column names to be consistent in each dataset and to distinguish the year
h_2015 = h_2015.rename({'Happiness Score': '2015'}, axis=1)
h_2016 = h_2016.rename({'Happiness Score': '2016'}, axis=1)
h_2017 = h_2017.rename({'Happiness.Score': '2017'}, axis=1)
h_2018 = h_2018.rename({'Country or region': 'Country', 'Score': '2018'}, axis=1)
h_2019 = h_2019.rename({'Country or region': 'Country', 'Score': '2019'}, axis=1)
h_2020 = h_2020.rename({'Country name': 'Country', 'Ladder score': '2020'}, axis=1)

In [15]:
# merge all datasets together
datasets = [h_2015, h_2016, h_2017, h_2018, h_2019, h_2020]
happiness_2015_2020 = reduce(lambda left,right: pd.merge(left,right,on=['Country'],
                                            how='outer'), datasets)

In [16]:
happiness_2015_2020

Unnamed: 0,Country,2015,2016,2017,2018,2019,2020
0,Switzerland,7.587,7.509,7.494,7.487,7.480,7.5599
1,Iceland,7.561,7.501,7.504,7.495,7.494,7.5045
2,Denmark,7.527,7.526,7.522,7.555,7.600,7.6456
3,Norway,7.522,7.498,7.537,7.594,7.554,7.4880
4,Canada,7.427,7.404,7.316,7.328,7.278,7.2321
...,...,...,...,...,...,...,...
167,Northern Cyprus,,,,5.835,5.718,
168,North Macedonia,,,,,5.274,
169,Gambia,,,,,4.516,4.7506
170,Hong Kong S.A.R. of China,,,,,,5.5104


In [20]:
# set Country to be index
happiness_2015_2020 = happiness_2015_2020.set_index('Country') 
happiness_2015_2020

Unnamed: 0_level_0,2015,2016,2017,2018,2019,2020
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Switzerland,7.587,7.509,7.494,7.487,7.480,7.5599
Iceland,7.561,7.501,7.504,7.495,7.494,7.5045
Denmark,7.527,7.526,7.522,7.555,7.600,7.6456
Norway,7.522,7.498,7.537,7.594,7.554,7.4880
Canada,7.427,7.404,7.316,7.328,7.278,7.2321
...,...,...,...,...,...,...
Northern Cyprus,,,,5.835,5.718,
North Macedonia,,,,,5.274,
Gambia,,,,,4.516,4.7506
Hong Kong S.A.R. of China,,,,,,5.5104


In [21]:
# check for missing values
happiness_2015_2020.isna().sum()

2015    14
2016    15
2017    17
2018    16
2019    16
2020    19
dtype: int64

In [24]:
# drop all missing values
happiness_2015_2020 = happiness_2015_2020.dropna()

In [25]:
# check for successful dropping of missing values
happiness_2015_2020.isna().sum()

2015    0
2016    0
2017    0
2018    0
2019    0
2020    0
dtype: int64

In [29]:
happiness_2015_2020

Unnamed: 0_level_0,2015,2016,2017,2018,2019,2020
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Switzerland,7.587,7.509,7.494,7.487,7.480,7.5599
Iceland,7.561,7.501,7.504,7.495,7.494,7.5045
Denmark,7.527,7.526,7.522,7.555,7.600,7.6456
Norway,7.522,7.498,7.537,7.594,7.554,7.4880
Canada,7.427,7.404,7.316,7.328,7.278,7.2321
...,...,...,...,...,...,...
Afghanistan,3.575,3.360,3.794,3.632,3.203,2.5669
Rwanda,3.465,3.515,3.471,3.408,3.334,3.3123
Benin,3.340,3.484,3.657,4.141,4.883,5.2160
Burundi,2.905,2.905,2.905,2.905,3.775,3.7753


In [30]:
# save new dataframe
happiness_2015_2020.to_csv('../data/happiness_2015_2020.csv')

In [31]:
# load new dataframe
happiness = pd.read_csv('../data/happiness_2015_2020.csv')

In [32]:
happiness

Unnamed: 0,Country,2015,2016,2017,2018,2019,2020
0,Switzerland,7.587,7.509,7.494,7.487,7.480,7.5599
1,Iceland,7.561,7.501,7.504,7.495,7.494,7.5045
2,Denmark,7.527,7.526,7.522,7.555,7.600,7.6456
3,Norway,7.522,7.498,7.537,7.594,7.554,7.4880
4,Canada,7.427,7.404,7.316,7.328,7.278,7.2321
...,...,...,...,...,...,...,...
133,Afghanistan,3.575,3.360,3.794,3.632,3.203,2.5669
134,Rwanda,3.465,3.515,3.471,3.408,3.334,3.3123
135,Benin,3.340,3.484,3.657,4.141,4.883,5.2160
136,Burundi,2.905,2.905,2.905,2.905,3.775,3.7753
