# Contents
## 1. Importing Libraries
## 2. Data Consistency
## 3. Data Wrangling
## 4. Basic Stats of the Data
## 5. Exporting Data

# 1.Importing Libraries

In [25]:
#importing libraries
import pandas as pd
import numpy as np
import os

In [26]:
path= r'C:\Users\spada\OneDrive\Data Analytics\World Happiness Report'

In [27]:
df16 = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', '2016.csv'), index_col = False)

In [28]:
df16

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Denmark,Western Europe,1,7.526,7.460,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939
1,Switzerland,Western Europe,2,7.509,7.428,7.590,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.12690,0.79579,0.59609,0.35776,0.37895,2.66465
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596
...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,Benin,Sub-Saharan Africa,153,3.484,3.404,3.564,0.39499,0.10419,0.21028,0.39747,0.06681,0.20180,2.10812
153,Afghanistan,Southern Asia,154,3.360,3.288,3.432,0.38227,0.11037,0.17344,0.16430,0.07112,0.31268,2.14558
154,Togo,Sub-Saharan Africa,155,3.303,3.192,3.414,0.28123,0.00000,0.24811,0.34678,0.11587,0.17517,2.13540
155,Syria,Middle East and Northern Africa,156,3.069,2.936,3.202,0.74719,0.14866,0.62994,0.06912,0.17233,0.48397,0.81789


In [29]:
#understanding more about the data
df16.info

<bound method DataFrame.info of          Country                           Region  Happiness Rank  \
0        Denmark                   Western Europe               1   
1    Switzerland                   Western Europe               2   
2        Iceland                   Western Europe               3   
3         Norway                   Western Europe               4   
4        Finland                   Western Europe               5   
..           ...                              ...             ...   
152        Benin               Sub-Saharan Africa             153   
153  Afghanistan                    Southern Asia             154   
154         Togo               Sub-Saharan Africa             155   
155        Syria  Middle East and Northern Africa             156   
156      Burundi               Sub-Saharan Africa             157   

     Happiness Score  Lower Confidence Interval  Upper Confidence Interval  \
0              7.526                      7.460              

# 2. Data Consistency

In [30]:
#checking for missing values
df16.isnull().sum()

Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Lower Confidence Interval        0
Upper Confidence Interval        0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
dtype: int64

#### No missing values

In [31]:
#Checking for duplicates
df16_dups=df16[df16.duplicated()]

In [32]:
df16_dups

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual


#### No duplicates

In [33]:
#seeing column names
df16.columns

Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Lower Confidence Interval', 'Upper Confidence Interval',
       'Economy (GDP per Capita)', 'Family', 'Health (Life Expectancy)',
       'Freedom', 'Trust (Government Corruption)', 'Generosity',
       'Dystopia Residual'],
      dtype='object')

# 3. Data Wrangling

In [34]:
#dropping 'region' column
df16=df16.drop(columns=['Region'])

In [35]:
#dropping 'Lower Confidence Interval' column
df16=df16.drop(columns=['Lower Confidence Interval'])

In [37]:
#dropping 'Dystopia Residual' column
df16=df16.drop(columns=['Dystopia Residual'])

In [38]:
#dropping 'Upper Confidence Interval' column
df16=df16.drop(columns=['Upper Confidence Interval'])

In [39]:
#renaming 'Country' column
df16.rename(columns={'Country':'Country or Region'}, inplace=True)

In [40]:
#renaming 'Family' column
df16.rename(columns={'Family':'Social Support'}, inplace=True)

In [44]:
df16

Unnamed: 0,Country or Region,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Year
0,Denmark,1,7.526,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2016
1,Switzerland,2,7.509,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2016
2,Iceland,3,7.501,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2016
3,Norway,4,7.498,1.57744,1.12690,0.79579,0.59609,0.35776,0.37895,2016
4,Finland,5,7.413,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2016
...,...,...,...,...,...,...,...,...,...,...
152,Benin,153,3.484,0.39499,0.10419,0.21028,0.39747,0.06681,0.20180,2016
153,Afghanistan,154,3.360,0.38227,0.11037,0.17344,0.16430,0.07112,0.31268,2016
154,Togo,155,3.303,0.28123,0.00000,0.24811,0.34678,0.11587,0.17517,2016
155,Syria,156,3.069,0.74719,0.14866,0.62994,0.06912,0.17233,0.48397,2016


In [45]:
#renaming 'Economy (GDP per Capita)' column
df16.rename(columns={'Economy (GDP per Capita)':'GDP per capita'}, inplace=True)

In [46]:
#creating a 'Year' column
df16.loc[:, ["Year"]] = [2016]

In [47]:
df16

Unnamed: 0,Country or Region,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Year
0,Denmark,1,7.526,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2016
1,Switzerland,2,7.509,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2016
2,Iceland,3,7.501,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2016
3,Norway,4,7.498,1.57744,1.12690,0.79579,0.59609,0.35776,0.37895,2016
4,Finland,5,7.413,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2016
...,...,...,...,...,...,...,...,...,...,...
152,Benin,153,3.484,0.39499,0.10419,0.21028,0.39747,0.06681,0.20180,2016
153,Afghanistan,154,3.360,0.38227,0.11037,0.17344,0.16430,0.07112,0.31268,2016
154,Togo,155,3.303,0.28123,0.00000,0.24811,0.34678,0.11587,0.17517,2016
155,Syria,156,3.069,0.74719,0.14866,0.62994,0.06912,0.17233,0.48397,2016


# 4.Basic Stats of the data

In [48]:
df16.describe()

Unnamed: 0,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Year
count,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0,157.0
mean,78.980892,5.382185,0.95388,0.793621,0.557619,0.370994,0.137624,0.242635,2016.0
std,45.46603,1.141674,0.412595,0.266706,0.229349,0.145507,0.111038,0.133756,0.0
min,1.0,2.905,0.0,0.0,0.0,0.0,0.0,0.0,2016.0
25%,40.0,4.404,0.67024,0.64184,0.38291,0.25748,0.06126,0.15457,2016.0
50%,79.0,5.314,1.0278,0.84142,0.59659,0.39747,0.10547,0.22245,2016.0
75%,118.0,6.269,1.27964,1.02152,0.72993,0.48453,0.17554,0.31185,2016.0
max,157.0,7.526,1.82427,1.18326,0.95277,0.60848,0.50521,0.81971,2016.0


# 5. Exporting Data

In [49]:
# Export 2016 Data dataframe
df16.to_pickle(os.path.join(path, '02 Data','Prepared Data', 'df16_clean.pkl'))