# Contents
## 1. Importing Libraries
## 2. Data Consistency
## 3. Data Wrangling
## 4. Basic Stats of the Data
## 5. Exporting Data

# 1. Importing Libraries

In [49]:
#importing libraries
import pandas as pd
import numpy as np
import os

In [50]:
path= r'C:\Users\spada\OneDrive\Data Analytics\World Happiness Report'

In [51]:
df17 = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', '2017.csv'), index_col = False)

In [52]:
df17

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual
0,Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027
1,Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.355280,0.400770,2.313707
2,Iceland,3,7.504,7.622030,7.385970,1.480633,1.610574,0.833552,0.627163,0.475540,0.153527,2.322715
3,Switzerland,4,7.494,7.561772,7.426227,1.564980,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716
4,Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2.430182
...,...,...,...,...,...,...,...,...,...,...,...,...
150,Rwanda,151,3.471,3.543030,3.398970,0.368746,0.945707,0.326425,0.581844,0.252756,0.455220,0.540061
151,Syria,152,3.462,3.663669,3.260331,0.777153,0.396103,0.500533,0.081539,0.493664,0.151347,1.061574
152,Tanzania,153,3.349,3.461430,3.236570,0.511136,1.041990,0.364509,0.390018,0.354256,0.066035,0.621130
153,Burundi,154,2.905,3.074690,2.735310,0.091623,0.629794,0.151611,0.059901,0.204435,0.084148,1.683024


In [53]:
#understanding more about the data
df17.info

<bound method DataFrame.info of                       Country  Happiness.Rank  Happiness.Score  Whisker.high  \
0                      Norway               1            7.537      7.594445   
1                     Denmark               2            7.522      7.581728   
2                     Iceland               3            7.504      7.622030   
3                 Switzerland               4            7.494      7.561772   
4                     Finland               5            7.469      7.527542   
..                        ...             ...              ...           ...   
150                    Rwanda             151            3.471      3.543030   
151                     Syria             152            3.462      3.663669   
152                  Tanzania             153            3.349      3.461430   
153                   Burundi             154            2.905      3.074690   
154  Central African Republic             155            2.693      2.864884   

     Wh

# 2. Data Consistency

In [54]:
#checking for missing values
df17.isnull().sum()

Country                          0
Happiness.Rank                   0
Happiness.Score                  0
Whisker.high                     0
Whisker.low                      0
Economy..GDP.per.Capita.         0
Family                           0
Health..Life.Expectancy.         0
Freedom                          0
Generosity                       0
Trust..Government.Corruption.    0
Dystopia.Residual                0
dtype: int64

#### No missing values

In [55]:
#Checking for duplicates
df17_dups=df17[df17.duplicated()]

In [56]:
df17_dups

Unnamed: 0,Country,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual


#### No duplicates

In [57]:
#seeing column names
df17.columns

Index(['Country', 'Happiness.Rank', 'Happiness.Score', 'Whisker.high',
       'Whisker.low', 'Economy..GDP.per.Capita.', 'Family',
       'Health..Life.Expectancy.', 'Freedom', 'Generosity',
       'Trust..Government.Corruption.', 'Dystopia.Residual'],
      dtype='object')

# 3. Data Wrangling

In [58]:
#renaming 'Happiness.Rank' column
df17.rename(columns={'Happiness.Rank':'Happiness Rank'}, inplace=True)

In [59]:
#renaming 'Happiness.Score' column
df17.rename(columns={'Happiness.Score':'Happiness Score'}, inplace=True)

In [60]:
#renaming 'Economy..GDP.per.Capita.' column
df17.rename(columns={'Economy..GDP.per.Capita.':'GDP per capita'}, inplace=True)

In [61]:
#renaming 'Family' column
df17.rename(columns={'Family':'Social Support'}, inplace=True)

In [62]:
#renaming 'Health..Life.Expectancy.' column
df17.rename(columns={'Health..Life.Expectancy.':'Health (Life Expectancy)'}, inplace=True)

In [63]:
#renaming 'Country' column
df17.rename(columns={'Country':'Country or Region'}, inplace=True)

In [64]:
#renaming 'Trust..Government.Corruption' column
df17.rename(columns={'Trust..Government.Corruption.':'Trust (Government Corruption)'}, inplace=True)

In [65]:
#renaming 'Dystopia.Residual' column
df17.rename(columns={'Dystopia.Residual':'Dystopia Residual'}, inplace=True)

In [66]:
#dropping Whisker.high column
df17=df17.drop(columns=['Whisker.high'])

In [67]:
#dropping Whisker.low column
df17=df17.drop(columns=['Whisker.low'])

In [68]:
#dropping 'Dystopia Residual' column
df17=df17.drop(columns=['Dystopia Residual'])

In [69]:
# Creating a 'Year' column
df17.loc[:, ["Year"]] = [2017]

In [70]:
df17

Unnamed: 0,Country or Region,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Generosity,Trust (Government Corruption),Year
0,Norway,1,7.537,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2017
1,Denmark,2,7.522,1.482383,1.551122,0.792566,0.626007,0.355280,0.400770,2017
2,Iceland,3,7.504,1.480633,1.610574,0.833552,0.627163,0.475540,0.153527,2017
3,Switzerland,4,7.494,1.564980,1.516912,0.858131,0.620071,0.290549,0.367007,2017
4,Finland,5,7.469,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,2017
...,...,...,...,...,...,...,...,...,...,...
150,Rwanda,151,3.471,0.368746,0.945707,0.326425,0.581844,0.252756,0.455220,2017
151,Syria,152,3.462,0.777153,0.396103,0.500533,0.081539,0.493664,0.151347,2017
152,Tanzania,153,3.349,0.511136,1.041990,0.364509,0.390018,0.354256,0.066035,2017
153,Burundi,154,2.905,0.091623,0.629794,0.151611,0.059901,0.204435,0.084148,2017


# 4. Basic Stats of the Data

In [71]:
df17.describe()

Unnamed: 0,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Generosity,Trust (Government Corruption),Year
count,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
mean,78.0,5.354019,0.984718,1.188898,0.551341,0.408786,0.246883,0.12312,2017.0
std,44.888751,1.13123,0.420793,0.287263,0.237073,0.149997,0.13478,0.101661,0.0
min,1.0,2.693,0.0,0.0,0.0,0.0,0.0,0.0,2017.0
25%,39.5,4.5055,0.663371,1.042635,0.369866,0.303677,0.154106,0.057271,2017.0
50%,78.0,5.279,1.064578,1.253918,0.606042,0.437454,0.231538,0.089848,2017.0
75%,116.5,6.1015,1.318027,1.414316,0.723008,0.516561,0.323762,0.153296,2017.0
max,155.0,7.537,1.870766,1.610574,0.949492,0.658249,0.838075,0.464308,2017.0


# 5. Exporting 2017 Data

In [73]:
# Export 2017 Data dataframe
df17.to_pickle(os.path.join(path, '02 Data','Prepared Data', 'df17_clean.pkl'))