# Contents
## 1. Importing Libraries
## 2. Data Consistency
## 3. Data Wrangling
## 4. Basic Stats of the Data
## 5. Exporting Data

# 1. Importing Libraries

In [36]:
#importing libraries
import pandas as pd
import numpy as np
import os

In [37]:
path= r'C:\Users\spada\OneDrive\Data Analytics\World Happiness Report'

In [38]:
df15 = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', '2015.csv'), index_col = False)

In [39]:
df15

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...,...
153,Rwanda,Sub-Saharan Africa,154,3.465,0.03464,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,Benin,Sub-Saharan Africa,155,3.340,0.03656,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,Syria,Middle East and Northern Africa,156,3.006,0.05015,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,Burundi,Sub-Saharan Africa,157,2.905,0.08658,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [40]:
df15.shape

(158, 12)

In [41]:
#understanding more about the data
df15.info

<bound method DataFrame.info of          Country                           Region  Happiness Rank  \
0    Switzerland                   Western Europe               1   
1        Iceland                   Western Europe               2   
2        Denmark                   Western Europe               3   
3         Norway                   Western Europe               4   
4         Canada                    North America               5   
..           ...                              ...             ...   
153       Rwanda               Sub-Saharan Africa             154   
154        Benin               Sub-Saharan Africa             155   
155        Syria  Middle East and Northern Africa             156   
156      Burundi               Sub-Saharan Africa             157   
157         Togo               Sub-Saharan Africa             158   

     Happiness Score  Standard Error  Economy (GDP per Capita)   Family  \
0              7.587         0.03411                   1.39651  

# 2. Data Consistency

In [42]:
#checking for missing values
df15.isnull().sum()

Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Standard Error                   0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
dtype: int64

#### No missing values

In [43]:
#Checking for duplicates
df15_dups=df15[df15.duplicated()]

In [44]:
df15_dups

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual


#### No duplicates

In [45]:
#seeing column names
df15.columns

Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Standard Error', 'Economy (GDP per Capita)', 'Family',
       'Health (Life Expectancy)', 'Freedom', 'Trust (Government Corruption)',
       'Generosity', 'Dystopia Residual'],
      dtype='object')

# 3. Data Wrangling

In [46]:
#dropping 'region' column
df15=df15.drop(columns=['Region'])

In [47]:
#dropping 'Standard Error' column
df15=df15.drop(columns=['Standard Error'])

In [51]:
#dropping 'Dystopia Residual' column
df15=df15.drop(columns=['Dystopia Residual'])

In [52]:
#renaming 'Family' column
df15.rename(columns={'Family':'Social Support'}, inplace=True)

In [53]:
#renaming 'Country' column
df15.rename(columns={'Country':'Country or Region'}, inplace=True)

In [54]:
#renaming 'Economy GDP per Capita' column
df15.rename(columns={'Economy (GDP per Capita)':'GDP per capita'}, inplace=True)

In [55]:
#creating a 'Year' column
df15.loc[:, ["Year"]] = [2015]

In [56]:
df15

Unnamed: 0,Country or Region,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Year
0,Switzerland,1,7.587,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2015
1,Iceland,2,7.561,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2015
2,Denmark,3,7.527,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2015
3,Norway,4,7.522,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2015
4,Canada,5,7.427,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2015
...,...,...,...,...,...,...,...,...,...,...
153,Rwanda,154,3.465,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,2015
154,Benin,155,3.340,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,2015
155,Syria,156,3.006,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,2015
156,Burundi,157,2.905,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,2015


# 4. Basic Stats of the data

In [57]:
df15.describe()

Unnamed: 0,Happiness Rank,Happiness Score,GDP per capita,Social Support,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Year
count,158.0,158.0,158.0,158.0,158.0,158.0,158.0,158.0,158.0
mean,79.493671,5.375734,0.846137,0.991046,0.630259,0.428615,0.143422,0.237296,2015.0
std,45.754363,1.14501,0.403121,0.272369,0.247078,0.150693,0.120034,0.126685,0.0
min,1.0,2.839,0.0,0.0,0.0,0.0,0.0,0.0,2015.0
25%,40.25,4.526,0.545808,0.856823,0.439185,0.32833,0.061675,0.150553,2015.0
50%,79.5,5.2325,0.910245,1.02951,0.696705,0.435515,0.10722,0.21613,2015.0
75%,118.75,6.24375,1.158448,1.214405,0.811013,0.549092,0.180255,0.309883,2015.0
max,158.0,7.587,1.69042,1.40223,1.02525,0.66973,0.55191,0.79588,2015.0


# 5. Exporting Data

In [58]:
# Export 2015 Data dataframe
df15.to_pickle(os.path.join(path, '02 Data','Prepared Data', 'df15_clean.pkl'))