In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory lazily and print the full path of every file in it.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#imports 
import pandas as pd #dataframes 
import numpy as np #linear algebra / arrays
import seaborn as sns #visualization 
from matplotlib import pyplot as plt #creating data plots
import statsmodels.api as sm #statistical analysis
from statsmodels.formula.api import ols

In [None]:
# Load the 2019 world-happiness CSV from the Kaggle input directory.
happiness_csv = '../input/world-happiness/2019.csv'
df = pd.read_csv(happiness_csv)

# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Shorten the two long column names; every other column keeps its label.
shorter_names = {
    'Country or region': 'Country',
    'Healthy life expectancy': 'Life expectancy',
}
df = df.rename(columns=shorter_names)

df.head()

In [None]:
# Size of the data set and missing values per column

# (rows, columns)
print(df.shape)

# Number of null entries in each column
df.isna().sum()

> There are 156 rows and 9 columns. It appears that there is no missing data, so let's start looking at it.

In [None]:
# Ten happiest countries. Sort by rank explicitly so the result does not depend
# on the row order of the CSV (the stable sort keeps file order for tied ranks).
df.sort_values('Overall rank', kind='stable').head(10)

In [None]:
# Ten least happy countries. Sort by rank explicitly so the result does not depend
# on the row order of the CSV (the stable sort keeps file order for tied ranks).
df.sort_values('Overall rank', kind='stable').tail(10)

In [None]:
# What makes them happy or sad?
#
# Collect the ten best-ranked and ten worst-ranked countries in one frame so the
# plotting cells can compare them side by side. Sorting by "Overall rank" first
# makes the selection independent of the row order of the CSV; the stable sort
# keeps the file order for any tied ranks.
ranked_df = df.sort_values('Overall rank', kind='stable')
df_head = ranked_df.head(10)  # ten best-ranked countries
df_tail = ranked_df.tail(10)  # ten worst-ranked countries

# Stack both groups so they can be passed to the visualization tools together.
concat_df = pd.concat([df_head, df_tail])

# Bare last expression gives the rich notebook table instead of print()'s plain text.
concat_df


In [None]:
# GDP per capita for the top-10 and bottom-10 countries
fig, ax = plt.subplots(figsize=(15, 8))  # wide canvas so every bar stays legible
# NOTE(review): recent seaborn releases warn when `palette` is passed without `hue` — confirm against the installed version.
sns.barplot(
    data=concat_df,
    x="Overall rank",
    y="GDP per capita",
    palette="Blues_d",
    ax=ax,
);  # trailing semicolon keeps the Axes repr out of the cell output

The GDP of the happiest countries is consistently high. The GDP of the least happy countries is less than half that of the happier countries, with the exception of #148 - Botswana.

In [None]:
# Social support for the top-10 and bottom-10 countries
fig, ax = plt.subplots(figsize=(15, 8))  # wide canvas so every bar stays legible
sns.barplot(
    data=concat_df,
    x="Overall rank",
    y="Social support",
    palette="viridis",
    ax=ax,
);  # trailing semicolon keeps the Axes repr out of the cell output

We see a similar trend of countries with lower scores being about half of their higher scoring counterparts, with the exceptions of: 
- #148, Botswana 
- #151, Yemen
- #153, Tanzania

These values are less drastic than GDP

In [None]:
# Life expectancy for the top-10 and bottom-10 countries
fig, ax = plt.subplots(figsize=(15, 8))  # wide canvas so every bar stays legible
sns.barplot(
    data=concat_df,
    x="Overall rank",
    y="Life expectancy",
    palette="rocket_r",
    ax=ax,
);  # trailing semicolon keeps the Axes repr out of the cell output

These are fairly consistently half the values of the highest-ranking countries, with the exception of #155, Central African Republic, which has a much lower score than all of the others.

In [None]:
# Freedom to make life choices for the top-10 and bottom-10 countries
fig, ax = plt.subplots(figsize=(15, 8))  # wide canvas so every bar stays legible
sns.barplot(
    data=concat_df,
    x="Overall rank",
    y="Freedom to make life choices",
    palette="mako",
    ax=ax,
);  # trailing semicolon keeps the Axes repr out of the cell output

Freedom to make life choices is interesting because we don't see the same trends as in the previous parameters. The top 10 are all high scoring, but the bottom ten are either fairly high scoring (some even approaching the same scores as the top 10 countries) or very low scoring, with no real middle ground.

In [None]:
# Perceptions of corruption for the top-10 and bottom-10 countries
fig, ax = plt.subplots(figsize=(15, 8))  # wide canvas so every bar stays legible
sns.barplot(
    data=concat_df,
    x="Overall rank",
    y="Perceptions of corruption",
    palette="flare",
    ax=ax,
);  # trailing semicolon keeps the Axes repr out of the cell output

We again see some outliers in the data that we would not have expected based on the country's overall rank. 
Country #4, Iceland, scored shockingly low given that it ranks consistently high in the other parameters, while Country #151, Yemen, scored shockingly high. 

In [None]:
# Generosity for the top-10 and bottom-10 countries
fig, ax = plt.subplots(figsize=(15, 8))  # wide canvas so every bar stays legible
sns.barplot(
    data=concat_df,
    x="Overall rank",
    y="Generosity",
    palette="crest",
    ax=ax,
);  # trailing semicolon keeps the Axes repr out of the cell output

This particular parameter might have the most average score across the top and bottom, with only two big outliers. 
Country #147 - Haiti ranks highest of all for generosity, and country #148 - Botswana ranks lowest of all for generosity. 
So perhaps generosity is not an indicator of happiness.

Based on the above graphs, I think perceived corruption, life expectancy, social support, and GDP may have the biggest impacts on overall happiness, as those were the charts that showed the most change between the top and bottom 10. I will test this using a correlation analysis.

In [None]:
# Looking for correlations
#
# Pearson correlation between every pair of numeric columns. The frame still
# holds the non-numeric "Country" column, and pandas >= 2.0 raises on
# non-numeric data in DataFrame.corr() instead of silently skipping it, so the
# numeric columns are selected explicitly (this works on older pandas too).
corr = df.select_dtypes(include='number').corr(method='pearson')

# Bare last expression gives the rich notebook table instead of print()'s plain text.
corr

Let's drop overall rank.

In [None]:
# Repeat the correlation analysis without the "Overall rank" column.
df_revised = df.drop(columns=['Overall rank'])

# df_revised still contains the non-numeric "Country" column, which makes
# DataFrame.corr() raise on pandas >= 2.0; correlate the numeric columns only.
corr = df_revised.select_dtypes(include='number').corr(method='pearson')

# Bare last expression gives the rich notebook table instead of print()'s plain text.
corr

In [None]:
# Heat map of the correlation matrix

plt.figure(figsize=(15,8)) # Adjust size of plot so the annotated cells can be read.

# df_revised still contains the non-numeric "Country" column, which makes
# DataFrame.corr() raise on pandas >= 2.0; correlate the numeric columns only.
corr_matrix = df_revised.select_dtypes(include='number').corr()

# Colour scale is fixed to the full correlation range [-1, 1] and centred on 0.
ax = sns.heatmap(corr_matrix, annot = True, vmin=-1, vmax=1, center= 0, cmap = 'PuBuGn')


From the chart we can gather that the areas that are closest to the dark green shade are most impactful, whereas the categories approaching the light purple are least impactful. This means that GDP, social support, and life expectancy have the greatest impact, whereas generosity and perceptions of corruption do not make an impact. This matches my prediction above for GDP, social support, life expectancy, and generosity, but not for perceived corruption, which I had expected to matter. The middle ground of Freedom to make life choices makes sense based on the data above, where people either felt like they had it or didn't, with no real middle ground. 