In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

%matplotlib inline
# Apply seaborn's "whitegrid" style to every figure drawn after this cell.
sns.set(style="whitegrid")
# Remap matplotlib's single-letter color codes ("b", "g", "r", ...) to the pastel palette.
sns.set_color_codes("pastel")

In [None]:
# Relative location of the 2019 World Happiness Report CSV.
happines_filepath = "../input/world-happiness/2019.csv"

# Decode the file as Windows-1252. parse_dates=True only asks pandas to try
# date-parsing the index, which is left at its default here.
happines_data = pd.read_csv(
    happines_filepath,
    encoding="cp1252",
    parse_dates=True,
)

# Preview the first rows to confirm the load worked.
happines_data.head()

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
happines_data.info()

In [None]:
# Report the dataset dimensions in a sentence. A bare `happines_data.shape`
# ahead of the print would never be displayed, because a notebook only echoes
# the last expression of a cell, so the dimensions are reported once here.
print(f"There are {happines_data.shape[0]} rows and {happines_data.shape[1]} columns in our data")

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
happines_data.describe()

In [None]:
# Count the missing values in each column.
happines_data.isna().sum()

In [None]:
# Number of rows that exactly repeat an earlier row.
happines_data.duplicated().sum()

In [None]:
# Rows whose 'Healthy life expectancy' value is greater than 1.
happines_data[happines_data['Healthy life expectancy'].gt(1)]

In [None]:
# Look up the row(s) for Bulgaria.
happines_data[happines_data['Country or region'].eq('Bulgaria')]

In [None]:
# The ten rows with the highest 'Generosity' values, largest first.
happines_data.sort_values(by="Generosity", ascending=False).head(10)

In [None]:
# Rows with a happiness score strictly above 7.5.
happines_data.loc[happines_data['Score'].gt(7.5)]

In [None]:
# Count the rows whose score is strictly greater than 7 and report the total in a sentence.
print(f"There are {len(happines_data[happines_data['Score'] > 7])} countries that have a happiness score above 7.0")

In [None]:
# Bucket every row by its 'Healthy life expectancy' value and store the bucket
# as a color name in a 'Color' column:
#   <= 0.5 -> 'red', strictly between 0.5 and 0.7 -> 'blue', >= 0.7 -> 'green'.
# The three masks are disjoint, so the order of the assignments does not matter.
hle = happines_data['Healthy life expectancy']

happines_data.loc[hle.le(0.5), 'Color'] = 'red'
happines_data.loc[hle.gt(0.5) & hle.lt(0.7), 'Color'] = 'blue'
happines_data.loc[hle.ge(0.7), 'Color'] = 'green'

In [None]:
# Preview the first rows again; the frame now carries the 'Color' column added in the previous cell.
happines_data.head()

In [None]:
# Group the rows by their 'Color' bucket for per-bucket statistics.
whr_color = happines_data.groupby('Color')

In [None]:
# Per-color summary statistics of 'Score', ordered by ascending mean; head(10) keeps at most ten groups.
whr_color['Score'].describe().sort_values(by="mean",ascending=True).head(10)

In [None]:
# Distribution of 'Healthy life expectancy' in 20 bins, drawn on an explicitly
# created wide axes instead of relying on the implicit current figure.
# NOTE(review): seaborn has deprecated distplot in favor of histplot/displot;
# it is kept here so the cell still runs on older seaborn releases.
_, hle_ax = plt.subplots(figsize=(20, 6))
sns.distplot(happines_data['Healthy life expectancy'], bins=20, ax=hle_ax)

In [None]:
# Scatter of happiness score against healthy life expectancy.
f, ax = plt.subplots(figsize=(20, 6))

x_values = happines_data['Healthy life expectancy']
y_values = happines_data['Score']
ax.scatter(x_values, y_values)

# Title and x label are set together; the y label stays last so the cell
# echoes the same Text object as before.
ax.set(title='WHR 2019', xlabel='Healthy life expectancy')
ax.set_ylabel('Score')

In [None]:
# Happiness score against perceived corruption, one marker per row, colored by
# the row's 'Color' bucket (which is derived from healthy life expectancy).
f, ax = plt.subplots(figsize=(20, 6))

# One vectorized scatter call instead of a Python loop with a separate
# ax.scatter() per row: it creates a single artist rather than one per country,
# and it does not depend on the frame having a default 0..n-1 index, which the
# label-based `series[i]` lookups required.
ax.scatter(
    happines_data['Perceptions of corruption'],
    happines_data['Score'],
    c=happines_data['Color'].tolist(),
)

ax.set_title('WHR 2019')
ax.set_xlabel('Perceptions of corruption')
ax.set_ylabel('Score')

In [None]:
# Frequency histogram (raw counts, 10 bins) of 'Healthy life expectancy'.
f, ax = plt.subplots(figsize=(20, 6))

hist_style = dict(bins=10, density=False, edgecolor='k', color='darkgreen', alpha=0.5)
ax.hist(happines_data['Healthy life expectancy'], **hist_style)

# The y label is set last so the cell echoes the same Text object as before.
ax.set(title='Healthy life expectancy', xlabel='Points')
ax.set_ylabel('Frequency')

In [None]:
# Score against perceived corruption, with marker hue encoding the score itself.
# The axes created here is passed explicitly rather than picked up implicitly
# as matplotlib's current axes.
f, ax = plt.subplots(figsize=(20, 6))
sns.scatterplot(
    data=happines_data,
    x='Perceptions of corruption',
    y='Score',
    hue='Score',
    ax=ax,
)

In [None]:
# Grid of pairwise plots for the frame's numeric columns, colored by the 'Color' bucket with the "husl" palette.
sns.pairplot(happines_data, hue="Color", palette="husl")

## Correlation

In [None]:
# Pearson correlation of each numeric column with 'Score', strongest positive first.
# The frame also holds text columns ('Country or region', 'Color'). pandas >= 2.0
# raises on those instead of silently dropping them, so the frame is narrowed to
# numeric columns before corr() is called; this works on older pandas as well.
happines_data.select_dtypes(include="number").corr(method="pearson", min_periods=20)["Score"].sort_values(ascending=False)

In [None]:
# Absolute Pearson correlation of each numeric column with 'Score', strongest first
# regardless of sign. Text columns ('Country or region', 'Color') are excluded up
# front because pandas >= 2.0 raises on them inside corr() instead of dropping them.
happines_data.select_dtypes(include="number").corr(method="pearson", min_periods=20)["Score"].abs().sort_values(ascending=False)

In [None]:
# Full Pearson correlation matrix of the numeric columns. Text columns
# ('Country or region', 'Color') are excluded up front because pandas >= 2.0
# raises on them inside corr() instead of dropping them.
happines_data.select_dtypes(include="number").corr(method="pearson", min_periods=20)

In [None]:
# Pearson correlation matrix of the numeric columns. Text columns
# ('Country or region', 'Color') are excluded first because pandas >= 2.0
# raises on them inside corr() instead of dropping them.
corr = happines_data.select_dtypes(include="number").corr(method="pearson")

f, ax = plt.subplots(figsize=(10, 10))

# The mask is all False, so every cell of the matrix is drawn. The builtin
# `bool` is used for the dtype because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
sns.heatmap(corr, mask=np.zeros_like(corr, dtype=bool),
            cmap=sns.diverging_palette(220, 10, as_cmap=True), square=True, ax=ax)

## Probabilities

In [None]:
# Number of rows with a score strictly above 4.
len(happines_data[happines_data['Score'].gt(4)])

In [None]:
# Number of rows that score strictly above 5.5 and fall in the 'green' bucket.
len(happines_data[happines_data['Score'].gt(5.5) & happines_data['Color'].eq('green')])

In [None]:
# Share of 'blue' rows among the rows scoring strictly above 5.5
# (count of blue high scorers divided by count of all high scorers).
high_scorers = happines_data[happines_data['Score'] > 5.5]
len(high_scorers[high_scorers['Color'] == 'blue']) / len(high_scorers)