In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import warnings

import matplotlib.pyplot as plt
import seaborn as sns

# Use the FiveThirtyEight look for every figure drawn below.
plt.style.use("fivethirtyeight")
# NOTE: this silences *all* warnings for the session, including deprecation
# notices from pandas/seaborn that would otherwise flag outdated calls.
warnings.filterwarnings("ignore")

# Data info

- **Country** Name of the country (region).
- **Ladder** Measure of life satisfaction.
- **Standard Deviation of Ladder** Standard deviation of the Ladder measure.
- **Positive affect** Measure of positive emotion.
- **Negative affect** Measure of negative emotion.
- **Social support** The extent to which Social support contributed to the calculation of the Happiness Score.
- **Freedom** The extent to which Freedom contributed to the calculation of the Happiness Score.
- **Corruption** The extent to which Perception of Corruption contributed to the calculation of the Happiness Score.
- **Generosity** The extent to which Generosity contributed to the calculation of the Happiness Score.
- **Log of GDP per capita** The extent to which GDP contributed to the calculation of the Happiness Score.
- **Healthy life expectancy** The extent to which Life expectancy contributed to the calculation of the Happiness Score.

In [None]:
# Read the report into a DataFrame and preview its first rows.
csv_path = "../input/world-happiness-report-2019/world-happiness-report-2019.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Give the awkward headers (two of them contain a line break) short names,
# then normalise every label to lower case with underscores instead of spaces.
short_names = {
    "Country (region)": "country",
    "Log of GDP\nper capita": "gdp_log",
    "Healthy life\nexpectancy": "life_exp",
}
df = df.rename(columns=short_names)
df.columns = df.columns.str.lower().str.replace(" ", "_")

In [None]:
# Preview the first rows to check the lower-case, underscore-separated column labels.
df.head()

In [None]:
# Print the frame summary (column dtypes and non-null counts).
df.info()

In [None]:
# Summary statistics, transposed so each column of `df` becomes one row.
df.describe().transpose()

In [None]:
# Missing values per column, largest count first.
df.isna().sum().sort_values(ascending=False)

In [None]:
# Correlation heatmap of the numeric columns.
# Only numeric columns are correlated: `country` holds text, and pandas >= 2.0
# raises on non-numeric columns instead of silently dropping them.
corr = df.select_dtypes(include="number").corr()

# Boolean mask of the strict upper triangle (k=1 keeps the diagonal visible), so
# each pair is drawn once. Building it from ones rather than from the
# correlation values means a correlation of exactly 0 is still masked.
mask = np.triu(np.ones_like(corr, dtype=bool), k=1)

plt.figure(figsize=(15,8))
sns.heatmap(corr, annot=True, cmap="flare", mask=mask)
plt.show()

In [None]:
# Country lists per continent. Any country that appears in none of these lists
# is labelled "Africa" below.
asia = ["Israel", "United Arab Emirates", "Singapore", "Thailand", "Taiwan Province of China",
                 "Qatar", "Saudi Arabia", "Kuwait", "Bahrain", "Malaysia", "Uzbekistan", "Japan",
                 "South Korea", "Turkmenistan", "Kazakhstan", "Turkey", "Hong Kong S.A.R., China", "Philippines",
                 "Jordan", "China", "Pakistan", "Indonesia", "Azerbaijan", "Lebanon", "Vietnam",
                 "Tajikistan", "Bhutan", "Kyrgyzstan", "Nepal", "Mongolia", "Palestinian Territories",
                 "Iran", "Bangladesh", "Myanmar", "Iraq", "Sri Lanka", "Armenia", "India", "Georgia",
                 "Cambodia", "Afghanistan", "Yemen", "Syria"]

europe = ["Norway", "Denmark", "Iceland", "Switzerland", "Finland",
                 "Netherlands", "Sweden", "Austria", "Ireland", "Germany",
                 "Belgium", "Luxembourg", "United Kingdom", "Czech Republic",
                 "Malta", "France", "Spain", "Slovakia", "Poland", "Italy",
                 "Russia", "Lithuania", "Latvia", "Moldova", "Romania",
                 "Slovenia", "North Cyprus", "Cyprus", "Estonia", "Belarus",
                 "Serbia", "Hungary", "Croatia", "Kosovo", "Montenegro",
                 "Greece", "Portugal", "Bosnia and Herzegovina", "Macedonia",
                 "Bulgaria", "Albania", "Ukraine"]

north_america = ["Canada", "Costa Rica", "United States", "Mexico",  
                 "Panama","Trinidad and Tobago", "El Salvador", "Belize", "Guatemala",
                 "Jamaica", "Nicaragua", "Dominican Republic", "Honduras",
                 "Haiti"]

south_america = ["Chile", "Brazil", "Argentina", "Uruguay",
                 "Colombia", "Ecuador", "Bolivia", "Peru",
                 "Paraguay", "Venezuela"]

australia = ["New Zealand", "Australia"]

# Flatten the lists into one country -> continent lookup. The dict is filled in
# the same order the labels were previously assigned, so if a country were ever
# listed twice the later continent still wins.
continent_members = {
    "Asia": asia,
    "Europe": europe,
    "North America": north_america,
    "South America": south_america,
    "Australia": australia,
}
country_to_continent = {
    country: continent
    for continent, members in continent_members.items()
    for country in members
}

# Build the column in one step as strings. This avoids creating a float (NaN)
# column and then writing strings into it, and avoids the chained
# `df.continent.fillna(..., inplace=True)` call, which does not modify `df`
# when pandas Copy-on-Write is enabled. Re-running the cell gives the same result.
df["continent"] = df["country"].map(country_to_continent).fillna("Africa")

In [None]:
# Preview the first rows, which now include the `continent` column.
df.head()

In [None]:
# One line plot per indicator against `ladder`, laid out on a 4x2 grid.
# Each entry is (column plotted on y, line colour); panels fill row by row.
panels = [
    ("positive_affect", "#597cb5"),
    ("negative_affect", "#59a0b5"),
    ("social_support", "#59b5a6"),
    ("freedom", "#7f8251"),
    ("corruption", "#39805c"),
    ("generosity", "#bd9c73"),
    ("gdp_log", "#7c4a87"),
    ("life_exp", "#4a877d"),
]

fig, ax = plt.subplots(4, 2, figsize=(15, 20))
for panel, (column, colour) in zip(ax.flat, panels):
    sns.lineplot(data=df, x="ladder", y=column, ax=panel,
                 linewidth=3, marker=".", color=colour)
plt.show()

In [None]:
# Density of `positive_affect`, one filled curve per continent.
plt.figure(figsize=(12,6))
# Pass the frame as `data` and name the columns explicitly. In seaborn >= 0.12
# the first positional argument is `data`, so a positional Series is treated as
# wide-form input and cannot be combined with `hue`.
sns.kdeplot(data=df, x="positive_affect", hue="continent", fill=True, palette="tab10")
plt.xlabel("Positive Affect")
plt.title("Positive Affect by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `positive_affect` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("positive_affect", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="positive_affect", ax=ax[0], palette="Set3")
ax[0].set_title("First 10 Country of Positive Affect")
ax[0].set_ylabel("Positive Affect")

highest = df.sort_values("positive_affect", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="positive_affect", ax=ax[1], palette="Set3_r")
ax[1].set_title("Last 10 Country of Positive Affect")
ax[1].set_ylabel("Positive Affect")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Density of `negative_affect`, one filled curve per continent.
plt.figure(figsize=(12,6))
# Pass the frame as `data` and name the columns explicitly. In seaborn >= 0.12
# the first positional argument is `data`, so a positional Series is treated as
# wide-form input and cannot be combined with `hue`.
sns.kdeplot(data=df, x="negative_affect", hue="continent", fill=True, palette="tab10")
plt.title("Negative Affect by Continent")
plt.xlabel("Negative Affect")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `negative_affect` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("negative_affect", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="negative_affect", ax=ax[0], palette="Set1")
ax[0].set_title("First 10 Country of Negative Affect")
ax[0].set_ylabel("Negative Affect")

highest = df.sort_values("negative_affect", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="negative_affect", ax=ax[1], palette="Set1")
ax[1].set_title("Last 10 Country of Negative Affect")
ax[1].set_ylabel("Negative Affect")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Distribution of `social_support` within each continent.
plt.figure(figsize=(12,6))
# x and y must be keywords: seaborn >= 0.12 accepts only `data` positionally,
# so two positional Series raise a TypeError there.
sns.boxplot(data=df, x="continent", y="social_support")
plt.ylabel("Social Support")
plt.title("Social Support by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `social_support` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("social_support", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="social_support", ax=ax[0], palette="Accent")
ax[0].set_title("First 10 Country of Social Support")

highest = df.sort_values("social_support", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="social_support", ax=ax[1], palette="Accent_r")
ax[1].set_title("Last 10 Country of Social Support")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Distribution of `freedom` within each continent.
plt.figure(figsize=(12,6))
# x and y must be keywords: seaborn >= 0.12 accepts only `data` positionally,
# so two positional Series raise a TypeError there.
sns.boxplot(data=df, x="continent", y="freedom")
plt.ylabel("Freedom")
plt.title("Freedom by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `freedom` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("freedom", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="freedom", ax=ax[0], palette="Accent")
ax[0].set_title("First 10 Country of Freedom")

highest = df.sort_values("freedom", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="freedom", ax=ax[1], palette="Accent_r")
ax[1].set_title("Last 10 Country of Freedom")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Density of `corruption`, one filled curve per continent.
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(data=df, x="corruption", hue="continent",
            fill=True, palette="tab10", ax=ax)
ax.set_title("Corruption by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `corruption` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("corruption", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="corruption", ax=ax[0], palette="Accent")
ax[0].set_title("First 10 Country of Corruption")

highest = df.sort_values("corruption", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="corruption", ax=ax[1], palette="Accent_r")
ax[1].set_title("Last 10 Country of Corruption")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Density of `generosity`, one filled curve per continent.
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(data=df, x="generosity", hue="continent",
            fill=True, palette="tab10", ax=ax)
ax.set_title("Generosity by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `generosity` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("generosity", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="generosity", ax=ax[0], palette="Accent")
ax[0].set_title("First 10 Country of Generosity")

highest = df.sort_values("generosity", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="generosity", ax=ax[1], palette="Accent_r")
ax[1].set_title("Last 10 Country of Generosity")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Distribution of `gdp_log` within each continent.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x="continent", y="gdp_log", ax=ax)
ax.set_title("gdp_log by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `gdp_log` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("gdp_log", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="gdp_log", ax=ax[0], palette="Accent")
ax[0].set_title("First 10 Country of GDP_log")

highest = df.sort_values("gdp_log", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="gdp_log", ax=ax[1], palette="Accent_r")
ax[1].set_title("Last 10 Country of GDP_log")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()

In [None]:
# Density of `life_exp`, one filled curve per continent.
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(data=df, x="life_exp", hue="continent",
            fill=True, palette="tab10", ax=ax)
ax.set_title("life_exp by Continent")
plt.show()

In [None]:
# Side-by-side bar charts of the 10 countries with the smallest (left) and the
# 10 with the largest (right) `life_exp` values.
fig , ax = plt.subplots(1,2, figsize=(20,8))

# Select the 10 rows first and plot from that subset, so x and y have the same
# length and order. This removes the dependence on index alignment between a
# 10-row x and a full-length y, on a default RangeIndex (labels used as
# positions with iloc), and on a variable that shadowed the builtin `filter`.
lowest = df.sort_values("life_exp", ascending=True).head(10)
sns.barplot(data=lowest, x="country", y="life_exp", ax=ax[0], palette="Accent")
ax[0].set_title("First 10 Country of life_exp")

highest = df.sort_values("life_exp", ascending=False).head(10)
sns.barplot(data=highest, x="country", y="life_exp", ax=ax[1], palette="Accent_r")
ax[1].set_title("Last 10 Country of life_exp")

# Rotate the country names so they do not overlap.
fig.autofmt_xdate(rotation=45 )
plt.show()