In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt   
from sklearn.impute import SimpleImputer



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Introduction
Content: 

1. [Load and Check Data](#1)

2. [Preparing Data](#2)
     
   * [Rename to Economy](#3)
   * [Rename Happiness Score to Score](#4)
   * [Rename Happiness Rank to Rank](#5)
   * [Remove Region and Change name to Country](#6)
   * [Change name for health](#7)
   * [Change name for Freedom](#8)
   * [Change name for Trust (Government Corruption)](#9)

<a id = "1"></a><br>
# Load and Check Data

In [None]:
# Load the yearly World Happiness tables, one DataFrame per year (2015-2019).
INPUT_DIR = "/kaggle/input/world-happiness"
df_2015, df_2016, df_2017, df_2018, df_2019 = (
    pd.read_csv(f"{INPUT_DIR}/{year}.csv") for year in (2015, 2016, 2017, 2018, 2019)
)

In [None]:
# Column names, non-null counts and dtypes of the 2019 table.
df_2019.info()

In [None]:
# First rows of the 2019 table.
df_2019.head()

In [None]:
# Last rows of the 2019 table.
df_2019.tail()

In [None]:
# Summary statistics of the 2015 table (note: 2015, unlike the 2019 cells around it).
df_2015.describe()

In [None]:
# NOTE(review): same call as the earlier df_2019.info() cell; df_2019 is not
# modified in between, so this output repeats that one.
df_2019.info()

In [None]:
# Print the column labels of each yearly frame (newest first), one frame per line,
# to compare the naming schemes before harmonising them.
for frame in (df_2019, df_2018, df_2017, df_2016, df_2015):
    print(frame.columns)

<a id = "2"></a><br>
# Preparing Data

<a id = "3"></a><br>
## Rename to Economy for every CSV

In [None]:
# Give the GDP-per-capita column the common name "Economy" in every yearly frame.
# Each frame is paired with the label it uses for that column.
for frame, old_label in (
    (df_2015, "Economy (GDP per Capita)"),
    (df_2016, "Economy (GDP per Capita)"),
    (df_2017, "Economy..GDP.per.Capita."),
    (df_2018, "GDP per capita"),
    (df_2019, "GDP per capita"),
):
    frame.rename(columns={old_label: "Economy"}, inplace=True)

<a id = "4"></a><br>
## Rename Happiness Score to Score

In [None]:
# Rename the happiness-score column to "Score" in the 2015, 2016 and 2017 frames.
for frame, old_label in (
    (df_2015, "Happiness Score"),
    (df_2016, "Happiness Score"),
    (df_2017, "Happiness.Score"),
):
    frame.rename(columns={old_label: "Score"}, inplace=True)

<a id = "5"></a><br>
## Rename Happiness Rank to Rank

In [None]:
# Give the ranking column the common name "Rank" in every yearly frame.
for frame, old_label in (
    (df_2015, "Happiness Rank"),
    (df_2016, "Happiness Rank"),
    (df_2017, "Happiness.Rank"),
    (df_2018, "Overall rank"),
    (df_2019, "Overall rank"),
):
    frame.rename(columns={old_label: "Rank"}, inplace=True)

<a id = "6"></a><br>
## Remove Region and Change name to Country

In [None]:
# Drop the "Region" column from the 2015 and 2016 frames.
for frame in (df_2015, df_2016):
    frame.drop(columns=["Region"], inplace=True)

# Rename "Country or region" to "Country" in the 2018 and 2019 frames.
for frame in (df_2018, df_2019):
    frame.rename(columns={"Country or region": "Country"}, inplace=True)

<a id = "7"></a><br>
## Change name for health

In [None]:
# Give the life-expectancy column the common name "Health" in every yearly frame.
for frame, old_label in (
    (df_2015, "Health (Life Expectancy)"),
    (df_2016, "Health (Life Expectancy)"),
    (df_2017, "Health..Life.Expectancy."),
    (df_2018, "Healthy life expectancy"),
    (df_2019, "Healthy life expectancy"),
):
    frame.rename(columns={old_label: "Health"}, inplace=True)

<a id = "8"></a><br>
## Change name for Freedom

In [None]:
# Shorten "Freedom to make life choices" to "Freedom" in the 2018 and 2019 frames.
for frame in (df_2018, df_2019):
    frame.rename(columns={"Freedom to make life choices": "Freedom"}, inplace=True)

<a id = "9"></a><br>
## Change name for Trust (Government Corruption)

In [None]:
# Give the corruption-perception column the common name "Trust" in every yearly frame.
for frame, old_label in (
    (df_2015, "Trust (Government Corruption)"),
    (df_2016, "Trust (Government Corruption)"),
    (df_2017, "Trust..Government.Corruption."),
    (df_2018, "Perceptions of corruption"),
    (df_2019, "Perceptions of corruption"),
):
    frame.rename(columns={old_label: "Trust"}, inplace=True)

In [None]:
# Tag every row with the year of the table it came from, so the frames can be
# told apart after they are concatenated.
for year, frame in (
    (2015, df_2015),
    (2016, df_2016),
    (2017, df_2017),
    (2018, df_2018),
    (2019, df_2019),
):
    frame['Year'] = year

In [None]:
# Remove the "Standard Error" column from the 2015 frame (in place).
df_2015.drop(labels=["Standard Error"], axis="columns", inplace=True)

In [None]:
# Remove "Family" and "Dystopia Residual" from the 2015 frame (in place).
df_2015.drop(labels=["Family", "Dystopia Residual"], axis="columns", inplace=True)

In [None]:
# Remove the confidence-interval, "Family" and "Dystopia Residual" columns
# from the 2016 frame (in place).
unused_2016 = [
    "Lower Confidence Interval",
    "Upper Confidence Interval",
    "Family",
    "Dystopia Residual",
]
df_2016.drop(labels=unused_2016, axis="columns", inplace=True)

In [None]:
# Remove the whisker, "Family" and "Dystopia.Residual" columns from the 2017 frame (in place).
unused_2017 = ["Whisker.high", "Whisker.low", "Family", "Dystopia.Residual"]
df_2017.drop(labels=unused_2017, axis="columns", inplace=True)

In [None]:
# Remove the "Social support" column from the 2018 frame (in place).
df_2018.drop(labels=["Social support"], axis="columns", inplace=True)

In [None]:
# Remove the "Social support" column from the 2019 frame (in place).
df_2019.drop(labels=["Social support"], axis="columns", inplace=True)

In [None]:
# Print the column labels of every yearly frame again (newest first, one per line)
# to check the result of the renames and drops above.
yearly_frames = (df_2019, df_2018, df_2017, df_2016, df_2015)
print(*(frame.columns for frame in yearly_frames), sep="\n")

In [None]:
# Stack the five yearly frames row-wise into one table, oldest year first.
# The row labels of the individual frames are kept as-is, so the same label
# appears once per year in the combined index.
yearly_tables = (df_2015, df_2016, df_2017, df_2018, df_2019)
df_happiness = pd.concat(yearly_tables, axis="index")

In [None]:
# Display the combined 2015-2019 table.
df_happiness

In [None]:
# Summary statistics over all years combined.
df_happiness.describe()

In [None]:
# List every distinct country name that occurs in the combined table, one per line.
print('Country unique')
countries = pd.unique(df_happiness["Country"])
for country in countries:
    print(country)

In [None]:
# Number of rows per country in the combined table.
country_counts = df_happiness["Country"].value_counts()
print("Country Counts :\n")
print(country_counts)

In [None]:
# Number of missing values in each column of the combined table.
df_happiness.isnull().sum()

In [None]:
# Preview of the table without rows that contain any missing value.
# The result is only displayed; df_happiness itself is left unchanged.
df_happiness.dropna(axis=0, how="any")

In [None]:
# Per-column missing-value summary: absolute count and share of rows.
# Despite its name, "Percentage" is a fraction between 0 and 1 (not multiplied by 100).
null_mask = df_happiness.isnull()
Sum = null_mask.sum()
Percentage = Sum / null_mask.count()
pd.concat([Sum, Percentage], axis=1, keys=['Sum', 'Percentage'])

In [None]:
# Rows of the combined table whose "Trust" value is missing.
trust_is_missing = df_happiness["Trust"].isnull()
df_eksik = df_happiness.loc[trust_is_missing]
df_eksik

In [None]:
# Per-column missing-value summary: absolute count and share of rows
# ("Percentage" holds a 0-1 fraction, not a value scaled by 100).
Sum = df_happiness.isna().sum()
Percentage = Sum.div(len(df_happiness))
pd.concat([Sum, Percentage], axis=1, keys=['Sum', 'Percentage'])

In [None]:
# Missing values per column, before the incomplete row is removed.
df_happiness.isna().sum()

In [None]:
# Remove only the rows whose "Trust" value is missing.
# df_happiness was built with pd.concat without ignore_index, so every row label
# occurs once per year; dropping label 19 would delete one row from each of the
# five years instead of just the incomplete one.
df_happiness.dropna(subset=["Trust"], inplace=True)

In [None]:
# Missing values per column, to verify the clean-up above.
df_happiness.isna().sum()

In [None]:
import matplotlib.pyplot as plt 

In [None]:
# Line plot of the 2019 "Economy" column against the row index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_2019["Economy"])
plt.show()

In [None]:
# Relationship between health and happiness score in 2019.
# Drawn as a scatter plot: the rows are not sorted by "Health", so joining
# consecutive points with a line would zigzag back and forth across the x-axis.
plt.figure(figsize=(12,6))

plt.scatter(df_2019.Health, df_2019.Score)

plt.title("Ülkelerin mutluluk-sağlık değerleri ilişkisi ")

plt.xlabel("Sağlık Oranları")

plt.ylabel("Mutluluk Değerleri")
plt.show()

In [None]:
# 2019 "Economy" (red) and "Health" (blue) plotted against the happiness score.
# Both series share one axes, so each gets a label and a legend is shown;
# otherwise the reader cannot tell which line is which.
plt.figure(figsize=(12,6))
plt.plot(df_2019.Score,df_2019.Economy,color="red",label="Ekonomi")
plt.plot(df_2019.Score,df_2019.Health,color="blue",label="Sağlık")
plt.xlabel("Mutluluk Değeri")
plt.ylabel("Ekonomi ve Sağlık oranları")
plt.title("Ülkelerin Ekonomi ve Sağlık oranları")
plt.legend()
plt.show()

In [None]:
# Two side-by-side panels in the top row of a 2x2 grid:
# 2019 "Trust" vs score (left) and 2019 "Freedom" vs score (right).
fig = plt.figure(figsize=(20, 10))

trust_ax = fig.add_subplot(2, 2, 1)
trust_ax.plot(df_2019["Score"], df_2019["Trust"], color="r")
trust_ax.set_xlabel("Mutluluk")
trust_ax.set_ylabel("Güven Değeri")
trust_ax.set_title("2019 yılı Ülkelerin Mutluluk-Güven Oranları")

freedom_ax = fig.add_subplot(2, 2, 2)
freedom_ax.plot(df_2019["Score"], df_2019["Freedom"], color="blue")
freedom_ax.set_xlabel("Mutluluk")
freedom_ax.set_ylabel("Özgürlük Değeri")
freedom_ax.set_title("2019 yılı Ülkelerin Mutluluk-Özgürlük Oranları")

plt.show()

In [None]:
# Main axes with the 2019 Health-vs-Score line and a smaller inset axes
# (lower right) with the same relation for 2015.
f = plt.figure(figsize=(12, 4))
axes1 = f.add_axes([0.1, 0.1, 0.9, 0.9])
axes2 = f.add_axes([0.65, 0.2, 0.3, 0.3])

panels = (
    (axes1, df_2019, "blue", "2019 Sağlık-Mutluluk Oranı"),
    (axes2, df_2015, "red", "2015 Sağlık-Mutluluk Oranı"),
)
for ax, frame, colour, heading in panels:
    ax.plot(frame["Score"], frame["Health"], color=colour)
    ax.set_xlabel("Skor")
    ax.set_ylabel("Sağlık")
    ax.set_title(heading)

plt.show()

In [None]:
# Horizontal bar chart of "Score" by "Country" for the first 15 rows of the
# combined table. Both variables come from the same 15-row slice: passing a
# 15-element y together with the full-length x leaves the pairing ill-defined,
# all the more so because the combined frame has repeated row labels.
first_15 = df_happiness.iloc[:15]
sns.barplot(data=first_15, x="Score", y="Country")

In [None]:
# 2019 "Economy" against the happiness score: dashed red line with yellow markers.
line_style = dict(
    color="r",
    linewidth=2,
    linestyle="--",
    marker="o",
    markersize=9,
    markerfacecolor="yellow",
    markeredgewidth=1,
)
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df_2019["Score"], df_2019["Economy"], **line_style)
ax.set_xlabel("Mutluluk Skorları")
ax.set_ylabel("Ekonomi Değerleri")
ax.set_title("Ekonomi - Mutluluk İlişkisi Oranları")
ax.grid(linestyle="--")
plt.show()

In [None]:
# 2019 "Health" (dashed red) and "Trust" (dotted green) against the happiness score.
# The y-label and title name the two columns that are actually plotted
# (Health and Trust), and a legend identifies each line.
plt.figure(figsize=(18,5))
plt.plot(df_2019.Score,df_2019.Health,color="r",linewidth=3,linestyle="--",marker="o",markersize=8,
markerfacecolor="yellow",markeredgewidth=2,markeredgecolor="blue",alpha=0.6,label="Sağlık")
plt.plot(df_2019.Score,df_2019.Trust,color="green",linewidth=2.5,linestyle=":",marker="o",markerfacecolor="yellow",
markeredgewidth=2,markeredgecolor="black",alpha=0.8,label="Güven")
plt.xlabel("Mutluluk Skor Değeri")
plt.ylabel("Sağlık ve Güven Değerleri")
plt.title("Sağlık ve Güven Değerleri-Mutluluk Oranları")
plt.legend()
plt.show()

In [None]:
# Scatter plot with a fitted regression line: 2019 "Economy" as a function of "Score".
sns.lmplot(data=df_2019, x="Score", y="Economy")

## Health vs Happiness Score from 2015 to 2019

In [None]:
# Copy of the 2019 table without "Rank", "Trust", "Generosity" and "Year",
# then one box plot per remaining column.
excluded_columns = ["Rank", "Trust", "Generosity", "Year"]
df_copy = df_2019.drop(columns=excluded_columns)
sns.boxplot(data=df_copy)

In [None]:
# Violin plot drawn from df_copy (the reduced 2019 table built in the box-plot cell).
sns.violinplot(data=df_copy)
plt.show()

In [None]:
# Correlation matrix of the numeric columns of df_copy, shown as a heat map.
# df_copy still contains the text column "Country"; DataFrame.corr() raises on
# non-numeric columns in pandas >= 2.0, so the numeric columns are selected first.
corr = df_copy.select_dtypes(include="number").corr()
sns.heatmap(corr)

In [None]:
# One scatter layer per year (2015 to 2019) of "Health" against "Score",
# all drawn on the same axes; the figure legend names the years in drawing order.
fig = plt.figure(figsize=(10, 6))
for frame in (df_2015, df_2016, df_2017, df_2018, df_2019):
    sns.scatterplot(x=frame['Score'], y=frame['Health'])
fig.legend(labels=['2015', '2016', '2017', '2018', '2019'])
plt.title("Health vs Happiness Score from 2015- 2019")

## Freedom vs Happiness Score from 2015 to 2019

In [None]:
# One scatter layer per year (2015 to 2019) of "Freedom" against "Score",
# all drawn on the same axes; the figure legend names the years in drawing order.
fig = plt.figure(figsize=(10, 6))
for frame in (df_2015, df_2016, df_2017, df_2018, df_2019):
    sns.scatterplot(x=frame['Score'], y=frame['Freedom'])
fig.legend(labels=['2015', '2016', '2017', '2018', '2019'])
plt.title("Freedom vs Happiness Score from 2015- 2019")

## Trust vs Happiness Score from 2015 to 2019

In [None]:
# One scatter layer per year (2015 to 2019) of "Trust" against "Score",
# all drawn on the same axes; the figure legend names the years in drawing order.
fig = plt.figure(figsize=(10, 6))
for frame in (df_2015, df_2016, df_2017, df_2018, df_2019):
    sns.scatterplot(x=frame['Score'], y=frame['Trust'])
fig.legend(labels=['2015', '2016', '2017', '2018', '2019'])
plt.title("Trust vs Happiness Score from 2015- 2019")

## Happiest Country vs. Saddest Country

In [None]:
# Print the country in the first row of each yearly table.
# NOTE(review): presumably the tables are ordered by rank, so the first row is
# the top-ranked country; confirm against the data.
for year, frame in (
    (2015, df_2015),
    (2016, df_2016),
    (2017, df_2017),
    (2018, df_2018),
    (2019, df_2019),
):
    first_country = frame.head(1)['Country'][0]
    print(f"In {year}, Happiest Country is {first_country}")

In [None]:
# Print the country in the last row of each yearly table, together with the year
# (without the year the five output lines cannot be told apart, and the
# "Happiest Country" cell already uses this format).
# The last row is taken by position so the lookup does not depend on row labels.
# NOTE(review): presumably the tables are ordered by rank, so the last row is
# the lowest-ranked country; confirm against the data.
for year, frame in (
    (2015, df_2015),
    (2016, df_2016),
    (2017, df_2017),
    (2018, df_2018),
    (2019, df_2019),
):
    print("In " + str(year) + ", Saddest Country is " + str(frame["Country"].iloc[-1]))

In [None]:
# Kernel density estimate of "Score" for each year, overlaid on one axes.
plt.figure(figsize=(10, 5))
for frame, colour, year in (
    (df_2015, 'red', '2015'),
    (df_2016, 'blue', '2016'),
    (df_2017, 'limegreen', '2017'),
    (df_2018, 'orange', '2018'),
    (df_2019, 'pink', '2019'),
):
    sns.kdeplot(frame['Score'], color=colour, label=year)
plt.title('Happiness Over The Years', size=20)
plt.legend()
plt.show()

In [None]:
# Kernel density estimate of "Economy" for each year, overlaid on one axes.
plt.figure(figsize=(10, 5))
for frame, colour, year in (
    (df_2015, 'red', '2015'),
    (df_2016, 'blue', '2016'),
    (df_2017, 'limegreen', '2017'),
    (df_2018, 'orange', '2018'),
    (df_2019, 'pink', '2019'),
):
    sns.kdeplot(frame['Economy'], color=colour, label=year)
plt.title('Economy Over The Years', size=20)
plt.legend()
plt.show()

In [None]:
# Kernel density estimate of "Health" for each year, overlaid on one axes.
plt.figure(figsize=(10, 5))
for frame, colour, year in (
    (df_2015, 'red', '2015'),
    (df_2016, 'blue', '2016'),
    (df_2017, 'limegreen', '2017'),
    (df_2018, 'orange', '2018'),
    (df_2019, 'pink', '2019'),
):
    sns.kdeplot(frame['Health'], color=colour, label=year)
plt.title('Health Over The Years', size=20)
plt.legend()
plt.show()

In [None]:
# Kernel density estimate of "Freedom" for each year, overlaid on one axes.
plt.figure(figsize=(10, 5))
for frame, colour, year in (
    (df_2015, 'red', '2015'),
    (df_2016, 'blue', '2016'),
    (df_2017, 'limegreen', '2017'),
    (df_2018, 'orange', '2018'),
    (df_2019, 'pink', '2019'),
):
    sns.kdeplot(frame['Freedom'], color=colour, label=year)
plt.title('Freedom Over The Years', size=20)
plt.legend()
plt.show()

In [None]:
# Kernel density estimate of "Trust" for each year, overlaid on one axes.
plt.figure(figsize=(10, 5))
for frame, colour, year in (
    (df_2015, 'red', '2015'),
    (df_2016, 'blue', '2016'),
    (df_2017, 'limegreen', '2017'),
    (df_2018, 'orange', '2018'),
    (df_2019, 'pink', '2019'),
):
    sns.kdeplot(frame['Trust'], color=colour, label=year)
plt.title('Trust Over The Years', size=20)
plt.legend()
plt.show()

In [None]:
# Kernel density estimate of "Generosity" for each year, overlaid on one axes.
plt.figure(figsize=(10, 5))
for frame, colour, year in (
    (df_2015, 'red', '2015'),
    (df_2016, 'blue', '2016'),
    (df_2017, 'limegreen', '2017'),
    (df_2018, 'orange', '2018'),
    (df_2019, 'pink', '2019'),
):
    sns.kdeplot(frame['Generosity'], color=colour, label=year)
plt.title('Generosity - Over The Years', size=20)
plt.legend()
plt.show()