In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file found under the Kaggle input directory, one full path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the World Happiness Report CSV from the Kaggle input directory.
# NOTE(review): despite the name `df_2020`, later cells treat this frame as
# multi-year data (it is sorted and animated by its 'year' column) — consider a clearer name.
WHR_CSV_PATH = '../input/world-happiness-report-2021/world-happiness-report.csv'
df_2020 = pd.read_csv(WHR_CSV_PATH)

In [None]:
# Preview the first rows. A bare last expression lets the notebook render the
# frame as a table instead of the plain-text output produced by print().
df_2020.head()

In [None]:
# Working name used by the rest of the analysis. This binds a second name to the
# same DataFrame object; no copy is made.
df_whr = df_2020

In [None]:
# Display the column labels of the report frame.
df_whr.columns

# Column Description
* **Country Name** - World Happiness report country name.
* **Year** - World Happiness report recorded year.
* **Life Ladder** - Imagine a ladder, with steps numbered from 0 at the bottom to 10 at the top. The top of the ladder represents the best possible life for you and the bottom of the ladder represents the worst possible life for you.
* **Log GDP per capita** - Per capita gross domestic product (GDP) is a metric that breaks down a country's economic output per person and is calculated by dividing the GDP of a country by its population. This column stores the logarithm of that value.
* **Social Support** - Having friends and other people, including family, to turn to in times of need or crisis to give you a broader focus and positive self-image. Social support enhances quality of life and provides a buffer against adverse life events.
* **Healthy Life expectancy at birth** - The average equivalent number of years of full health that a newborn could expect to live, if he or she were to pass through life subject to the age-specific death rates and ill-health rates of a given period.
* **Generosity** - a willingness to give help or support, esp. more than is usual or expected.
* **Perception of Corruption** - Perceived level of corruption in the country.

# World Happiness Report Information

In [None]:
# Print a concise summary of the frame (columns, non-null counts and dtypes).
df_whr.info()

In [None]:
# Count the missing values in each column.
df_whr.isna().sum()

* All features except Country name, year and Life Ladder contain null values.
* These null values were filled with the mean during preprocessing.

In [None]:
# Summary statistics of the numeric columns, transposed so that each feature is a row.
df_whr.describe().T

* Healthy life expectancy at birth has a higher standard deviation than the other features, so it should be normalized.

# Exploratory Data Analysis (EDA)

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
# Number of records per country, most frequent first.
# No figure is created here: this cell only tabulates counts, so it ends with a
# bare expression and lets the notebook display the resulting Series.
df_whr['Country name'].value_counts()

In [None]:
import plotly.express as px

In [None]:
def animated_graph(df, column_name):
    """Show an animated world choropleth of one feature, with one frame per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the 'Country name' and 'year' columns plus ``column_name``.
    column_name : str
        Label of the numeric column used to colour the countries; it is also
        used in the figure title.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Pin the colour scale to this column's own min/max. A single hard-coded
    # midpoint cannot suit features that live on very different scales, and an
    # explicit range keeps the colours comparable from one frame to the next.
    value_min = df[column_name].min()
    value_max = df[column_name].max()

    fig = px.choropleth(
        df.sort_values('year'),  # frames are played in ascending year order
        locations="Country name",
        locationmode='country names',
        color=column_name,
        animation_frame='year',
        range_color=[value_min, value_max],
        color_continuous_scale=px.colors.sequential.dense,
    )

    fig.update_layout(title=column_name + ' Comparison by country', autosize=True)
    fig.show()

In [None]:
import numpy as np
# Collect the labels of every float64 column; these are the features that the
# later cells plot.
col = df_whr.select_dtypes(include=[np.float64]).columns
print(col)

* On average, there are about 140 records per year from 2011 to 2020.

In [None]:
# Draw one histogram (with a KDE overlay) per float column on a single grid.
# The grid is sized from the number of columns rather than from a three-digit
# subplot code such as 431, which cannot address more than nine panels.
n_grid_cols = 3
n_grid_rows = max(1, -(-len(col) // n_grid_cols))  # ceiling division
figure, axes = plt.subplots(n_grid_rows, n_grid_cols, figsize=(16, 16), squeeze=False)
flat_axes = axes.ravel()

for ax, cl in zip(flat_axes, col):
    sns.histplot(df_whr[cl], kde=True, ax=ax)

# Hide any grid cells that were not needed.
for ax in flat_axes[len(col):]:
    ax.set_visible(False)

plt.show()

# Breakdown
**Across the whole data set**
* **Life Ladder** - most values fall between 4.5 and 6.5.
* **GDP Per Capita** - most values fall between 9 and 10.5.
* **Social Support** - most values fall between 0.75 and 0.95.
* **Healthy life expectancy at birth** - most values fall between 65 and 75, and the distribution is left-skewed.
* **Freedom to make life choices** - most values fall between 0.65 and 0.95.
* **Generosity** - most values fall between -0.2 and 0.1.
* **Perceptions of corruption** - most values fall between 0.7 and 0.9.
* **Positive affect** - most values fall between 0.55 and 0.85.
* **Negative affect** - most values fall between 0.15 and 0.35.

# Comparing features based on the country

**Each animated map shows one feature by country, with one frame per year.**

In [None]:
# Render one animated world map for each feature listed in `col`.
for feature_name in col:
    animated_graph(df_whr, column_name=feature_name)

In [None]:
# Keep only the columns needed to rank records by Life Ladder.
# An explicit copy makes this an independent frame, so later operations on
# df_top10 neither affect df_whr nor raise SettingWithCopyWarning.
df_top10 = df_whr[['Country name', 'year', 'Life Ladder']].copy()

In [None]:
# Preview the first rows. A bare last expression lets the notebook render the
# frame as a table instead of the plain-text output produced by print().
df_top10.head()

In [None]:
# Order the records from highest to lowest Life Ladder score.
# The sorted result is reassigned instead of using inplace=True: an in-place sort
# on a frame obtained by column selection can raise SettingWithCopyWarning.
df_top10 = df_top10.sort_values(by=['Life Ladder'], ascending=False)

In [None]:
# First ten rows of df_top10 (the highest Life Ladder records once the frame
# has been sorted in descending order).
df_life = df_top10.iloc[:10]

In [None]:
# Count how many of the rows in df_life belong to each country.
df_life['Country name'].value_counts()

In [None]:
# Last ten rows of df_top10 (the lowest Life Ladder records once the frame
# has been sorted in descending order).
df_life_bad = df_top10.iloc[-10:]

In [None]:
# Count how many of the rows in df_life_bad belong to each country.
df_life_bad['Country name'].value_counts()

# Conclusion
* The top 3 happiest countries in the world are **Denmark, Finland and Switzerland**, based on the Life Ladder scores from 2005 to 2020.
* The 3 worst-ranked countries in the world are **Afghanistan, Togo and Liberia**.