# The World's Happy Value Analysis

![](https://bradaronson.com/wp-content/uploads/2013/10/happy.jpg)

## 1. Import libraries and load the datasets

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly.validators.scatter.marker import SymbolValidator

import plotly.offline as pyo
# Initialise plotly's offline notebook mode before any figure is shown.
pyo.init_notebook_mode()

### Explanation of the dataset columns

Country - Name of the country.

Region - Region the country belongs to.

Happiness Rank - Rank of the country based on the Happiness Score.

Happiness Score - A metric measured in each year by asking the sampled people the question: "How would you rate your happiness on a scale of 0 to 10, where 10 is the happiest?"

Economy (GDP per Capita) - The extent to which GDP contributes to the calculation of the Happiness Score.

Family - The extent to which Family contributes to the calculation of the Happiness Score

Health (Life Expectancy) - The extent to which Life expectancy contributed to the calculation of the Happiness Score

Freedom - The extent to which Freedom contributed to the calculation of the Happiness Score.

Trust (Government Corruption) or Perceptions of corruption - The extent to which Perception of Corruption contributes to Happiness Score.

Generosity - The extent to which Generosity contributed to the calculation of the Happiness Score.

Social support - After 2017, the Family column was renamed to Social support.

Dystopia Residual - The extent to which Dystopia Residual contributed to the calculation of the Happiness Score.

Standard Error - The standard error of the happiness score.

Lower Confidence Interval - Lower Confidence Interval of the Happiness Score

Upper Confidence Interval - Upper Confidence Interval of the Happiness Score


## 2. Data Preprocessing

### The yearly datasets have different columns and column names, so I will preprocess them into a common layout.
### I will ignore the Standard Error, Lower Confidence Interval, and Upper Confidence Interval columns.

In [None]:
# 2015 survey: discard the standard error, tag every row with its year and
# rename the columns to the short names used throughout the notebook.
df2015 = (
    pd.read_csv("../input/world-happiness/2015.csv")
    .drop(columns="Standard Error")
    .assign(year=2015)
)
df2015.columns = [
    "Country", "Region", "Happines Rank", "Happiness Score", "Economy",
    "Social support (Family)", "Health", "Freedom", "Trust", "Generosity",
    "Dystopia Residual", "year",
]

# 2016 survey: discard the confidence-interval columns, tag every row with
# its year and rename the columns to the short names used in the notebook.
df2016 = (
    pd.read_csv("../input/world-happiness/2016.csv")
    .drop(columns=["Lower Confidence Interval", "Upper Confidence Interval"])
    .assign(year=2016)
)
df2016.columns = [
    "Country", "Region", "Happines Rank", "Happiness Score", "Economy",
    "Social support (Family)", "Health", "Freedom", "Trust", "Generosity",
    "Dystopia Residual", "year",
]

# 2017 survey: discard the whisker columns, tag every row with its year and
# rename the columns. This file has no Region column, and Generosity comes
# before Trust in its renamed column list.
df2017 = (
    pd.read_csv("../input/world-happiness/2017.csv")
    .drop(columns=["Whisker.high", "Whisker.low"])
    .assign(year=2017)
)
df2017.columns = [
    "Country", "Happines Rank", "Happiness Score", "Economy",
    "Social support (Family)", "Health", "Freedom", "Generosity", "Trust",
    "Dystopia Residual", "year",
]

# (country, region) pairs from the 2015 data as a 2-column array; used as the
# lookup table by make_region.
country_region = df2015[["Country", "Region"]].to_numpy()

def make_region(Country):
    """Look up the region of a country in the module-level country_region table.

    Args:
        Country: Country name to search for.

    Returns:
        The region paired with the first row whose name equals ``Country``,
        or the string "no_region" when no row matches.
    """
    for known_country, known_region in country_region:
        if known_country == Country:
            return known_region

    return "no_region"

# Derive the 2017 regions from the 2015 lookup table.
df2017["Region"] = df2017["Country"].apply(make_region)

# Hand-assigned regions, keyed by row label in df2017.
# NOTE(review): these labels are positional and tied to the current 2017.csv
# row order - confirm if the file changes.
regions_2017_by_row = {
    32: 'Eastern Asia',
    49: 'Latin America and Caribbean',
    70: 'Eastern Asia',
    92: 'Sub-Saharan Africa',
    110: 'Sub-Saharan Africa',
    146: 'Sub-Saharan Africa',
}
for row_label, region_name in regions_2017_by_row.items():
    df2017.loc[row_label, "Region"] = region_name

# 2018 survey: tag rows with the year and rename the columns.
df2018 = pd.read_csv("../input/world-happiness/2018.csv").assign(year=2018)
df2018.columns = [
    "Happines Rank", "Country", "Happiness Score", "Economy",
    "Social support (Family)", "Health", "Freedom", "Generosity", "Trust",
    "year",
]

# Dystopia Residual is not a column here: take what is left of the score
# after subtracting each factor in turn (left to right).
residual_2018 = df2018["Happiness Score"]
for factor_2018 in ["Economy", "Social support (Family)", "Health",
                    "Freedom", "Generosity", "Trust"]:
    residual_2018 = residual_2018 - df2018[factor_2018]
df2018["Dystopia Residual"] = residual_2018

# Regions come from the 2015 lookup table, then a few rows are set by hand.
# NOTE(review): row labels are tied to the current 2018.csv row order - confirm.
df2018["Region"] = df2018["Country"].apply(make_region)
regions_2018_by_row = {
    37: 'Latin America and Caribbean',
    48: 'Latin America and Caribbean',
    57: 'Central and Eastern Europe',
    97: 'Sub-Saharan Africa',
    118: 'Sub-Saharan Africa',
    153: 'Sub-Saharan Africa',
}
for row_label_2018, region_name_2018 in regions_2018_by_row.items():
    df2018.loc[row_label_2018, "Region"] = region_name_2018

# 2019 survey: tag rows with the year and rename the columns.
df2019 = pd.read_csv("../input/world-happiness/2019.csv").assign(year=2019)
df2019.columns = [
    "Happines Rank", "Country", "Happiness Score", "Economy",
    "Social support (Family)", "Health", "Freedom", "Generosity", "Trust",
    "year",
]

# Dystopia Residual is not a column here: take what is left of the score
# after subtracting each factor in turn (left to right).
residual_2019 = df2019["Happiness Score"]
for factor_2019 in ["Economy", "Social support (Family)", "Health",
                    "Freedom", "Generosity", "Trust"]:
    residual_2019 = residual_2019 - df2019[factor_2019]
df2019["Dystopia Residual"] = residual_2019

# Regions come from the 2015 lookup table, then a few rows are set by hand.
# NOTE(review): row labels are tied to the current 2019.csv row order - confirm.
df2019["Region"] = df2019["Country"].apply(make_region)
regions_2019_by_row = {
    38: 'Latin America and Caribbean',
    63: 'Central and Eastern Europe',
    83: 'Central and Eastern Europe',
    111: 'Sub-Saharan Africa',
    112: 'Sub-Saharan Africa',
    119: 'Sub-Saharan Africa',
    155: 'Sub-Saharan Africa',
}
for row_label_2019, region_name_2019 in regions_2019_by_row.items():
    df2019.loc[row_label_2019, "Region"] = region_name_2019

### After preprocessing, each dataset keeps the columns Happiness Rank, Country, Region, Happiness Score, Economy, Social support (Family), Health, Freedom, Generosity, Trust, year, and Dystopia Residual. The five CSV files are then combined into a single DataFrame.

In [None]:
# Build the combined long-format frame in this cell so that `df` exists when
# the notebook is run from top to bottom: each yearly frame is melted to one
# row per (country, year, factor) and the five results are stacked.
_melt_keys = ['Country', 'Region', 'year', 'Happiness Score', 'Happines Rank']
df = pd.concat(
    [yearly.melt(id_vars=_melt_keys)
     for yearly in (df2015, df2016, df2017, df2018, df2019)],
    ignore_index=True,
)
df.head()

## 3. Visualization


![](https://www.ivie.es/wp-content/uploads/2019/04/20190415_PEIRO_measurement-of-hedonic-and-eudaimonic-orientations-to-happiness-the-spanish-orientations-to-happiness-scale.jpg)

## Which are the happiest countries?

In [None]:
# Long-format table: every yearly frame is melted to one row per
# (country, year, factor) and the five results are stacked.
melt_keys = ['Country', 'Region', 'year', 'Happiness Score', 'Happines Rank']
df = pd.concat(
    [frame.melt(id_vars=melt_keys)
     for frame in (df2015, df2016, df2017, df2018, df2019)],
    ignore_index=True,
)

# Rows for countries ranked 1-10, ordered by score, drawn as an animated
# bar chart with one frame per year.
top10_rows = df[df["Happines Rank"] <= 10].sort_values(by="Happiness Score")
fig = px.bar(
    top10_rows,
    y="Country",
    x="value",
    color='variable',
    animation_frame='year',
    height=700,
    title="The happiest Country Top 10",
    opacity=.5,
    text="value",
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='auto')
fig.update_layout(transition={'duration': 1000})
fig.show()

## What about the least happy countries?

In [None]:
# Last ten rows of every yearly frame, melted to long format and stacked.
bottom_keys = ['Country', 'Region', 'year', 'Happiness Score', 'Happines Rank']
df_under10 = pd.concat(
    [frame.tail(10).melt(id_vars=bottom_keys)
     for frame in (df2015, df2016, df2017, df2018, df2019)],
    ignore_index=True,
)

# Animated bar chart with one frame per year.
fig = px.bar(
    df_under10,
    y="Country",
    x="value",
    color='variable',
    animation_frame='year',
    height=700,
    title="Not the happiest Country Top 10",
    opacity=.5,
    text="value",
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='auto')
fig.update_layout(transition={'duration': 1000})
fig.show()

### Through these visualizations, we can analyze the 10 happiest and the 10 least happy countries for each year.

## Which factors affect the happiness score?

In [None]:
# One donut chart per survey year, laid out side by side in a single row.
pie_years = [2015, 2016, 2017, 2018, 2019]
fig = make_subplots(rows=1, cols=len(pie_years),
                    specs=[[{'type': 'domain'} for _ in pie_years]])
for pie_col, pie_year in enumerate(pie_years, start=1):
    # Labels and values are both taken from the same year's rows, so each
    # pie shows that year's own factor values.
    year_rows = df[df['year'] == pie_year]
    fig.add_trace(go.Pie(labels=year_rows["variable"], values=year_rows["value"], name=str(pie_year)),
                  1, pie_col)

fig.update_traces(hole=.3, hoverinfo="label+percent+name",textinfo='percent+label')
fig.update_layout(
    title_text="Change of variables 2015 - 2019",
    # Year captions placed at hand-picked x positions, one per donut.
    annotations=[dict(text='2015', x=0.07, y=0.5, font_size=20, showarrow=False),
                 dict(text='2016', x=0.27, y=0.5, font_size=20, showarrow=False),
                dict(text='2017', x=0.50, y=0.5, font_size=20, showarrow=False),
                dict(text='2018', x=0.73, y=0.5, font_size=20, showarrow=False),
                dict(text='2019', x=0.94, y=0.5, font_size=20, showarrow=False)])
fig.show()

### Dystopia Residual contributes the most, followed by Social support, Economy, Health, and Freedom.
### Interestingly, after 2016, Trust ranks higher than Generosity.

## Happiness score analysis by region

In [None]:
# Mean happiness score per region and year, pivoted to one column per year
# and melted back to long format for the animated chart.
region_means = df.groupby(["Region", "year"])["Happiness Score"].mean()
df_tmp = region_means.to_frame().unstack().reset_index()
df_tmp.columns = ["Region", "2015", "2016", "2017", "2018", "2019"]
df_tmp = df_tmp.melt("Region")

regions_by_value = df_tmp.sort_values(by="value")
fig = px.bar(
    regions_by_value,
    x="Region",
    y="value",
    animation_frame='variable',
    text="value",
    color="Region",
    height=600,
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='auto')
fig.update_layout(transition={'duration': 1000})
fig.show()

### Australia and New Zealand is the happiest region, followed by North America.
### Sub-Saharan Africa and Southern Asia recorded low happiness scores.

## Which countries had the largest increase and decrease?

In [None]:
# Mean happiness score per country, pivoted to one column per year.
df_tmp = df.pivot_table(index=["Country","year"],values="Happiness Score")
df_tmp = df_tmp.unstack()
df_tmp = df_tmp.reset_index()
df_tmp.columns = ["Country","2015","2016","2017","2018","2019"]
# Keep only countries that have a score in every one of the five years.
df_tmp = df_tmp.dropna()
df_tmp = df_tmp.reset_index(drop=True)

# Row-wise lowest and highest score across the five year columns, computed
# in one vectorised call each instead of a Python loop over rows.
score_columns = ["2015", "2016", "2017", "2018", "2019"]
df_tmp["min"] = df_tmp[score_columns].min(axis=1)
df_tmp["max"] = df_tmp[score_columns].max(axis=1)

# Spread between the best and worst year; unsigned, so it does not say
# whether the score went up or down.
df_tmp["difference"] = df_tmp["max"] - df_tmp["min"]

In [None]:
# Show the two countries with the widest gap between their best and worst year.
df_tmp.sort_values(by="difference", ascending=False).iloc[:2]

### From this DataFrame we can see that the country with the largest decrease is Venezuela and the country with the largest increase is Benin. Now let's visualize that data.

In [None]:
# Long-format rows for the two countries singled out above.
is_venezuela = df["Country"] == "Venezuela"
is_benin = df["Country"] == "Benin"
df_venezuela = df[is_venezuela]
df_Benin = df[is_benin]

# Stacked bars per year: each colour is one factor of Venezuela's score.
fig = px.bar(
    df_venezuela,
    x="year",
    y="value",
    color="variable",
    text="value",
    title="Venezuela's Happy value change",
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='auto')
fig.show()

### Venezuela's happiness score decreased over the 5 years; in particular, Dystopia Residual decreased by about 1.94.

In [None]:
# Stacked bars per year: each colour is one factor of Benin's score.
fig = px.bar(
    df_Benin,
    x="year",
    y="value",
    color="variable",
    text="value",
    title="Benin's Happy value change",
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='auto')
fig.show()

### In contrast, Benin's happiness score increased over the five years; Dystopia Residual increased by about 1.42.

## The world's happy value

In [None]:
# Mean happiness score over all rows of each year, reshaped to a three-column
# frame: a constant label column, the year, and the mean value.
yearly_mean = df.groupby("year")["Happiness Score"].mean()
df_tmp = yearly_mean.to_frame().unstack().reset_index()
df_tmp.columns = ["world's Happiness Score", "year", "value"]

# One bar per year on a logarithmic y axis, coloured by year.
fig = px.bar(
    df_tmp,
    x="year",
    y="value",
    text="value",
    height=600,
    log_y=True,
    color='year',
    title="The world's Happy value",
    color_continuous_scale=px.colors.sequential.Viridis,
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='auto')
fig.show()

### According to the bar chart, 2019 is happier than the other years. Does that mean we are happier now...?

![](https://contenthub-static.grammarly.com/blog/wp-content/uploads/2017/10/thank-you-760x400.jpg)