In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Importing related libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objs as go

# **<font color = DarkBlue>Introduction</font>**

* The World Happiness Report is a landmark survey of the state of global happiness. The report continues to gain global recognition as governments, organizations and civil society increasingly use happiness indicators to inform their policy-making decisions. Leading experts across fields – economics, psychology, survey analysis, national statistics, health, public policy and more – describe how measurements of well-being can be used effectively to assess the progress of nations. The reports review the state of happiness in the world today and show how the new science of happiness explains personal and national variations in happiness.

# **<font color = DarkBlue>Content</font>**

* The happiness scores and rankings use data from the Gallup World Poll. The columns following the happiness score estimate the extent to which each of six factors – economic production, social support, life expectancy, freedom, absence of corruption, and generosity – contribute to making life evaluations higher in each country than they are in Dystopia, a hypothetical country that has values equal to the world’s lowest national averages for each of the six factors. They have no impact on the total score reported for each country, but they do explain why some countries rank higher than others.


# **<font color = DarkBlue>Importing Data</font>**

In [None]:
# Read the report CSV into `df`; the path is relative to the notebook's working directory.
df = pd.read_csv(
    filepath_or_buffer="../input/world-happiness-report-2021/world-happiness-report-2021.csv"
)

In [None]:
# Preview the first five rows of the raw data
df.head(n=5)

In [None]:
# Print a concise summary of the raw DataFrame (column names, dtypes, non-null counts)
df.info()

# **<font color = DarkBlue>Data Cleaning</font>**

In [None]:
# Count the missing values in each column of the raw data
df.isnull().sum(axis=0)

**There are no missing values. Let's move on to the next step.**

In [None]:
# Keep only the columns used in the analysis; .copy() makes df2021 independent of df.
df2021 = df[[
    'Country name',
    'Regional indicator',
    'Ladder score',
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]].copy()

In [None]:
# Preview the first five rows of the reduced frame
df2021.head(n=5)

In [None]:
# Basic descriptive statistics (pandas `describe`) for the columns of df2021
df2021.describe()

# **<font color = DarkBlue>Data Analysis</font>**

In [None]:
# Tally how many rows fall in each region and return the result as a DataFrame
(
    df2021['Regional indicator']
    .value_counts()
    .reset_index()
)

**Sub-Saharan Africa has the largest number of countries, followed by Western Europe.**

In [None]:
# Bar chart of ladder score by region: one bar mark per row of df2021,
# colored by that row's ladder score, with the country name shown on hover.
fig = px.bar(
    df2021,
    x='Regional indicator',
    y='Ladder score',
    color='Ladder score',
    color_continuous_scale='RdBu',
    hover_data=['Country name'],
)
fig.show()

**Western European countries have the highest happiness scores.
On the other hand, countries located in South Asia and Sub-Saharan Africa have the lowest happiness scores among the regions.**

In [None]:
# Sunburst of regions -> countries. Sector size is the ladder score and sector
# color is healthy life expectancy.
# The color midpoint is the ladder-score-weighted mean of healthy life
# expectancy. It is computed from df2021 (the frame actually plotted) rather
# than the raw df, so the color scale always matches the displayed data.
fig = px.sunburst(
    df2021,
    path=['Regional indicator', 'Country name'],
    values='Ladder score',
    color='Healthy life expectancy',
    color_continuous_scale='rdbu',
    color_continuous_midpoint=np.average(
        df2021['Healthy life expectancy'],
        weights=df2021['Ladder score'],
    ),
)

fig.show()

**The countries with higher healthy life expectancy are mostly situated in Western Europe and the countries with lower healthy life expectancy are situated in Sub-Saharan Africa.**

In [None]:
# Stacked bar chart of the six "Explained by: ..." contributions for the
# 20 rows with the highest ladder score.
# The raw `df` is used here because the "Explained by" columns are not part
# of df2021.
df1 = df.sort_values(by=['Ladder score'], ascending=False).reset_index().head(20)

# (column, bar color) for each factor, listed in stacking order.
factor_colors = [
    ('Explained by: Log GDP per capita', 'SteelBlue'),
    ('Explained by: Social support', 'LightSeaGreen'),
    ('Explained by: Healthy life expectancy', 'DarkSeaGreen'),
    ('Explained by: Freedom to make life choices', 'Gold'),
    ('Explained by: Generosity', 'FireBrick'),
    ('Explained by: Perceptions of corruption', 'Chocolate'),
]

# One bar trace per factor; the column name doubles as the legend label.
data = [
    go.Bar(
        x=df1['Country name'],
        y=df1[column],
        name=column,
        marker=dict(color=color),
    )
    for column, color in factor_colors
]

# barmode='relative' stacks the traces on top of each other per country.
layout = dict(
    title='Top 20 Happiest Countries: Six factors',
    title_x=0.5,
    barmode='relative',
    title_font=dict(size=16, color='MidnightBlue'),
    xaxis=dict(tickangle=45),
)

fig = go.Figure(data, layout)
fig.show()

In [None]:
# Ten rows of df2021 with the highest ladder score
df1 = (
    df2021
    .sort_values(by='Ladder score', ascending=False)
    .reset_index()
    .head(10)
)

# Bar height is the ladder score; bar color encodes healthy life expectancy
fig = px.bar(
    df1,
    x='Country name',
    y='Ladder score',
    color='Healthy life expectancy',
    color_continuous_scale='rdbu',
    hover_data=['Regional indicator'],
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='Top 10 Happiest Countries',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)

fig.show()

* **The top 3 happiest countries are Finland, Denmark and Switzerland.**
* **Nine of the top ten happiest countries are located in Western Europe.**

In [None]:
# Ten rows of df2021 with the lowest ladder score (last ten of a descending sort)
df1 = (
    df2021
    .sort_values(by='Ladder score', ascending=False)
    .reset_index()
    .tail(10)
)

# Bar height is the ladder score; bar color encodes healthy life expectancy
fig = px.bar(
    df1,
    x='Country name',
    y='Ladder score',
    color='Healthy life expectancy',
    color_continuous_scale='ylorbr',
    hover_data=['Regional indicator'],
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='Least 10 happiest countries',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)

fig.show()

* **Burundi,Yemen and Tanzania have the lowest happiness scores.**
* **Seven of the ten countries which have the lowest happiness scores are in Sub-Saharan Africa.**

In [None]:
# Ten rows of df2021 with the highest healthy life expectancy
df1 = (
    df2021
    .sort_values(by='Healthy life expectancy', ascending=False)
    .reset_index()
    .head(10)
)

# Bar height is healthy life expectancy; bar color encodes freedom to make life choices
fig = px.bar(
    df1,
    x='Country name',
    y='Healthy life expectancy',
    color='Freedom to make life choices',
    color_continuous_scale='rdbu',
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='Top 10 countries with the highest healthy life expectancy',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)

fig.show()

* **The top countries - Singapore, Hong Kong and Japan - have the highest values of healthy life expectancy, and all of them are situated in Asia.**
* **Singapore has the highest healthy life expectancy, with a value of 76.953, and its value of freedom to make life choices is also very high.**

In [None]:
# Ten rows of df2021 with the lowest healthy life expectancy (last ten of a descending sort)
df1 = (
    df2021
    .sort_values(by='Healthy life expectancy', ascending=False)
    .reset_index()
    .tail(10)
)

# Bar height is healthy life expectancy; bar color encodes freedom to make life choices
fig = px.bar(
    df1,
    x='Country name',
    y='Healthy life expectancy',
    color='Freedom to make life choices',
    color_continuous_scale='ylorbr',
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='10 countries with the lowest healthy life expectancy',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)

fig.show()

**Afghanistan, Lesotho, Nigeria and Sierra Leone have the least values of healthy life expectancy.**

In [None]:
# Ten rows of df2021 with the highest logged GDP per capita
df1 = (
    df2021
    .sort_values(by='Logged GDP per capita', ascending=False)
    .reset_index()
    .head(10)
)

# Bar height is logged GDP per capita; bar color encodes the ladder score
fig = px.bar(
    df1,
    x='Country name',
    y='Logged GDP per capita',
    color='Ladder score',
    color_continuous_scale='rdbu',
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='Top 10 countries with the highest GDP per capita',
        x=0.5,
        font=dict(size=16, color='MidnightBlue'),
    )
)
fig.show()

* **The top 3 countries with the highest value of GDP are Luxembourg, Singapore and Ireland.**
* **The top 10 countries with the highest value of GDP have higher happiness scores, except Hong Kong.**

In [None]:
# Ten rows of df2021 with the lowest logged GDP per capita (last ten of a descending sort)
df1 = (
    df2021
    .sort_values(by='Logged GDP per capita', ascending=False)
    .reset_index()
    .tail(10)
)

# Bar height is logged GDP per capita; bar color encodes the ladder score
fig = px.bar(
    df1,
    x='Country name',
    y='Logged GDP per capita',
    color='Ladder score',
    color_continuous_scale='ylorbr',
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='10 countries with the lowest GDP per capita',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)
fig.show()

* **Burundi, Malawi and Niger have the lowest values of GDP.**
* **The 10 countries with the lowest GDP scores have the lowest happiness scores, which are lower than the mean happiness score.**

In [None]:
# Ten rows of df2021 with the highest perceptions of corruption
df1 = (
    df2021
    .sort_values(by='Perceptions of corruption', ascending=False)
    .reset_index()
    .head(10)
)

# Bar height is perceptions of corruption; bar color encodes the ladder score
fig = px.bar(
    df1,
    x='Country name',
    y='Perceptions of corruption',
    color='Ladder score',
    color_continuous_scale='rdbu',
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='Top 10 countries with the highest perceptions of corruption',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)

fig.show()

**Croatia, Romania and Bulgaria have the highest values of perceptions of corruption.**

In [None]:
# Ten rows of df2021 with the lowest perceptions of corruption (last ten of a descending sort)
df1 = (
    df2021
    .sort_values(by='Perceptions of corruption', ascending=False)
    .reset_index()
    .tail(10)
)

# Bar height is perceptions of corruption; bar color encodes the ladder score
fig = px.bar(
    df1,
    x='Country name',
    y='Perceptions of corruption',
    color='Ladder score',
    color_continuous_scale='ylorbr',
)

# Centered chart title
fig.update_layout(
    title=dict(
        text='10 countries with the lowest perceptions of corruption',
        x=0.5,
        font=dict(size=16, color='DarkRed'),
    )
)

fig.show()

**Top countries - Singapore, Rwanda, Denmark, Sweden and Finland have the least values of perceptions of corruption.**

In [None]:
# Ten rows with the highest social support.
# Reads from df2021 (not the raw df) to stay consistent with the other
# ranking cells; every column used here is present in df2021.
df1 = (
    df2021
    .sort_values(by=['Social support'], ascending=False)
    .reset_index()
    .head(10)
)

# Bar height is social support; bar color encodes the ladder score,
# and the region is shown on hover.
fig = px.bar(
    df1,
    x='Country name',
    y='Social support',
    color='Ladder score',
    color_continuous_scale='rdbu',
    hover_data=['Regional indicator'],
)

# Centered chart title
fig.update_layout(
    title='Top 10 Countries with the highest social support',
    title_x=0.5,
    title_font=dict(size=16, color='DarkRed'),
)

fig.show()

**The top countries - Iceland, Finland, Norway and Denmark - which have higher happiness scores, also have the highest values of social support, and these countries are all situated in Western Europe.**

In [None]:
# Ten rows with the lowest social support (last ten of a descending sort).
# Reads from df2021 (not the raw df) to stay consistent with the other
# ranking cells; every column used here is present in df2021.
df1 = (
    df2021
    .sort_values(by=['Social support'], ascending=False)
    .reset_index()
    .tail(10)
)

# Bar height is social support; bar color encodes the ladder score,
# and the region is shown on hover.
fig = px.bar(
    df1,
    x='Country name',
    y='Social support',
    color='Ladder score',
    color_continuous_scale='ylorbr',
    hover_data=['Regional indicator'],
)

# Centered chart title
fig.update_layout(
    title='10 Countries with the lowest social support',
    title_x=0.5,
    title_font=dict(size=16, color='DarkRed'),
)

fig.show()

**Afghanistan, Benin and Burundi have the lowest values of social support.**

In [None]:
# Heatmap of pairwise correlations between the numeric columns of df2021.
# df2021 also holds the text columns 'Country name' and 'Regional indicator'.
# pandas >= 2.0 no longer drops non-numeric columns in DataFrame.corr() and
# raises instead, so restrict the frame to numeric columns explicitly.
fig = px.imshow(df2021.select_dtypes(include='number').corr())
fig.show()

* **Happiness score has a strong correlation with GDP, social support and healthy life expectancy.**
* **GDP has a strong correlation with healthy life expectancy and social support.**
* **Freedom to make life choices and happiness score have a moderate correlation.**
* **Perceptions of corruption and happiness score have a weak, negative correlation.**

In [None]:
# Scatter of ladder score against healthy life expectancy, one color per region;
# the country name is used as the hover title.
fig = px.scatter(
    df2021,
    x='Healthy life expectancy',
    y='Ladder score',
    color='Regional indicator',
    hover_name='Country name',
    color_discrete_sequence=px.colors.sequential.thermal,
    template='plotly_dark',
)

# Large, semi-transparent markers with a thin gray outline
fig.update_traces(
    mode='markers',
    marker={
        'size': 25,
        'opacity': 0.7,
        'line': {'width': 1, 'color': 'SlateGray'},
    },
)

fig.show()

**Happier countries tend to be those with longer life expectancies, most of which are in Western Europe.**

In [None]:
# Scatter of logged GDP per capita against healthy life expectancy, one color per region;
# the country name is added to the hover box.
fig = px.scatter(
    df2021,
    x='Healthy life expectancy',
    y='Logged GDP per capita',
    color='Regional indicator',
    hover_data=['Country name'],
    color_discrete_sequence=px.colors.sequential.thermal,
    template='plotly_dark',
)

# Large, semi-transparent markers with a thin gray outline
fig.update_traces(
    mode='markers',
    marker={
        'size': 25,
        'opacity': 0.7,
        'line': {'width': 1, 'color': 'SlateGray'},
    },
)

fig.show()

**The countries with higher value of healthy life expectancy have higher values of GDP.**

In [None]:
# Scatter of ladder score against logged GDP per capita, one color per region;
# the country name is added to the hover box.
fig = px.scatter(
    df2021,
    x='Logged GDP per capita',
    y='Ladder score',
    color='Regional indicator',
    hover_data=['Country name'],
    color_discrete_sequence=px.colors.sequential.thermal,
    template='plotly_dark',
)

# Large, semi-transparent markers with a thin gray outline
fig.update_traces(
    mode='markers',
    marker={
        'size': 25,
        'opacity': 0.7,
        'line': {'width': 1, 'color': 'SlateGray'},
    },
)
fig.show()

**The countries with higher value of GDP have higher happiness scores.**

In [None]:
# Scatter of ladder score against social support, one color per region;
# the country name is used as the hover title.
fig = px.scatter(
    df2021,
    x='Social support',
    y='Ladder score',
    color='Regional indicator',
    hover_name='Country name',
    color_discrete_sequence=px.colors.sequential.thermal,
    template='plotly_dark',
)

# Large, semi-transparent markers with a thin gray outline
fig.update_traces(
    mode='markers',
    marker={
        'size': 25,
        'opacity': 0.7,
        'line': {'width': 1, 'color': 'SlateGray'},
    },
)

fig.show()

**Countries with higher happiness scores also tend to have higher social support scores.**

In [None]:
# Scatter of ladder score against perceptions of corruption, one color per region;
# the country name is added to the hover box.
fig = px.scatter(
    df2021,
    x='Perceptions of corruption',
    y='Ladder score',
    color='Regional indicator',
    hover_data=['Country name'],
    color_discrete_sequence=px.colors.sequential.thermal,
    template='plotly_dark',
)

# Large, semi-transparent markers with a thin gray outline
fig.update_traces(
    mode='markers',
    marker={
        'size': 25,
        'opacity': 0.7,
        'line': {'color': 'SlateGray', 'width': 1},
    },
)

fig.show()

**The countries with the lowest values of perceptions of corruption have the highest happiness scores, including countries situated in Western Europe and in North America and ANZ.**

In [None]:
# Bubble chart of perceptions of corruption against freedom to make life choices
# (log-scaled x axis). Bubble size follows the ladder score, color marks the region,
# and the country name is used as the hover title.
fig = px.scatter(
    df2021,
    x='Freedom to make life choices',
    y='Perceptions of corruption',
    color='Regional indicator',
    size='Ladder score',
    size_max=20,
    log_x=True,
    hover_name='Country name',
    color_discrete_sequence=px.colors.sequential.thermal,
    template='plotly_dark',
)
fig.show()

**The values of freedom and corruption are inversely related, which means that countries with higher values of corruption tend to have lower values of freedom.
On the other hand, some countries situated in Western Europe have high values of corruption too, including Portugal, Italy and Iceland.**

In [None]:
# World map colored by ladder score; rows are matched to map regions by country name.
fig = px.choropleth(
    df2021,
    locations='Country name',
    locationmode='country names',
    color='Ladder score',
    color_continuous_scale='twilight',
    hover_name='Country name',
)

# Centered title; hide the map frame and coastlines, use an equirectangular projection.
fig.update_layout(
    title=dict(
        text='World - Happiness Score',
        x=0.5,
        font=dict(size=16, color='MidnightBlue'),
    ),
    geo={
        'showframe': False,
        'showcoastlines': False,
        'projection_type': 'equirectangular',
    },
)
fig.show()


# **<font color = DarkBlue>Summary of analysis</font>**

* **The top 3 happiest countries in the world are Finland, Denmark and Switzerland.**
* **Happiness score has a strong correlation with GDP, social support and healthy life expectancy.**
* **The happiest countries tend to have longer life expectancy and a higher value of GDP, and most of them are in Western Europe.**
* **Most of the Sub-Saharan African countries have lower level of happiness.** 
* **African countries have lower values of life expectancy, lower values of GDP, and ultimately, have lower happiness scores.**

# **<font color = DarkBlue>Thank you for reading!</font>**

**Please consider upvoting & checking out more of my work if you found this interesting & valuable!
Thanks so much!**

[Link to my Kaggle](http://www.kaggle.com/carriech)