In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Read the World Happiness Report 2021 CSV into a DataFrame and preview its first rows.
df = pd.read_csv(
    "../input/world-happiness-report-2021/world-happiness-report-2021.csv"
)
df.head()

# Data Dictionary

> 'Happiness score' or subjective well-being (variable name 'Ladder score').
The English wording of the question is “Please imagine a ladder, with steps numbered from 0 at the bottom to 10 at the top.
The top of the ladder represents the best possible life for you and the bottom of the ladder represents the worst possible life for you. On which step of the ladder would you say you personally feel you stand at this time?” This measure is also referred to as the Cantril life ladder, or just the life ladder in our analysis.

> 'Logged GDP per capita' is the logarithm of GDP per capita (i.e. per person).

> 'Healthy Life Expectancy (HLE)'. Healthy life expectancies at birth are based on the data extracted from the World Health Organization’s (WHO) Global Health Observatory data repository (Last updated: 2020-09-28). The data at
the source are available for the years 2000, 2005, 2010, 2015 and 2016. To match this report’s sample period (2005-2020), interpolation and extrapolation are used.

> 'Social support' (or having someone to count on in times of trouble) is the national average of the binary responses (either 0 or 1) to the GWP question “If you were in trouble, do you have relatives or friends you can count on to help you
whenever you need them, or not?”

> 'Freedom to make life choices' is the national average of responses to the GWP question “Are you satisfied or dissatisfied with your freedom to choose what you do with your life?”

> 'Generosity' is the residual of regressing national average of response to the GWP question “Have you donated money to a charity in the past month?” on GDP per capita.

> 'Corruption Perception': The measure is the national average of the survey responses to two questions in the GWP: “Is corruption widespread throughout the government or not” and “Is corruption widespread within businesses or
not?” The overall perception is just the average of the two 0-or-1 responses. In case the perception of government corruption is missing, we use the perception of business corruption as the overall perception. The corruption perception at
the national level is just the average response of the overall perception at the individual level.


## Removing spaces from column names

In [None]:
# Drop every space from the column labels so that columns can be reached as
# attributes later on (e.g. df.Ladderscore).
df.columns = [''.join(label.split(' ')) for label in df.columns]

In [None]:
# One boolean per column: True when that column contains at least one missing value.
df.isnull().any(axis=0)

In [None]:
# Display the column labels currently set on df.
df.columns

## Top 10 Happiest countries

In [None]:
# SQL equivalent:
#   select Countryname, avg(Ladderscore) as Ladderscore
#   from df group by Countryname order by Ladderscore desc limit 10
#
# Only the Ladderscore column is aggregated. Averaging the whole frame would also
# try to average non-numeric columns (such as Regionalindicator), which raises
# TypeError on pandas >= 2.0. The result has the same two columns as before:
# Countryname and Ladderscore.
fd = (
    df.groupby("Countryname")["Ladderscore"]
    .mean()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)

In [None]:
# Display the ten countries with the highest ladder score.
fd

In [None]:
# Mean ladder score across all rows of df; used below as the filter threshold.
ladder_avg = df.Ladderscore.mean()

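## SQL equivalent: `select Countryname from df where Ladderscore > ladder_avg`
`head()` limits the number of rows shown, and `reset_index()` (used in the top-10 cell above) moves the group labels back into a regular column and restores a default integer index.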


In [None]:
# First few country names (in row order) whose ladder score is above the overall mean.
df.loc[df["Ladderscore"] > ladder_avg, "Countryname"].head()

## SQL equivalent: `select Regionalindicator, avg(<each numeric column>) from df group by Regionalindicator order by avg(Ladderscore) desc, Regionalindicator desc`

In [None]:
# Per-region mean of every numeric column, ordered by Ladderscore and then by
# region name, both descending.
# numeric_only=True skips text columns such as Countryname; without it .mean()
# raises TypeError on pandas >= 2.0.
regional = (
    df.groupby("Regionalindicator")
    .mean(numeric_only=True)
    .sort_values(["Ladderscore", "Regionalindicator"], ascending=[False, False])
    .reset_index()
)

In [None]:
# Display the per-region means, ordered by ladder score.
regional

## Barplot according to Ladder score

In [None]:
# Horizontal bar chart: one bar per region, bar length = regional mean ladder score.
sns.catplot(
    data=regional,
    x='Ladderscore',
    y='Regionalindicator',
    kind='bar',
)

## Perception of Corruption

In [None]:
# Per-region mean of every numeric column, ordered by Perceptionsofcorruption and
# then by region name, both descending.
# numeric_only=True skips text columns such as Countryname; without it .mean()
# raises TypeError on pandas >= 2.0.
reg_corruption = (
    df.groupby("Regionalindicator")
    .mean(numeric_only=True)
    .sort_values(
        ["Perceptionsofcorruption", "Regionalindicator"],
        ascending=[False, False],
    )
    .reset_index()
)

In [None]:
# Display the per-region means, ordered by perception of corruption.
reg_corruption

In [None]:
# Horizontal bar chart of the regional mean perception of corruption, with one
# 'husl' colour per region.
# seaborn >= 0.13 deprecates passing `palette` without `hue`, so the region is
# also mapped to hue. dodge=False keeps each bar centred on its own row on older
# seaborn releases, and legend=False drops a legend that would only repeat the
# axis labels.
sns.catplot(
    data=reg_corruption,
    x='Perceptionsofcorruption',
    y='Regionalindicator',
    hue='Regionalindicator',
    kind='bar',
    palette='husl',
    dodge=False,
    legend=False,
)

> Central and Eastern Europe has the highest perception of corruption

## Dependency of Ladder score on other parameters

In [None]:
def lmplot(ycomp, df):
    """Draw a scatter plot with a regression fit of `ycomp` against 'Ladderscore'.

    Parameters
    ----------
    ycomp : name of the column in `df` placed on the y-axis.
    df : data handed to seaborn as `data`; it is expected to contain both
        `ycomp` and 'Ladderscore'.

    Returns
    -------
    None. The figure is produced as a side effect of `sns.lmplot`.

    Note: later cells in this notebook define other functions that are also
    called `lmplot`, so this version is only active until one of them runs.
    """
    point_style = {'color': 'grey'}
    fit_style = {'color': 'red', 'linewidth': '3'}
    sns.lmplot(
        data=df,
        x='Ladderscore',
        y=ycomp,
        markers='x',
        scatter=True,
        scatter_kws=point_style,
        line_kws=fit_style,
        height=6,
        aspect=1,
    )

In [None]:
# Columns to compare against Ladderscore; one regression figure is drawn per column
# by the `lmplot` helper defined in the cell directly above.
y_variable = [
    'LoggedGDPpercapita',
    'Socialsupport',
    'Healthylifeexpectancy',
    'Freedomtomakelifechoices',
    'Generosity',
    'Perceptionsofcorruption',
]
for column in y_variable:
    lmplot(column, df)

## From the `lmplot` figures above, the relationship between Ladder score and each parameter is easy to read, except for "Generosity", which stays more or less constant

## Now checking 'Generosity' with other parameters

In [None]:
def lmplot(xcomp, df):
    """Draw a scatter plot with a regression fit of 'Generosity' against `xcomp`.

    Parameters
    ----------
    xcomp : name of the column in `df` placed on the x-axis.
    df : data handed to seaborn as `data`; it is expected to contain both
        `xcomp` and 'Generosity'.

    Returns
    -------
    None. The figure is produced as a side effect of `sns.lmplot`.

    Note: this notebook defines `lmplot` in more than one cell; whichever
    definition ran last is the one in effect.
    """
    point_style = {'color': 'green'}
    fit_style = {'color': 'blue', 'linewidth': '3'}
    sns.lmplot(
        data=df,
        x=xcomp,
        y='Generosity',
        markers='x',
        scatter=True,
        scatter_kws=point_style,
        line_kws=fit_style,
        height=6,
        aspect=1,
    )

In [None]:
# Columns to compare against Generosity; one regression figure is drawn per column
# by the `lmplot` helper defined in the cell directly above.
x_variable = [
    'LoggedGDPpercapita',
    'Freedomtomakelifechoices',
    'Perceptionsofcorruption',
    'Socialsupport',
]
for column in x_variable:
    lmplot(column, df)

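## Some amazing results:
> Higher Logged GDP per capita goes with lower Generosity: people tend to be less generous as GDP per capita increases. (Keep in mind that, per the data dictionary above, 'Generosity' is already the residual left after regressing donations on GDP per capita, and that these plots show association, not causation.)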

> increase in Freedom to make life choices => increase in Generosity

> increase in Generosity => decrease in Perception of Corruption

## Social support and Generosity show an inverse relation

## Checking for 'Social support'

In [None]:
def lmplot(xcomp, df):
    """Draw a scatter plot with a regression fit of 'Socialsupport' against `xcomp`.

    Parameters
    ----------
    xcomp : name of the column in `df` placed on the x-axis.
    df : data handed to seaborn as `data`; it is expected to contain both
        `xcomp` and 'Socialsupport'.

    Returns
    -------
    None. The figure is produced as a side effect of `sns.lmplot`.

    Note: this notebook defines `lmplot` in more than one cell; whichever
    definition ran last is the one in effect.
    """
    point_style = {'color': 'orange'}
    fit_style = {'color': 'red', 'linewidth': '3'}
    sns.lmplot(
        data=df,
        x=xcomp,
        y='Socialsupport',
        markers='x',
        scatter=True,
        scatter_kws=point_style,
        line_kws=fit_style,
        height=6,
        aspect=1,
    )

In [None]:
# Columns to compare against Socialsupport; one regression figure is drawn per column
# by the `lmplot` helper defined in the cell directly above.
x_variable = [
    'LoggedGDPpercapita',
    'Freedomtomakelifechoices',
    'Perceptionsofcorruption',
    'Generosity',
]
for column in x_variable:
    lmplot(column, df)

In [None]:
# Columns that go into the correlation matrix computed in the next cell.
parameters = [
    'Ladderscore',
    'LoggedGDPpercapita',
    'Socialsupport',
    'Freedomtomakelifechoices',
    'Generosity',
    'Perceptionsofcorruption',
]

In [None]:
# Pairwise correlations (pandas' default method) between the selected columns.
corr_matrix = df.loc[:, parameters].corr()

In [None]:
# Annotated heatmap of the correlation matrix, drawn on a new 8x5 figure.
plt.figure(figsize=(8, 5))
sns.heatmap(data=corr_matrix, annot=True)

In [None]:
# 2x2 grid of regression plots: Generosity and Socialsupport (rows) against
# Perceptionsofcorruption and LoggedGDPpercapita (columns), with red fit lines.
x = sns.PairGrid(
    df,
    x_vars=['Perceptionsofcorruption', 'LoggedGDPpercapita'],
    y_vars=['Generosity', 'Socialsupport'],
)
x = x.map(sns.regplot, line_kws={'color': 'red'})