In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the World Happiness Report 2021 CSV; the path is relative to the notebook's working directory.
raw_df= pd.read_csv('../input/world-happiness-report-2021/world-happiness-report-2021.csv')
# Show the first rows as this cell's output.
raw_df.head()

In [None]:
# (rows, columns) of the raw frame.
raw_df.shape

In [None]:
# Per-column dtypes and non-null counts for the raw frame.
raw_df.info()

In [None]:
# Summary statistics of the raw frame, rounded to two decimals for readability.
raw_df.describe().round(2)

In [None]:
# Number of missing values in each column.
raw_df.isna().sum()

In [None]:
# Columns excluded from the working frame `df`; `raw_df` itself is left untouched.
columns_to_drop = [
    'Standard error of ladder score',
    'upperwhisker',
    'lowerwhisker',
    'Explained by: Log GDP per capita',
    'Explained by: Social support',
    'Explained by: Healthy life expectancy',
    'Explained by: Freedom to make life choices',
    'Explained by: Generosity',
    'Explained by: Perceptions of corruption',
    'Dystopia + residual',
    'Ladder score in Dystopia',
]

df = raw_df.drop(columns=columns_to_drop)

# Preview the trimmed frame.
df.head()

In [None]:
# Column labels of the working frame `df`.
df.columns

## Exploratory Analysis

### By Countries

In [None]:
sns.set_palette('GnBu_r')
sns.set_style('whitegrid')

# (column, lower_is_better) pairs. Every metric is ranked "higher is better"
# except 'Perceptions of corruption', where the lowest values are shown as the
# best countries and the highest as the worst.
ranked_metrics = [
    ('Ladder score', False),
    ('Logged GDP per capita', False),
    ('Social support', False),
    ('Healthy life expectancy', False),
    ('Freedom to make life choices', False),
    ('Generosity', False),
    ('Perceptions of corruption', True),
]

# One row per metric: best 15 countries on the left, worst 15 on the right.
# squeeze=False keeps `axes` two-dimensional regardless of the number of rows.
fig, axes = plt.subplots(len(ranked_metrics), 2, figsize=(30, 80), squeeze=False)

for (metric, lower_is_better), (best_ax, worst_ax) in zip(ranked_metrics, axes):
    # Aggregate once per metric and reuse the result for both rankings.
    totals = df.groupby('Country name')[metric].sum()
    best_order = totals.sort_values(ascending=lower_is_better).head(15).index
    worst_order = totals.sort_values(ascending=not lower_is_better).head(15).index

    sns.barplot(x=metric, y='Country name', data=df, order=best_order,
                palette='GnBu_r', ax=best_ax)
    best_ax.set_title('Best 15 Countries - {}'.format(metric), fontsize=20)
    best_ax.set_ylabel('')

    sns.barplot(x=metric, y='Country name', data=df, order=worst_order,
                palette='YlOrRd_r', ax=worst_ax)
    worst_ax.set_title('Worst 15 Countries - {}'.format(metric), fontsize=20)
    worst_ax.set_ylabel('')

In [None]:
plt.figure(figsize=(15, 60))

numeric_columns = [
    'Ladder score',
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]

# Each column fills one row of the 7x2 grid:
# histogram (with KDE overlay) on the left, boxplot on the right.
for row, column in enumerate(numeric_columns):
    left_slot = 2 * row + 1  # subplot positions are 1-based

    plt.subplot(7, 2, left_slot)
    sns.histplot(data=df, x=column, bins=30, kde=True)
    plt.title('Histogram of {}'.format(column), fontsize=20)

    plt.subplot(7, 2, left_slot + 1)
    sns.boxplot(data=df, x=column)
    plt.title('Boxplot of {}'.format(column), fontsize=20)

In [None]:
# Pairwise relationships between the columns of df, drawn with regression fits (kind='reg').
sns.pairplot(df, height= 4, kind= 'reg')

In [None]:
# Correlate only the numeric columns. df still carries the text label columns
# ('Country name', 'Regional indicator'), and pandas >= 2.0 raises on
# non-numeric data in DataFrame.corr() instead of silently dropping it.
# select_dtypes works on old and new pandas alike.
corr = df.select_dtypes(include='number').corr()

plt.figure(figsize=(15, 7))

# Annotated heatmap of the pairwise correlation coefficients.
sns.heatmap(corr, annot=True, linewidths=1, cmap='GnBu')
plt.title('Correlation Matrix', fontsize=20)

### By Regional Indicator

In [None]:
# Count countries per region once and reuse it for both slice sizes and labels.
region_counts = df['Regional indicator'].value_counts()

# Offset every slice slightly. value_counts() sorts in descending order, so the
# last (smallest) slice gets the larger offset. Sizing the list from the data
# keeps plt.pie from raising when the number of regions is not exactly 10.
explode = [0.05] * len(region_counts)
if explode:
    explode[-1] = 0.1

plt.figure(figsize=(10, 7))
plt.pie(x=region_counts, labels=region_counts.index, explode=explode,
        autopct='%.1f%%', shadow=True, startangle=90, pctdistance=.9)

plt.title('Countries / Region', fontsize=20)

plt.show()

In [None]:
# Mean of every numeric indicator per region, rounded to two decimals.
# numeric_only=True skips the 'Country name' text column; without it
# pandas >= 2.0 tries to average the strings and raises a TypeError.
df_region = df.groupby('Regional indicator').mean(numeric_only=True).round(2)

df_region

In [None]:
plt.figure(figsize=(30, 40))

region_metrics = [
    'Ladder score',
    'Logged GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]

# One bar chart per indicator in a 4x2 grid (the eighth slot stays empty),
# with regions ordered from the highest to the lowest regional mean.
for position, metric in enumerate(region_metrics, start=1):
    plt.subplot(4, 2, position)
    ranked_regions = df_region.sort_values(metric, ascending=False).index
    ax = sns.barplot(data=df_region, x=metric, y=df_region.index,
                     order=ranked_regions, palette='GnBu_r')
    plt.title(metric, fontsize=20)
    ax.set_ylabel('')