In [None]:
!pip install bubbly

# 0. Import the libraries

In [None]:
# Notebook-wide imports and one-time plotting setup; every later cell relies on these names.

# for basic operations
import numpy as np 
import pandas as pd 

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the 'fivethirtyeight' matplotlib style to the figures drawn below.
plt.style.use('fivethirtyeight')

# for advanced visualizations
from bubbly.bubbly import bubbleplot 
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
# Prepare the notebook for plotly's offline `iplot` rendering.
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# for algebra
# `pi` is used to compute the axis angles of the spider charts.
from math import pi

# for providing path
import os
# List what is available under ../input before the CSV is loaded.
print(os.listdir("../input"))

# 1. Importing the data

In [None]:
# Load the 2020 World Happiness Report CSV into `wh` and display it.
DATA_PATH = '../input/world-happiness-report/2020.csv'
wh = pd.read_csv(DATA_PATH)
wh

# 2. Removing columns

In [None]:
# Shorten the verbose column names and discard the columns this analysis does not use.

# Original CSV header -> short name used throughout the rest of the notebook.
short_names = {
    'Country name': 'Country',
    'Regional indicator': 'Region',
    'Standard error of ladder score': 'Std error',
    'Logged GDP per capita': 'lg GDP',
    'Healthy life expectancy': 'Life expectancy',
    'Freedom to make life choices': 'Life choices',
    'Perceptions of corruption': 'Corruption',
    'Ladder score': 'Score',
}

# Dystopia baseline and the "Explained by" decomposition columns are dropped.
unused_columns = [
    'Ladder score in Dystopia',
    'Explained by: Log GDP per capita',
    'Explained by: Social support',
    'Explained by: Healthy life expectancy',
    'Explained by: Freedom to make life choices',
    'Explained by: Generosity',
    'Explained by: Perceptions of corruption',
    'Dystopia + residual',
]

wh = wh.rename(columns=short_names).drop(columns=unused_columns)
wh.head()

In [None]:
# Summary statistics of `wh` via DataFrame.describe()

wh.describe()

# 3. Global Rank

In [None]:
# Making a Global Rank
# There are 2 options: 1. A rank from 1 to 153, 1 being the country with the highest Score.
#                      2. A rank from 153 to 1, 153 being the country with the highest Score.
# The rank below is ascending (option 2): a higher Score gets a larger rank number,
# and tied scores share the largest rank of their group (method='max').
global_rank = wh['Score'].rank(method='max', ascending=True)
wh['Global Rank'] = global_rank
wh

In [None]:
# The analysis focuses on the 'Latin America and Caribbean' region;
# list the distinct values of the Region column to see the exact labels used in the data.

wh['Region'].unique()

In [None]:
# Keep only the Latin America and Caribbean rows (row order is unchanged; nothing is sorted here).
# `.copy()` makes `whlat` an independent frame, so adding columns to it later
# neither raises SettingWithCopyWarning nor depends on `wh`.

whlat = wh.loc[wh['Region']=='Latin America and Caribbean',:].copy()
whlat

In [None]:
# Making a Latin Rank (rank of each country within the Latin America and Caribbean rows only)
# There are 2 options: 1. A rank from 1 to 21, 1 being the country with the highest Score.
#                      2. A rank from 21 to 1, 21 being the country with the highest Score.
# The rank below is ascending (option 2), matching how 'Global Rank' is built.

# `assign` returns a new frame instead of writing a column into what may be a slice of `wh`,
# which avoids pandas' SettingWithCopyWarning.
whlat = whlat.assign(**{'Latin Rank': whlat['Score'].rank(method='max', ascending=True)})
whlat

# 4. Visualize the data
### 4.1 Global Ranking vs GDP

In [None]:
# Plot Global Rank against 'lg GDP' for the Latin American and Caribbean countries.
# One bubble per row of `whlat`, coloured by Country and sized by Global Rank.

import warnings
# Silences every warning from this point on in the notebook.
warnings.filterwarnings('ignore')
# NOTE(review): 'lg GDP' is the renamed 'Logged GDP per capita' column, yet the x axis is
# also drawn on a log scale (x_logscale=True) and titled 'GDP per capita' — confirm this is intended.
figure = bubbleplot(dataset = whlat, x_column = 'lg GDP', y_column = 'Global Rank',
                   bubble_column = 'Global Rank', size_column='Global Rank', color_column = 'Country',
                   # y axis shows 'Global Rank', so label it as such (it was mislabelled 'Latin Rank').
                   x_title = 'GDP per capita', y_title = 'Global Rank',x_logscale=True, y_logscale=False,title = 'Global Ranking vs GDP', scale_bubble=1, height=800)
# Plotly config keys are case-sensitive: the option is 'scrollZoom'; 'scrollzoom' is ignored.
iplot(figure, config = {'scrollZoom':True})

### 4.2 Ranking vs Life Expectancy 

In [None]:
# Plot Latin Rank against Life expectancy for the Latin American and Caribbean countries.
# One bubble per row of `whlat`, coloured by Country and sized by Global Rank.

figure = bubbleplot(dataset = whlat,x_column = 'Life expectancy', y_column = 'Latin Rank',
                   bubble_column = 'Global Rank', size_column='Global Rank', color_column = 'Country',
                   x_title = 'Life expectancy', y_title = 'Latin America Rank',title='Ranking vs Life Expectancy',x_logscale=False, y_logscale=False, scale_bubble=1, height=800,width=1200)
# Plotly config keys are case-sensitive: the option is 'scrollZoom'; 'scrollzoom' is ignored.
iplot(figure, config = {'scrollZoom':True})

### 4.3 Spider charts

In [None]:
# Prepare the spider-chart inputs: each Latin American value is divided by the maximum of the
# same column over the whole `wh` frame, so the numbers are comparable fractions of the world maximum.

# Spider-chart label -> source column in `wh` / `whlat`.
spider_sources = {
    'GDP': 'lg GDP',
    'Score': 'Score',
    'Life expectancy': 'Life expectancy',
    'Corruption': 'Corruption',
    'Ranking': 'Global Rank',
}

# Build the DataFrame in one step: the country names first, then one normalised column per label.
whpct = pd.DataFrame({
    'Country': whlat['Country'],
    **{label: whlat[source] / wh[source].max() for label, source in spider_sources.items()},
})

whpct

In [None]:
# Default figure size for this and all following matplotlib figures.
plt.rcParams['figure.figsize'] = (20, 20)

# Axes of every spider chart, in plotting order.
categories = ['Score', 'GDP', 'Life expectancy', 'Corruption', 'Ranking']
N = len(categories)

# Radial grid: tick positions and the labels printed next to them.
radial_ticks = [0, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1]
radial_labels = ['0', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9', '1']

# One entry per chart: (subplot position, title, fill colour, values in `categories` order).
# The values are hard-coded — presumably rounded from `whpct`; TODO confirm against that table.
spider_specs = [
    (221, 'Argentina',  'b', [0.76, 0.85, 0.89, 0.9, 0.64]),
    (222, 'Brasil',     'y', [0.81, 0.83, 0.86, 0.82, 0.79]),
    (223, 'Haiti',      'r', [0.47, 0.64, 0.72, 0.73, 0.07]),
    (224, 'Costa Rica', 'g', [0.91, 0.84, 0.92, 0.84, 0.90]),
]


def plot_spider(position, title, values, fill_color, labels=categories):
    """Draw one spider (radar) chart in a polar subplot and return its axes.

    Parameters
    ----------
    position : int
        Three-digit matplotlib subplot code (e.g. 221).
    title : str
        Title shown above the chart.
    values : sequence of float
        One value per entry of `labels`, in the same order.
    fill_color : str
        Matplotlib colour used to fill the polygon.
    labels : sequence of str
        Names of the axes, placed at equal angles around the circle.

    Raises
    ------
    ValueError
        If `values` and `labels` have different lengths.
    """
    if len(values) != len(labels):
        raise ValueError('values and labels must have the same length')

    count = len(labels)
    # Angle of each axis: the circle is divided evenly between the variables.
    axis_angles = [i / float(count) * 2 * pi for i in range(count)]
    # Repeat the first point at the end so the polygon closes.
    closed_angles = axis_angles + axis_angles[:1]
    closed_values = list(values) + list(values[:1])

    ax = plt.subplot(position, polar=True)

    # One axis per variable, with its label.
    ax.set_xticks(axis_angles)
    ax.set_xticklabels(labels, color='grey', size=14)

    # Radial labels and range.
    ax.set_rlabel_position(0)
    ax.set_yticks(radial_ticks)
    ax.set_yticklabels(radial_labels, color="grey", size=10)
    ax.set_ylim(0, 1.2)

    # Outline and translucent fill of the polygon.
    ax.plot(closed_angles, closed_values, linewidth=1, linestyle='solid')
    ax.fill(closed_angles, closed_values, fill_color, alpha=0.1)

    for tick_label in ax.get_xticklabels():
        tick_label.set_horizontalalignment('center')

    ax.set_title(title, fontsize = 25)
    return ax


# Draw the four charts on a 2x2 grid.
for position, title, fill_color, country_values in spider_specs:
    ax = plot_spider(position, title, country_values, fill_color)

# Show the graph
plt.show()

### 4.4 Region vs Score - Box plot  

In [None]:
# Calculate the mean Score by Region.
# Result: a DataFrame with one row per Region and its mean Score.
# (Previously a DataFrame was built from a list holding a GroupBy object and a Series,
# which does not produce a mean-by-region table.)
data = wh.groupby('Region', as_index=False)['Score'].mean()
data

In [None]:
# Horizontal box plot of Score for every Region, one box per Region.
ax = sns.boxplot(data=wh, x='Score', y='Region', orient='h', palette='Set3')
ax.set_title(label='Score by Region', loc='center', fontsize=40, pad=40)

In [None]:
# Box plots of four indicators restricted to the Latin America and Caribbean region.

# Filter once. Columns are passed to seaborn BY NAME so they are read from this filtered
# frame; passing `x=wh[...]` (the full Series) would bypass the filter and plot all regions.
latam = wh.loc[wh['Region']=='Latin America and Caribbean',:]

# (subplot position, column to plot, panel title, box colour)
panels = [
    (221, 'Score',           'Happiness',       'tab:green'),
    (222, 'Life expectancy', 'Life expectancy', 'tab:purple'),
    (223, 'lg GDP',          'GDP',             'tab:pink'),
    (224, 'Social support',  'Social support',  'tab:blue'),
]

plt.figure(figsize=(20,10))
for position, column, panel_title, box_color in panels:
    ax = plt.subplot(position)
    sns.boxplot(orient = 'h', x=column, data=latam, color=box_color, ax=ax)
    ax.set_ylabel('Latin America', fontsize=15)
    ax.set_title(label=panel_title, loc='left', fontsize=20)

### 4.5 Correlation World Happiness
This chart is really powerful: it gives us an idea of which variables have the most influence on happiness around the world.

In [None]:
# Select the columns to analyze, by position: columns 0-2 and 6-12 of `wh`.
# NOTE(review): positional selection depends on the current column order of `wh`
# (presumably Country, Region, Score and 'lg GDP' through 'Global Rank') — verify.
selected_positions = list(range(0, 3)) + list(range(6, 13))
whcorr = wh.iloc[:, selected_positions]

In [None]:
# Increase the size of the heatmap.
plt.figure(figsize=(20, 7))
# Correlations are only defined for numeric columns. Any text columns in `whcorr` are dropped
# first, because recent pandas versions raise on non-numeric data instead of skipping it.
correlations = whcorr.select_dtypes(include='number').corr()
# Store the heatmap object in a variable so more features (such as a title) can be added to it.
# Colour range is fixed to [-1, 1] and annot=True prints each correlation value in its cell.
heatmap = sns.heatmap(correlations, vmin=-1, vmax=1, annot=True)
# Give a title to the heatmap. Pad defines the distance of the title from the top of the heatmap.
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':24}, pad=25)

*If we look at the first row, for example, we can see which variables most affect world happiness. These variables are:*
* 1-GDP
* 2-Life expectancy
* 3-Social support
* 4-Life choices
* 5-Corruption

*We can also see the relationships between other variables: Life expectancy and GDP, for example, are strongly correlated.*

# 5. Conclusion
*We analyzed, from different angles, what affects happiness around the world.*

*This is an EDA report made by Maximiliano Pona and Matias Ferraro.*

*Social: https://www.linkedin.com/in/maximilianoezequielpona/*

*Social: https://www.linkedin.com/in/matiasnferraro/*

**Thanks for reading**