# This is a basic project using the Plotly library to explore the World Happiness Report dataset.

**Plotly is used to create interactive charts and graphs.**

In [None]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import sklearn
import numpy as np

In [None]:
# CSV imports and table manipulations

# Metric columns kept from both happiness files (same names after renaming).
metric_cols = ['Ladder score', 'Logged GDP per capita', 'Social support',
               'Healthy life expectancy', 'Freedom to make life choices',
               'Generosity', 'Perceptions of corruption']

# World happiness table 2005-2020: rename its columns to match the 2021 file,
# then keep only the identifying columns plus the shared metrics.
history_raw = pd.read_csv('/kaggle/input/world-happiness-report-2021/world-happiness-report.csv')
history_renames = {'Country name': 'Country',
                   'Life Ladder': 'Ladder score',
                   'Log GDP per capita': 'Logged GDP per capita',
                   'Healthy life expectancy at birth': 'Healthy life expectancy'}
data = history_raw.rename(columns=history_renames)[['Country', 'year'] + metric_cols]

# World happiness table 2021: the file is tagged with a constant year column
# here so it can be stacked under the 2005-2020 table.
data_2021 = (
    pd.read_csv('/kaggle/input/world-happiness-report-2021/world-happiness-report-2021.csv')
    .assign(year=2021)
    .rename(columns={'Country name': 'Country'})
)
data_2021 = data_2021[['Country', 'year', 'Regional indicator'] + metric_cols]

# Append the 2021 rows (without the region column) to the end of `data`.
frames = [data, data_2021[['Country', 'year'] + metric_cols]]
data = pd.concat(frames)

# 2020 population snapshot, keyed by 'Country' like the happiness tables.
pop = (
    pd.read_csv('../input/population-by-country-2020/population_by_country_2020.csv')
    .rename(columns={'Country (or dependency)': 'Country'})
)

# 2005-2020 population history: one column per year in the file, reshaped
# to one row per (Country, year) so it can be joined on both keys.
year_cols = [str(year) for year in range(2005, 2021)]  # '2005' ... '2020'
world_pop_raw = pd.read_csv('../input/population-by-country-19602020/world_pop.csv', skiprows=4)
data_pop = (
    world_pop_raw[['Country Name'] + year_cols]
    .rename(columns={'Country Name': 'Country'})
    .melt(id_vars=['Country'], var_name='year', value_name='Population')
)

# The melted year labels are strings; cast both tables' year columns to int
# so they compare equal when used as a join key.
data_pop['year'] = data_pop['year'].astype(int)
data['year'] = data['year'].astype(int)

# Attach each country's region (only present in the 2021 file) to every
# yearly row; a left join keeps countries that are absent from the 2021 file.
region_lookup = data_2021[['Country', 'Regional indicator']]
data = data.merge(region_lookup, on='Country', how='left')

# Attach yearly population. This is an inner join (the pandas default), so rows
# without a matching (Country, year) population record are dropped.
# NOTE(review): data_pop is built from the 2005-2020 columns only, so this
# appears to drop the 2021 rows appended earlier - confirm that is intended.
data = data.merge(data_pop, on=['Country', 'year'], how='inner')

# Attach the 2020 population figure to the 2021 table (left join: countries
# without a population match are kept).
population_2020 = pop[['Country', 'Population (2020)']]
data_2021 = data_2021.merge(population_2020, on='Country', how='left')

# Discrete happiness categories derived from the ladder score.
# pd.cut uses right-closed intervals by default, giving:
#   Not at all happy  (0, 3]
#   Not happy         (3, 4.5]
#   Somewhat happy    (4.5, 6]
#   Happy             (6, 7.5]
#   The happiest      (7.5, 10]
happiness_bins = [0, 3, 4.5, 6, 7.5, 10]
happiness_labels = ['Not at all happy', 'Not happy', 'Somewhat happy', 'Happy', 'The happiest']

# Insert the label column at position 3 in `data` and position 4 in `data_2021`.
for frame, position in ((data, 3), (data_2021, 4)):
    categories = pd.cut(frame['Ladder score'], bins=happiness_bins, labels=happiness_labels)
    frame.insert(position, 'Happiness Label', categories)


# 14 countries in the data table have no regional indicator after the merge
# with the 2021 file, so their region is supplied manually.
#
# The lookup is keyed by country name rather than by row label: row labels
# depend on the exact contents and ordering of the input CSVs and on the
# merges above, so hard-coded labels silently hit the wrong rows as soon as
# any of those change.
manual_regions = {
    'Cuba': 'Latin America and Caribbean',
    'Trinidad and Tobago': 'Latin America and Caribbean',
    'Angola': 'Sub-Saharan Africa',
    'Belize': 'Latin America and Caribbean',
    'Bhutan': 'South Asia',
    'Central African Republic': 'Sub-Saharan Africa',
    'Djibouti': 'Sub-Saharan Africa',
    'Somalia': 'Sub-Saharan Africa',
    'Sudan': 'Sub-Saharan Africa',
    'South Sudan': 'Sub-Saharan Africa',
    'Guyana': 'Latin America and Caribbean',
    'Oman': 'Middle East and North Africa',
    'Qatar': 'Middle East and North Africa',
    'Suriname': 'Latin America and Caribbean',
}

# Only rows whose region is missing are filled; regions that came from the
# 2021 file are left untouched, and countries not listed above stay missing.
data['Regional indicator'] = data['Regional indicator'].fillna(
    data['Country'].map(manual_regions)
)



In [None]:
# Pearson correlation table for the 2021 data.
# data_2021 also holds text/categorical columns (Country, Regional indicator,
# Happiness Label). pandas >= 2.0 no longer drops non-numeric columns silently
# in DataFrame.corr() and raises instead, so the numeric columns are selected
# explicitly; this behaves the same on older pandas versions.
data2021_corr = data_2021.select_dtypes(include='number').corr(method='pearson')

# Heatmap of the correlation matrix: rows and columns are the numeric features.
fig = go.Figure(go.Heatmap(
    # Cell values plus matching axis labels
    z=data2021_corr.values.tolist(),
    x=data2021_corr.columns,
    y=data2021_corr.columns,
    # Color scale
    colorscale='darkmint',
    # Pin the color range to the full range of a correlation coefficient
    zmin=-1, zmax=1))

fig.update_layout(title_text = "Correlation Plot")
fig.show()

The correlation plot shows that GDP has the highest correlation with ladder score, closely followed by healthy life expectancy and then social support.

In [None]:
# Ladder score by region, drawn as bars and colored by region.
fig = px.bar(data_2021,
             x='Regional indicator',
             y='Ladder score',
             color='Regional indicator')

# Each button issues an 'update' that changes the trace type of the figure.
bar_button = dict(label="Bar Plot",
                  method='update',
                  args=[dict(type='bar')])
scatter_button = dict(label='Scatterplot',
                      method='update',
                      args=[dict(type='scatter', mode='markers')])
my_buttons = [bar_button, scatter_button]

# Vertical button group placed to the right of the plot; the first button
# (bar plot) starts out active.
chart_type_menu = dict(type='buttons',
                       direction='down',
                       x=1.3, y=0.2,
                       showactive=True,
                       active=0,
                       buttons=my_buttons)

fig.update_layout(
    updatemenus=[chart_type_menu],
    title_text="Use of Buttons to Switch Between a Bar Chart and Scatterplot <br><sup>Ladder Score by Region</sup>")

fig.show()

This visualization uses two buttons to switch between the types of chart shown.

It shows the distribution of ladder scores by region as either a bar plot or a scatterplot.

In [None]:
# Fixed pastel color per region so regions are easy to tell apart.
region_colors = {
    'Western Europe': 'rgb(165, 200, 228)',                      # blue
    'North America and ANZ': 'rgb(192, 236, 204)',               # green
    'Middle East and North Africa': 'rgb(249, 240, 193)',        # yellow
    'Latin America and Caribbean': 'rgb(244, 205, 166)',         # orange
    'Central and Eastern Europe':  'rgb(246, 168, 166)',         # red
    'East Asia': 'rgb(216, 195, 224)',                           # purple
    'Southeast Asia': 'rgb(164, 220, 252)',                      # light blue
    'Commonwealth of Independent States': 'rgb(249, 227, 235)',  # pink
    'Sub-Saharan Africa': 'rgb(204, 183, 229)',                  # darker purple
    'South Asia': 'rgb(220, 210, 200)',                          # brown
}

# One bar per country, colored by region; hover shows ladder score and GDP.
fig = px.bar(
    data_2021,
    x='Country',
    y='Ladder score',
    title='Interactive Bar Plot of Ladder Scores by Country and Region <br><sup>Highlighting Outliers by Region</sup>',
    color='Regional indicator',
    color_discrete_map=region_colors,
    hover_data=['Ladder score', 'Logged GDP per capita'],
    hover_name='Country',
)

# Countries to call out: (x category, y position of the arrow tip, label text).
# The y positions are hand-entered values, not looked up from the data.
highlighted = [
    ('Costa Rica', 7.08, 'Costa Rica'),
    ('Czech Republic', 6.9, 'Czech Republic'),
    ('Taiwan Province of China', 6.584, 'Taiwan'),
    ('Mauritius', 6.049, 'Mauritius'),
]

annotations = [
    {'x': country, 'y': y_pos, 'showarrow': True, 'arrowhead': 4,
     'font': {'color': 'black', 'size': 10}, 'text': label}
    for country, y_pos, label in highlighted
]

fig.update_layout(annotations=annotations)

# Show the plot
fig.show()

Costa Rica, the Czech Republic, Taiwan, and Mauritius are much happier than the other countries in their respective regions.

In [None]:
# One box-plot panel per feature, stacked vertically and sharing the region axis.
# Each entry is (column in data_2021, trace name shown in the legend).
box_panels = [
    ('Ladder score', 'Ladder Score'),
    ('Logged GDP per capita', 'Logged GDP per capita'),
    ('Healthy life expectancy', 'Healthy life expectancy'),
    ('Social support', 'Social support'),
]

fig = make_subplots(rows=len(box_panels), cols=1, shared_xaxes=True, vertical_spacing=0.02)

for row, (column, trace_name) in enumerate(box_panels, start=1):
    panel = go.Box(
        x=data_2021["Regional indicator"],
        y=data_2021[column],
        name=trace_name,
        # Hover shows the country name in bold, taken from `text`.
        hovertemplate='<b>%{text}</b>',
        text=data_2021['Country'],
    )
    fig.add_trace(panel, row=row, col=1)

fig.update_layout(
    height=1000,
    width=950,
    title_text="Distribution of Ladder Scores and Features <br><sup>Highlighting Outliers by Region</sup>",
)
fig.show()

These box plots show which countries are outliers for certain features in their region.

Haiti consistently ranks really low compared to other countries in Latin America and the Caribbean.

Luxembourg has an unusually high GDP and Singapore has unusually high life expectancy.

In [None]:
# Order the rows by year so the animation frames play chronologically.
data = data.sort_values(by='year')

# Bubble chart: one bubble per country, sized by population, colored by
# region, animated over the years present in `data`.
# NOTE(review): log_x=True puts a log axis on a column that is already a
# logged value - confirm the double log is intended.
# NOTE(review): the title says 2005-2021, but the population join earlier
# appears to keep only 2005-2020 rows - confirm the year range shown.
scatter_options = dict(
    x="Logged GDP per capita",
    y="Ladder score",
    animation_frame='year',
    animation_group="Country",
    size="Population",
    hover_name="Country",
    log_x=True,
    size_max=55,
    range_x=[6.5, 12],
    range_y=[2, 9],
    color='Regional indicator',
    title='Animation of Ladder Scores and GDP by Country from 2005-2021',
)

px.scatter(data, **scatter_options)

In [None]:
# Overlay two traces on a shared country axis: ladder score as bars and
# logged GDP per capita as a line with markers.
countries = data_2021['Country']

ladder_bars = go.Bar(
    x=countries,
    y=data_2021['Ladder score'],
    name='Ladder Scores',
)
gdp_line = go.Scatter(
    x=countries,
    y=data_2021['Logged GDP per capita'],
    name='Logged GDP per capita',
    mode='lines+markers',
)

fig = go.Figure(data=[ladder_bars, gdp_line])
fig.update_layout(title_text='Ladder Scores vs GDP by Country')

fig.show()

This shows where GDP per capita does not contribute to a country's happiness and which countries have a high GDP yet rank very low in happiness. This is accomplished by overlaying a line chart on a bar chart.

Also interesting to note: across roughly the last 100 countries, GDP doesn't seem to make much of a difference to the happiness ranking. GDP is all over the place in the latter half of the chart.