In [2]:
#Dependencies

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px # for visualization 
import os
import plotly.graph_objects as go
from scipy.stats import linregress
import scipy.stats as sts

In [3]:
# Load the 2016 CSV from the Resources folder one level above this notebook

resource_parts = ('..', 'Resources', '2016.csv')
csvpath_2016 = os.path.join(*resource_parts)
csv_2016 = pd.read_csv(filepath_or_buffer=csvpath_2016)


In [4]:
# Restructure / rename the 2016 dataset.
# Only the columns listed in the mapping below are kept (Region included),
# in this order, and each one is renamed to the value on the right.
# NOTE(review): presumably the new names are meant to line up with the other
# yearly datasets (2015 - 2017) -- confirm against the sibling notebooks.

column_renames = {
    "Happiness Rank": "Overall rank",
    "Country": "Country",
    "Region": "Region",
    "Happiness Score": "Score",
    "Economy (GDP per Capita)": "GDP per capita",
    "Family": "Social support",
    "Health (Life Expectancy)": "Healthy life expectancy",
    "Freedom": "Freedom to make life choices",
    "Generosity": "Generosity",
    "Trust (Government Corruption)": "Perceptions of corruption",
}

# Select the source columns (dict keys, in insertion order), then rename them
selected_columns = list(column_renames)
csv_2016_restructured = csv_2016[selected_columns].rename(columns=column_renames)

# Preview the first rows of the restructured frame
csv_2016_restructured.head(15)

Unnamed: 0,Overall rank,Country,Region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Denmark,Western Europe,7.526,1.44178,1.16374,0.79504,0.57941,0.36171,0.44453
1,2,Switzerland,Western Europe,7.509,1.52733,1.14524,0.86303,0.58557,0.28083,0.41203
2,3,Iceland,Western Europe,7.501,1.42666,1.18326,0.86733,0.56624,0.47678,0.14975
3,4,Norway,Western Europe,7.498,1.57744,1.1269,0.79579,0.59609,0.37895,0.35776
4,5,Finland,Western Europe,7.413,1.40598,1.13464,0.81091,0.57104,0.25492,0.41004
5,6,Canada,North America,7.404,1.44015,1.0961,0.8276,0.5737,0.44834,0.31329
6,7,Netherlands,Western Europe,7.339,1.46468,1.02912,0.81231,0.55211,0.47416,0.29927
7,8,New Zealand,Australia and New Zealand,7.334,1.36066,1.17278,0.83096,0.58147,0.49401,0.41904
8,9,Australia,Australia and New Zealand,7.313,1.44443,1.10476,0.8512,0.56837,0.47407,0.32331
9,10,Sweden,Western Europe,7.291,1.45181,1.08764,0.83121,0.58218,0.38254,0.40867


In [5]:
# Merge fine-grained regions into broader groups so the plots have fewer
# colour categories:
#   - the three Asian sub-regions become one "Asia" group
#   - the two European sub-regions become one "Europe" group
region_groups = {
    "Southeastern Asia": "Asia",
    "Southern Asia": "Asia",
    "Eastern Asia": "Asia",
    "Central and Eastern Europe": "Europe",
    "Western Europe": "Europe",
}
csv_2016_restructured["Region"] = csv_2016_restructured["Region"].replace(region_groups)

# Count countries per region to check the regrouping worked
csv_2016_restructured["Region"].value_counts()
  

Europe                             50
Sub-Saharan Africa                 38
Latin America and Caribbean        24
Asia                               22
Middle East and Northern Africa    19
North America                       2
Australia and New Zealand           2
Name: Region, dtype: int64

In [13]:
# Happiness score vs. GDP per capita

# Establish x and y values
x_values = csv_2016_restructured["Score"]
y_values = csv_2016_restructured["GDP per capita"]

# Scatter plot: one marker per row, coloured by region, country name on hover
fig = px.scatter(csv_2016_restructured, x=x_values, y=y_values, color="Region", hover_name="Country",
                 range_x=[0, 8], title="Happiness Score compared to GDP per Capita Score",
                 width=1000, height=600)  # use trendline="ols" to see all lin reg equations

# Set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='GDP per Capita')

# Formatting: marker size and a thin dark outline around each marker
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1,
                                        color='DarkSlateGrey')))

# Linear regression of y on x.
# Fit once and reuse the result: it is kept as `res` and also unpacked into
# the individual statistics, instead of running the same fit a second time.
res = linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res

# Fitted y for every x (computed here but not added to the figure in this cell)
regress_values = x_values * slope + intercept

# Print the fitted slope, intercept and R-squared (rvalue squared)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Print out the line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()


slope:0.29
intercept:-0.58
R-squared: 0.6246
y=0.29x + -0.58


In [12]:
# Happiness score vs. social support

# Establish x and y values
x_values = csv_2016_restructured["Score"]
y_values = csv_2016_restructured["Social support"]

# Scatter plot: one marker per row, coloured by region, country name on hover.
# Title wording fixed ("Social Support Score") so it agrees with the y-axis label.
fig = px.scatter(csv_2016_restructured, x=x_values, y=y_values, color="Region", hover_name="Country",
                 range_x=[0, 8], title="Happiness Score compared to Social Support Score",
                 width=1000, height=600)  # use trendline="ols" to see all lin reg equations

# Set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Social Support Score')

# Formatting: marker size and a thin dark outline around each marker
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1,
                                        color='DarkSlateGrey')))

# Linear regression of y on x.
# Fit once and reuse the result: it is kept as `res` and also unpacked into
# the individual statistics, instead of running the same fit a second time.
res = linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res

# Fitted y for every x (computed here but not added to the figure in this cell)
regress_values = x_values * slope + intercept

# Print the fitted slope, intercept and R-squared (rvalue squared)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Print out the line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.17
intercept:-0.14
R-squared: 0.5465
y=0.17x + -0.14


In [14]:
# Happiness score vs. healthy life expectancy

# Establish x and y values
x_values = csv_2016_restructured["Score"]
y_values = csv_2016_restructured["Healthy life expectancy"]

# (The leftover `px.data.iris()` sample-data load was removed: nothing in this
# cell used it and it is unrelated to the happiness data.)

# Scatter plot: one marker per row, coloured by region, country name on hover
fig = px.scatter(csv_2016_restructured, x=x_values, y=y_values, color="Region", hover_name="Country",
                 range_x=[0, 8], title="Happiness Score compared to Healthy Life Expectancy Score",
                 width=1000, height=600)  # use trendline="ols" to see all lin reg equations

# Set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Healthy Life Expectancy')

# Formatting: marker size and a thin dark outline around each marker
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1,
                                        color='DarkSlateGrey')))

# Linear regression of y on x.
# Fit once and reuse the result: it is kept as `res` and also unpacked into
# the individual statistics, instead of running the same fit a second time.
res = linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res

# Fitted y for every x (computed here but not added to the figure in this cell)
regress_values = x_values * slope + intercept

# Print the fitted slope, intercept and R-squared (rvalue squared)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Print out the line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.15
intercept:-0.27
R-squared: 0.5858
y=0.15x + -0.27


In [15]:
# Happiness score vs. freedom to make life choices

# Establish x and y values
x_values = csv_2016_restructured["Score"]
y_values = csv_2016_restructured["Freedom to make life choices"]

# Scatter plot: one marker per row, coloured by region, country name on hover
fig = px.scatter(csv_2016_restructured, x=x_values, y=y_values, color="Region", hover_name="Country",
                 range_x=[0, 8], title="Happiness Score compared to Freedom to Make Life Choices Score",
                 width=1000, height=600)  # use trendline="ols" to see all lin reg equations

# Set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Freedom to Make Life Choices Score')

# Formatting: marker size and a thin dark outline around each marker
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1,
                                        color='DarkSlateGrey')))

# Linear regression of y on x.
# Fit once and reuse the result: it is kept as `res` and also unpacked into
# the individual statistics, instead of running the same fit a second time.
res = linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res

# Fitted y for every x (computed here but not added to the figure in this cell)
regress_values = x_values * slope + intercept

# Print the fitted slope, intercept and R-squared (rvalue squared)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Print out the line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.07
intercept:-0.02
R-squared: 0.3213
y=0.07x + -0.02


In [16]:
# Happiness score vs. generosity

# Establish x and y values
x_values = csv_2016_restructured["Score"]
y_values = csv_2016_restructured["Generosity"]

# Scatter plot: one marker per row, coloured by region, country name on hover
fig = px.scatter(csv_2016_restructured, x=x_values, y=y_values, color="Region", hover_name="Country",
                 range_x=[0, 8], title="Happiness Score compared to Generosity Score",
                 width=1000, height=600)  # use trendline="ols" to see all lin reg equations

# Set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Generosity Score')

# Formatting: marker size and a thin dark outline around each marker
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1,
                                        color='DarkSlateGrey')))

# Linear regression of y on x.
# Fit once and reuse the result: it is kept as `res` and also unpacked into
# the individual statistics, instead of running the same fit a second time.
res = linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res

# Fitted y for every x (computed here but not added to the figure in this cell)
regress_values = x_values * slope + intercept

# Print the fitted slope, intercept and R-squared (rvalue squared)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Print out the line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.02
intercept:0.14
R-squared: 0.0246
y=0.02x + 0.14


In [17]:
# Happiness score vs. perceptions of corruption

# Establish x and y values
x_values = csv_2016_restructured["Score"]
y_values = csv_2016_restructured["Perceptions of corruption"]

# Scatter plot: one marker per row, coloured by region, country name on hover
fig = px.scatter(csv_2016_restructured, x=x_values, y=y_values, color="Region", hover_name="Country",
                 range_x=[0, 8], title="Happiness Score compared to Perception of Corruption Score",
                 width=1000, height=600)  # use trendline="ols" to see all lin reg equations

# Set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Perception of Corruption Score')

# Formatting: marker size and a thin dark outline around each marker
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1,
                                        color='DarkSlateGrey')))

# Linear regression of y on x.
# Fit once and reuse the result: it is kept as `res` and also unpacked into
# the individual statistics, instead of running the same fit a second time.
res = linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res

# Fitted y for every x (computed here but not added to the figure in this cell)
regress_values = x_values * slope + intercept

# Print the fitted slope, intercept and R-squared (rvalue squared)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Print out the line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()


slope:0.04
intercept:-0.07
R-squared: 0.1616
y=0.04x + -0.07
