In [1]:
#Dependencies

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px # for visualization 
import os
import plotly.graph_objects as go
from scipy.stats import linregress
import scipy.stats as sts

In [2]:
#import 2018 data

csvpath_2018 = os.path.join('..','Resources', '2018.csv')
csv_2018 = pd.read_csv(csvpath_2018)
csv_2018.head(15)
csv_2018.dropna(how="all")

#import happy_region df so we can merge and make region column

csvpath_region = os.path.join('..','happy_regions.csv')
csv_happy_regions = pd.read_csv(csvpath_region)

#lets take out just country and region columns
new_region_df = csv_happy_regions[["Country", "Region"]]

#rename column so it matches with the newly structured csv below
new_region_df = new_region_df.rename(columns={"Country": "Country or region"})

#let's merge df with region category with csv 2018
merged_2018 = pd.merge(csv_2018, new_region_df, on="Country or region")
#merged_2018



In [13]:
#write it as csv so we can look at it
#output file for data
output_file = os.path.join("../clean_csvs/2018_csv_regions.csv")
#open output file
with open(output_file, 'w') as datafile:
    merged_2018.to_csv(output_file)

merged_2018.head(10)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Region
0,1,Finland,7.632,1.305,1.592,0.874,0.681,0.202,0.393,Europe
1,2,Norway,7.594,1.456,1.582,0.861,0.686,0.286,0.34,Europe
2,3,Denmark,7.555,1.351,1.59,0.868,0.683,0.284,0.408,Europe
3,4,Iceland,7.495,1.343,1.644,0.914,0.677,0.353,0.138,Europe
4,5,Switzerland,7.487,1.42,1.549,0.927,0.66,0.256,0.357,Europe
5,6,Netherlands,7.441,1.361,1.488,0.878,0.638,0.333,0.295,Europe
6,7,Canada,7.328,1.33,1.532,0.896,0.653,0.321,0.291,North America
7,8,New Zealand,7.324,1.268,1.601,0.876,0.669,0.365,0.389,Australia and New Zealand
8,9,Sweden,7.314,1.355,1.501,0.913,0.659,0.285,0.383,Europe
9,10,Australia,7.272,1.34,1.573,0.91,0.647,0.361,0.302,Australia and New Zealand


In [14]:
#happiness score vs. gdp

#establish x and y values
x_values = merged_2018["Score"]
y_values = merged_2018["GDP per capita"]

#px.scatter
fig = px.scatter(merged_2018, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to GDP per Capita Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='GDP per Capita Score')

#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))


#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.28
intercept:-0.61
R-squared: 0.6572
y=0.28x + -0.61


In [15]:
#happiness score vs social support

#establish x and y values
x_values = merged_2018["Score"]
y_values = merged_2018["Social support"]

#px.scatter
fig = px.scatter(merged_2018, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Social Support Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Social Support Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))


#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.2
intercept:0.14
R-squared: 0.5587
y=0.2x + 0.14


In [12]:
#happiness score vs healthy life expectancy

#establish x and y values
x_values = merged_2018["Score"]
y_values = merged_2018["Healthy life expectancy"]

df = px.data.iris() # iris is a pandas DataFrame

#px.scatter
fig = px.scatter(merged_2018, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Healthy Life Expectancy",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Healthy Life Expectancy')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))


#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.17
intercept:-0.31
R-squared: 0.6115
y=0.17x + -0.31


In [16]:
#happiness score vs freedom to make life choices

#establish x and y values
x_values = merged_2018["Score"]
y_values = merged_2018["Freedom to make life choices"]

#px.scatter
fig = px.scatter(merged_2018, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Freedom to Make Life Choices Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Freedom to Make Life Choices Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))

#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.08
intercept:0.03
R-squared: 0.2913
y=0.08x + 0.03


In [17]:
#happiness score vs generosity

#establish x and y values
x_values = merged_2018["Score"]
y_values = merged_2018["Generosity"]

#px.scatter
fig = px.scatter(merged_2018, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Generosity Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Generosity Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))

#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.01
intercept:0.11
R-squared: 0.0194
y=0.01x + 0.11


In [18]:
#happiness score vs perception of corruption

#establish x and y values
x_values = merged_2018["Score"]
y_values = merged_2018["Perceptions of corruption"]

#px.scatter
fig = px.scatter(merged_2018, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Perception of Corruption Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Perception of Corruption Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))

#linear regression
#use linregress from dependencies to inplement linear regression\
#creating mask because returning nan for this
mask = ~np.isnan(x_values) & ~np.isnan(y_values)

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values[mask], y_values[mask])
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.04
intercept:-0.09
R-squared: nan
y=0.04x + -0.09
