In [19]:
#Dependencies

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px # for visualization 
import os
import plotly.graph_objects as go
from scipy.stats import linregress
import scipy.stats as sts

In [29]:
# Load 2019.csv from the Resources folder one level above the notebook

csvpath_2019 = os.path.join('..', 'Resources', '2019.csv')
csv_2019 = pd.read_csv(filepath_or_buffer=csvpath_2019)

# Preview the first five rows as the cell output
csv_2019.head(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [30]:
# Read the country/region lookup and attach a Region column to the 2019 data

csvpath_region = os.path.join('..', 'happy_regions.csv')
csv_happy_regions = pd.read_csv(csvpath_region)

# Keep only the two lookup columns, renaming "Country" so the join key
# matches the "Country or region" column of csv_2019
new_region_df = csv_happy_regions.loc[:, ["Country", "Region"]].rename(
    columns={"Country": "Country or region"}
)

# Inner join (the pd.merge default): rows of csv_2019 whose country has no
# match in the region lookup are dropped from merged_2019
merged_2019 = pd.merge(
    left=csv_2019,
    right=new_region_df,
    how="inner",
    on="Country or region",
)

In [31]:
# Write the merged frame to disk so it can be inspected outside the notebook

# Build the path from components, like the input paths in the earlier cells
output_file = os.path.join("..", "clean_csvs", "2019_csv_regions.csv")

# When given a path, to_csv opens, writes and closes the file itself, so no
# separate open() handle is needed (the previous one was never written to)
merged_2019.to_csv(output_file)

# Display the merged frame as the cell output
merged_2019

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Region
0,1,Finland,7.769,1.340,1.587,0.986,0.596,0.153,0.393,Europe
1,2,Denmark,7.600,1.383,1.573,0.996,0.592,0.252,0.410,Europe
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341,Europe
3,4,Iceland,7.494,1.380,1.624,1.026,0.591,0.354,0.118,Europe
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298,Europe
...,...,...,...,...,...,...,...,...,...,...
144,151,Yemen,3.380,0.287,1.163,0.463,0.143,0.108,0.077,Middle East and Northern Africa
145,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411,Sub-Saharan Africa
146,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147,Sub-Saharan Africa
147,154,Afghanistan,3.203,0.350,0.517,0.361,0.000,0.158,0.025,Asia


In [23]:
#happiness score vs. gdp

# Series used for both the scatter plot and the regression
x_values = merged_2019["Score"]
y_values = merged_2019["GDP per capita"]

# Scatter plot coloured by region; trendline="ols" asks plotly to draw a
# fitted line per colour group (see each line's equation on hover)
fig = px.scatter(merged_2019, x=x_values, y=y_values, color="Region",
                 hover_name="Country or region", range_x=[0,8],
                 title="Happiness Score compared to GDP per Capita Score",
                 width=1000, height=600, trendline="ols")

# Axis labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='GDP per Capita Score')

# Marker size and outline colour
marker_style = dict(size=10, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# Linear regression over all rows, fitted once. The result unpacks as a
# 5-tuple and also exposes the same fields as attributes, so one call
# serves both the unpacked names and `res`.
res = sts.linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res
regress_values = x_values * slope + intercept

# Report the fit
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.28
intercept:-0.61
R-squared: 0.6343
y=0.28x + -0.61


In [24]:
#happiness score vs social support

# Series used for both the scatter plot and the regression
x_values = merged_2019["Score"]
y_values = merged_2019["Social support"]

# Scatter plot coloured by region
# (add trendline="ols" to see all lin reg equations)
fig = px.scatter(merged_2019, x=x_values, y=y_values, color="Region",
                 hover_name="Country or region", range_x=[0,8],
                 title="Happiness Score compared to Social Support Score",
                 width=1000, height=600)

# Axis labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Social Support Score')

# Marker size and outline colour
marker_style = dict(size=10, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# Linear regression over all rows, fitted once. The result unpacks as a
# 5-tuple and also exposes the same fields as attributes, so one call
# serves both the unpacked names and `res`.
res = sts.linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res
regress_values = x_values * slope + intercept

# Report the fit
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.21
intercept:0.09
R-squared: 0.5969
y=0.21x + 0.09


In [28]:
#happiness score vs healthy life expectancy

# Series used for both the scatter plot and the regression
x_values = merged_2019["Score"]
y_values = merged_2019["Healthy life expectancy"]

# Scatter plot coloured by region
# (add trendline="ols" to see all lin reg equations)
fig = px.scatter(merged_2019, x=x_values, y=y_values, color="Region",
                 hover_name="Country or region", range_x=[0,8],
                 title="Happiness Score compared to Healthy Life Expectancy Score",
                 width=1000, height=600)

# Axis labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Healthy Life Expectancy')

# Marker size and outline colour
marker_style = dict(size=10, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# Linear regression over all rows, fitted once. The result unpacks as a
# 5-tuple and also exposes the same fields as attributes, so one call
# serves both the unpacked names and `res`.
res = sts.linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res
regress_values = x_values * slope + intercept

# Report the fit
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.17
intercept:-0.18
R-squared: 0.6077
y=0.17x + -0.18


In [25]:
#happiness score vs freedom to make life choices

# Series used for both the scatter plot and the regression
x_values = merged_2019["Score"]
y_values = merged_2019["Freedom to make life choices"]

# Scatter plot coloured by region
# (add trendline="ols" to see all lin reg equations)
fig = px.scatter(merged_2019, x=x_values, y=y_values, color="Region",
                 hover_name="Country or region", range_x=[0,8],
                 title="Happiness Score compared to Freedom to Make Life Choices Score",
                 width=1000, height=600)

# Axis labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Freedom to Make Life Choices Score')

# Marker size and outline colour
marker_style = dict(size=10, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# Linear regression over all rows, fitted once. The result unpacks as a
# 5-tuple and also exposes the same fields as attributes, so one call
# serves both the unpacked names and `res`.
res = sts.linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res
regress_values = x_values * slope + intercept

# Report the fit
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.07
intercept:0.01
R-squared: 0.3115
y=0.07x + 0.01


In [27]:
#happiness score vs generosity

# Series used for both the scatter plot and the regression
x_values = merged_2019["Score"]
y_values = merged_2019["Generosity"]

# Scatter plot coloured by region
# (add trendline="ols" to see all lin reg equations)
fig = px.scatter(merged_2019, x=x_values, y=y_values, color="Region",
                 hover_name="Country or region", range_x=[0,8],
                 title="Happiness Score compared to Generosity Score",
                 width=1000, height=600)

# Axis labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Generosity Score')

# Marker size and outline colour
marker_style = dict(size=10, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# Linear regression over all rows, fitted once. The result unpacks as a
# 5-tuple and also exposes the same fields as attributes, so one call
# serves both the unpacked names and `res`.
res = sts.linregress(x_values, y_values)
(slope, intercept, rvalue, pvalue, stderr) = res
regress_values = x_values * slope + intercept

# Report the fit
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.01
intercept:0.15
R-squared: 0.0069
y=0.01x + 0.15


In [26]:
#happiness score vs perception of corruption

# Series used for both the scatter plot and the regression
x_values = merged_2019["Score"]
y_values = merged_2019["Perceptions of corruption"]

# Scatter plot coloured by region
# (add trendline="ols" to see all lin reg equations)
fig = px.scatter(merged_2019, x=x_values, y=y_values, color="Region",
                 hover_name="Country or region", range_x=[0,8],
                 title="Happiness Score compared to Perception of Corruption Score",
                 width=1000, height=600)

# Axis labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Perception of Corruption Score')

# Marker size and outline colour
marker_style = dict(size=10, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# Linear regression
# linregress returns NaN when either input contains NaN, so keep only the
# rows where both values are present
mask = ~(np.isnan(x_values) | np.isnan(y_values))

# Fit once on the masked data so slope, intercept and R-squared all describe
# the same rows. The result unpacks as a 5-tuple and also exposes the same
# fields as attributes.
res = sts.linregress(x_values[mask], y_values[mask])
(slope, intercept, rvalue, pvalue, stderr) = res
regress_values = x_values * slope + intercept

# Report the fit
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
print(f"R-squared: {res.rvalue**2:.4f}")

# Line equation for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.04
intercept:-0.08
R-squared: 0.1685
y=0.04x + -0.08
