In [1]:
#Dependencies

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px # for visualization 
import os
import plotly.graph_objects as go
from scipy.stats import linregress
import scipy.stats as sts

In [2]:
#bring in csv file to work with

csvpath_2017 = os.path.join('..','Resources', '2017.csv')
csv_2017 = pd.read_csv(csvpath_2017)

In [3]:
#import happy_region df so we can merge and make region column

csvpath_region = os.path.join('..','happy_regions.csv')
csv_happy_regions = pd.read_csv(csvpath_region)

#lets take out just country and region columns
new_region_df = csv_happy_regions[["Country", "Region"]]

#rename column so it matches with the newly structured csv below
new_region_df = new_region_df.rename(columns={"Country": "Country or region"})

#check to see if it worked
#new_region_df

In [4]:
# Restructure / Rename
# datasets from 2015 - 2017.
#added region as as column************

# Convert / fix 2017 Data

csv_2017_restructured = csv_2017[["Happiness.Rank",
                                  "Country",
                                  "Happiness.Score",
                                  "Economy..GDP.per.Capita.",
                                  "Family",
                                  "Health..Life.Expectancy.",
                                  "Freedom",
                                  "Generosity",
                                  "Trust..Government.Corruption."]]

csv_2017_restructured = csv_2017_restructured.rename(columns={"Happiness.Rank": "Overall rank",
                                      "Country": "Country or region",
                                      "Happiness.Score": "Score",
                                      "Economy..GDP.per.Capita.": "GDP per capita",
                                      "Family": "Social support",
                                      "Health..Life.Expectancy.": "Healthy life expectancy",
                                      "Freedom": "Freedom to make life choices",
                                      "Generosity": "Generosity",
                                      "Trust..Government.Corruption.": "Perceptions of corruption"})
csv_2017_restructured.head()

#let's merge df with region category with csv 2017
merged_2017 = pd.merge(csv_2017_restructured, new_region_df, on="Country or region")
#merged_2017


In [5]:
#write it as csv so we can look at it
#output file for data
output_file = os.path.join("clean_csvs/2017_csv_regions.csv")
#open output file
with open(output_file, 'w') as datafile:
    merged_2017.to_csv(output_file)

merged_2017.head(10)

FileNotFoundError: [Errno 2] No such file or directory: 'clean_csvs/2017_csv_regions.csv'

In [12]:
#happiness score vs. gdp

#establish x and y values
x_values = merged_2017["Score"]
y_values = merged_2017["GDP per capita"]

#px.scatter
fig = px.scatter(merged_2017, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to GDP per Capita Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='GDP per Capita')

#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))


#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.3
intercept:-0.63
R-squared: 0.6842
y=0.3x + -0.63


In [13]:
#happiness score vs social support

#establish x and y values
x_values = merged_2017["Score"]
y_values = merged_2017["Social support"]

#px.scatter
fig = px.scatter(merged_2017, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Social Support Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Social Support Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))


#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.19
intercept:0.18
R-squared: 0.5693
y=0.19x + 0.18


In [14]:
#happiness score vs healthy life expectancy

#establish x and y values
x_values = merged_2017["Score"]
y_values = merged_2017["Healthy life expectancy"]

df = px.data.iris() # iris is a pandas DataFrame

#px.scatter
fig = px.scatter(merged_2017, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Healthy Life Expectancy Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Healthy Life Expectancy')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))


#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.16
intercept:-0.31
R-squared: 0.6273
y=0.16x + -0.31


In [15]:
#happiness score vs freedom to make life choices

#establish x and y values
x_values = merged_2017["Score"]
y_values = merged_2017["Freedom to make life choices"]

#px.scatter
fig = px.scatter(merged_2017, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Freedom to Make Life Choices Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Freedom to Make Life Choices Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))

#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.08
intercept:0.0
R-squared: 0.3314
y=0.08x + 0.0


In [16]:
#happiness score vs generosity

#establish x and y values
x_values = merged_2017["Score"]
y_values = merged_2017["Generosity"]

#px.scatter
fig = px.scatter(merged_2017, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Generosity Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Generosity Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))

#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.02
intercept:0.15
R-squared: 0.0241
y=0.02x + 0.15


In [17]:
#happiness score vs perception of corruption

#establish x and y values
x_values = merged_2017["Score"]
y_values = merged_2017["Perceptions of corruption"]

#px.scatter
fig = px.scatter(merged_2017, x_values, y_values, color="Region", hover_name="Country or region",
                range_x=[0,8], title="Happiness Score compared to Perception of Corruption Score",
                width= 1000, height =600) # use trendline="ols" to see all lin reg equations

#set axes labels
fig.update_xaxes(title_text='Happiness Score')
fig.update_yaxes(title_text='Perception of Corruption Score')


#formatting
#marker size and colors
fig.update_traces(marker=dict(size=10,
                              line=dict(width=1, 
                                        color='DarkSlateGrey')))

#linear regression
#use linregress from dependencies to inplement linear regression
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

#print out all variables calculated above (rvalue, pvalue...)
print(f"slope:{round(slope, 2)}")
print(f"intercept:{round(intercept,2)}")
res = sts.linregress(x_values, y_values)
print(f"R-squared: {res.rvalue**2:.4f}")


#print out line eq for reference
line_eq = (f"y={round(slope, 2)}x + {round(intercept, 2)}")
print(line_eq)

fig.show()

slope:0.04
intercept:-0.09
R-squared: 0.1996
y=0.04x + -0.09
