In [1]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
# Apply Seaborn's default style settings for graphics.
# NOTE(review): seaborn's documentation describes `set` as an alias of
# `set_theme` — consider switching once the installed seaborn version is confirmed.
sb.set() # set the default Seaborn style for graphics

# Import essential models and functions from sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import explained_variance_score, mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

## Gradient Boosting (GDP_PER_CAPITA)

In [2]:
# Read the dataset from the CSV file
df = pd.read_csv('final_version_dataset.csv')

# Target column is HAPPINESS_SCORE; the only feature column is GDP_PER_CAPITA
y = df['HAPPINESS_SCORE']
X = df[['GDP_PER_CAPITA']]

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the gradient boosting regressor, then fit it on the training rows
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
gbr.fit(X_train, y_train)

# Predict the held-out rows
y_pred = gbr.predict(X_test)

# Score the predictions against the held-out targets
mse, r2, evs = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, r2_score, explained_variance_score)
)

# Round each metric to 4 decimal places (round() does not pad trailing zeros)
mse_rounded, r2_rounded, evs_rounded = (
    round(score, 4) for score in (mse, r2, evs)
)

# Report the rounded metrics, one per line
for label, shown in (
    ('Mean squared error:', mse_rounded),
    ('R2 score:', r2_rounded),
    ('Explained variance score:', evs_rounded),
):
    print(label, shown)

Mean squared error: 0.4415
R2 score: 0.6453
Explained variance score: 0.6483


## Gradient Boosting (SOCIAL_SUPPORT)

In [3]:
# Read the dataset from the CSV file
df = pd.read_csv('final_version_dataset.csv')

# Target column is HAPPINESS_SCORE; the only feature column is SOCIAL_SUPPORT
y = df['HAPPINESS_SCORE']
X = df[['SOCIAL_SUPPORT']]

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the gradient boosting regressor, then fit it on the training rows
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
gbr.fit(X_train, y_train)

# Predict the held-out rows
y_pred = gbr.predict(X_test)

# Score the predictions against the held-out targets
mse, r2, evs = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, r2_score, explained_variance_score)
)

# Round each metric to 4 decimal places (round() does not pad trailing zeros)
mse_rounded, r2_rounded, evs_rounded = (
    round(score, 4) for score in (mse, r2, evs)
)

# Report the rounded metrics, one per line
for label, shown in (
    ('Mean squared error:', mse_rounded),
    ('R2 score:', r2_rounded),
    ('Explained variance score:', evs_rounded),
):
    print(label, shown)

Mean squared error: 0.52
R2 score: 0.5822
Explained variance score: 0.5863


## Gradient Boosting (LIFE_EXPECTANCY)

In [4]:
# Read the dataset from the CSV file
df = pd.read_csv('final_version_dataset.csv')

# Target column is HAPPINESS_SCORE; the only feature column is LIFE_EXPECTANCY
y = df['HAPPINESS_SCORE']
X = df[['LIFE_EXPECTANCY']]

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the gradient boosting regressor, then fit it on the training rows
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
gbr.fit(X_train, y_train)

# Predict the held-out rows
y_pred = gbr.predict(X_test)

# Score the predictions against the held-out targets
mse, r2, evs = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, r2_score, explained_variance_score)
)

# Round each metric to 4 decimal places (round() does not pad trailing zeros)
mse_rounded, r2_rounded, evs_rounded = (
    round(score, 4) for score in (mse, r2, evs)
)

# Report the rounded metrics, one per line
for label, shown in (
    ('Mean squared error:', mse_rounded),
    ('R2 score:', r2_rounded),
    ('Explained variance score:', evs_rounded),
):
    print(label, shown)

Mean squared error: 0.5194
R2 score: 0.5828
Explained variance score: 0.5904


## Multivariate Gradient Boosting (GDP_PER_CAPITA, SOCIAL_SUPPORT, LIFE_EXPECTANCY)

In [5]:
# Read the dataset from the CSV file
df = pd.read_csv('final_version_dataset.csv')

# Target column is HAPPINESS_SCORE; three feature columns are used together
y = df['HAPPINESS_SCORE']
X = df[['GDP_PER_CAPITA','SOCIAL_SUPPORT','LIFE_EXPECTANCY']]

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the gradient boosting regressor, then fit it on the training rows
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
gbr.fit(X_train, y_train)

# Predict the held-out rows
y_pred = gbr.predict(X_test)

# Score the predictions against the held-out targets
mse, r2, evs = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, r2_score, explained_variance_score)
)

# Round each metric to 4 decimal places (round() does not pad trailing zeros)
mse_rounded, r2_rounded, evs_rounded = (
    round(score, 4) for score in (mse, r2, evs)
)

# Report the rounded metrics, one per line
for label, shown in (
    ('Mean squared error:', mse_rounded),
    ('R2 score:', r2_rounded),
    ('Explained variance score:', evs_rounded),
):
    print(label, shown)

Mean squared error: 0.3499
R2 score: 0.7189
Explained variance score: 0.7214
