In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score
import pickle

Load the preprocessed data

In [2]:
# Read the cleaned happiness dataset; the relative path is resolved against
# the notebook's current working directory.
data_path = 'final_cleaned_happiness_data.xlsx'
data = pd.read_excel(data_path)

Define Target and Features

In [3]:
# Separate the prediction target from the explanatory columns.
target_column = 'Happiness Score'
y = data[target_column]                 # Target
X = data.drop(columns=[target_column])  # Features: every remaining column

Convert Categorical Variables to Numeric Using One-Hot Encoding

In [4]:
# One-hot encode the categorical feature columns. drop_first=True omits the
# first level of each encoded column so the indicator columns are not
# perfectly collinear.
X = pd.get_dummies(data=X, drop_first=True)

Split Data into Train/Test Sets

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Feature Selection for Reduced Feature Set

In [6]:
# Keep the 5 features that score highest under f_regression against the target.
# The selector is fitted on the training split only and then applied to both
# splits, so the test rows do not influence which columns are chosen.
# NOTE(review): none of the model cells visible in this notebook consume
# X_train_reduced / X_test_reduced -- confirm whether they are still needed.
selector = SelectKBest(score_func=f_regression, k=5).fit(X_train, y_train)
X_train_reduced = selector.transform(X_train)
X_test_reduced = selector.transform(X_test)

Function to Evaluate Models

In [7]:
def evaluate_model(model, X_train, X_test, y_train, y_test, model_name, cv=5):
    """Fit a regressor, print its test and cross-validated R-squared, and return it.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in place.
    X_train, y_train :
        Training features and target. Used both for the final fit and for
        cross-validation.
    X_test, y_test :
        Held-out features and target, used only for the single R-squared score.
    model_name : str
        Label inserted into the printed report lines.
    cv : int or cross-validation splitter, default 5
        Forwarded unchanged to ``cross_val_score``. The default keeps the
        previous hard-coded 5-fold behaviour.

    Returns
    -------
    estimator
        The same ``model`` object that was passed in, after ``fit`` has been
        called on the full training split.
    """
    # Fit on the full training split; this fitted instance is what gets returned.
    model.fit(X_train, y_train)

    # Hold-out performance on data the model has not seen.
    y_pred = model.predict(X_test)
    score = r2_score(y_test, y_pred)
    print(f"R-squared score for {model_name}: {score}")

    # Cross-validation runs on the training split only, so the test set stays
    # untouched; the per-fold R-squared values are averaged into one number.
    cv_score = cross_val_score(model, X_train, y_train, cv=cv, scoring='r2').mean()
    print(f"Cross-validation score for {model_name}: {cv_score}\n")
    return model

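Initialize and Evaluate Different Models

Each model below reports two numbers: the R-squared on the held-out test set and the mean 5-fold cross-validated R-squared on the training set. An R-squared below zero means the model predicts worse than always guessing the mean of the target.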

Model 1: Linear Regression

In [8]:
# Baseline: ordinary least-squares regression on the full encoded feature set.
model_lr = LinearRegression()
evaluate_model(
    model=model_lr,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="Linear Regression",
)

R-squared score for Linear Regression: -0.05816431804579647
Cross-validation score for Linear Regression: -0.05337972500220474



Model 2: Random Forest Regressor

In [9]:
# Random forest with default hyperparameters; the seed keeps runs repeatable.
model_rf = RandomForestRegressor(random_state=42)
evaluate_model(
    model=model_rf,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="Random Forest Regressor",
)

R-squared score for Random Forest Regressor: -0.029714748338692365
Cross-validation score for Random Forest Regressor: -0.024314404029685655



Model 3: Gradient Boosting Regressor

In [10]:
# Gradient boosting with default hyperparameters; the seed keeps runs repeatable.
model_gb = GradientBoostingRegressor(random_state=42)
evaluate_model(
    model=model_gb,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="Gradient Boosting Regressor",
)

R-squared score for Gradient Boosting Regressor: -0.008940789680710548
Cross-validation score for Gradient Boosting Regressor: -0.006341794297976789



Model 4: Support Vector Regressor

In [11]:
# Support vector regression with default hyperparameters.
# NOTE(review): no feature-scaling step is visible in this notebook, and SVR
# is sensitive to feature scale -- confirm the input file is already standardized.
model_svr = SVR()
evaluate_model(
    model=model_svr,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="Support Vector Regressor",
)

R-squared score for Support Vector Regressor: -0.070085518244698
Cross-validation score for Support Vector Regressor: -0.04781232752707676



Model 5: K-Nearest Neighbors Regressor

In [12]:
# k-nearest-neighbours regression averaging over the 5 closest training rows.
# NOTE(review): no feature-scaling step is visible in this notebook, and
# distance-based models are sensitive to feature scale -- confirm the input
# file is already standardized.
model_knn = KNeighborsRegressor(n_neighbors=5)
evaluate_model(
    model=model_knn,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="K-Nearest Neighbors Regressor",
)

R-squared score for K-Nearest Neighbors Regressor: -0.19371568302392772
Cross-validation score for K-Nearest Neighbors Regressor: -0.1838992290676357



Collect All Trained Models

In [13]:
# Map each display name to its estimator; insertion order determines the
# order in which the models are written out later.
models = dict([
    ("Linear Regression", model_lr),
    ("Random Forest Regressor", model_rf),
    ("Gradient Boosting Regressor", model_gb),
    ("Support Vector Regressor", model_svr),
    ("K-Nearest Neighbors Regressor", model_knn),
])

Save each model as a .model file

In [14]:
# Pickle every estimator in `models` to its own file, named after the display
# name with spaces turned into underscores and a ".model" suffix.
# Pickle files should only ever be loaded back from a trusted source.
for model_name, model in models.items():
    filename = "_".join(model_name.split(' ')) + ".model"
    with open(filename, mode='wb') as file:
        file.write(pickle.dumps(model))
    # Reached only when the write above succeeded.
    print(f"Saved {model_name} as {filename}")

Saved Linear Regression as Linear_Regression.model
Saved Random Forest Regressor as Random_Forest_Regressor.model
Saved Gradient Boosting Regressor as Gradient_Boosting_Regressor.model
Saved Support Vector Regressor as Support_Vector_Regressor.model
Saved K-Nearest Neighbors Regressor as K-Nearest_Neighbors_Regressor.model
