In [18]:
from sklearn.ensemble import RandomForestRegressor
import os 
import pandas as pd
import pickle
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

In [19]:
# Load data
# Read the four *_pca.csv tables (train/test features and train/test targets)
# from the shared data folder, in a fixed order that matches the unpacking.
data_path = "../../data"
X_train, y_train_org, X_test, y_test_org = (
    pd.read_csv(os.path.join(data_path, csv_name))
    for csv_name in (
        "X_train_pca.csv",
        "y_train_pca.csv",
        "X_test_pca.csv",
        "y_test_pca.csv",
    )
)

In [20]:
# Target columns handled one at a time; each name is used to select a column
# from the y tables.
traits = [
    'Extraversion',
    'Agreeableness',
    'Conscientiousness',
    'Emotional Stability',
    'Openness',
]

In [21]:
# Create results directory
# The model-specific folder is derived from results_path so the two locations
# cannot drift apart if the base folder is ever changed.
results_path = "../../results"
specific_results_path = os.path.join(results_path, "rf_regression")
# os.makedirs creates missing parent directories too, so this single call also
# creates results_path; exist_ok=True makes re-running the cell harmless.
os.makedirs(specific_results_path, exist_ok=True)

In [22]:
# Cycle through traits
# For every trait: fit a RandomForestRegressor on the training data, predict
# on X_test, print the regression metrics, and pickle both the fitted model
# and the metrics dict under <specific_results_path>/<trait>/.

for trait in traits:
    print(f"Processing {trait}")
    y_train = y_train_org[trait]
    y_test = y_test_org[trait]

    # Fixed random_state so re-running the cell fits the same forest.
    random_forest_model = RandomForestRegressor(random_state=27)
    random_forest_model.fit(X_train, y_train)

    y_pred = random_forest_model.predict(X_test)
    y_true = y_test

    # Compute metrics
    # Calculate Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_true, y_pred)
    # Calculate Mean Squared Error (MSE)
    mse = mean_squared_error(y_true, y_pred)
    # Calculate Root Mean Squared Error (RMSE)
    # Derived from MSE instead of mean_squared_error(..., squared=False):
    # the `squared` keyword was deprecated in scikit-learn 1.4 and removed in
    # 1.6, while the square root works on every version.
    rmse = np.sqrt(mse)
    # Calculate R-squared (R2) Score
    r2 = r2_score(y_true, y_pred)
    # Calculate Mean Absolute Percentage Error (MAPE)
    # NOTE: this value is a fraction (e.g. 0.22), whereas MPE below is scaled
    # by 100 -- the two are not in the same unit.
    mape = mean_absolute_percentage_error(y_true, y_pred)
    # Calculate Mean Percentage Error (MPE)
    # Vectorised and positional: converting to a NumPy array avoids the
    # label-based y_true[i] lookup, which only works for a 0..n-1 index.
    y_true_values = y_true.to_numpy()
    mpe = np.mean((y_true_values - y_pred) / y_true_values) * 100

    print("Mean Absolute Error (MAE):", mae)
    print("Mean Squared Error (MSE):", mse)
    print("Root Mean Squared Error (RMSE):", rmse)
    print("R-squared (R2) Score:", r2)
    print("Mean Absolute Percentage Error (MAPE):", mape)
    print("Mean Percentage Error (MPE):", mpe)
    print("\n\n")

    # Add metrics to dict
    # "mpe" is stored as well so every printed metric is also persisted; the
    # previously saved keys are unchanged.
    metrics = {"mae": mae, "mse": mse, "rmse": rmse, "r2": r2, "mape": mape, "mpe": mpe}

    # Save model and metrics
    curr_result_path = os.path.join(specific_results_path, trait)
    os.makedirs(curr_result_path, exist_ok=True)
    with open(os.path.join(curr_result_path, 'rf_model.pkl'), 'wb') as file:
        pickle.dump(random_forest_model, file)
    with open(os.path.join(curr_result_path, 'perf_metrics.pkl'), 'wb') as file:
        pickle.dump(metrics, file)

Processing Extraversion
Mean Absolute Error (MAE): 0.8611538461538465
Mean Squared Error (MSE): 1.0116942307692312
Root Mean Squared Error (RMSE): 1.0058301202336462
R-squared (R2) Score: 0.1167402038139439
Mean Absolute Percentage Error (MAPE): 0.22155789111147198
Mean Percentage Error (MPE): -6.937077430000786



Processing Agreeableness
Mean Absolute Error (MAE): 0.5877163461538462
Mean Squared Error (MSE): 0.5415104038461538
Root Mean Squared Error (RMSE): 0.7358739048547338
R-squared (R2) Score: 0.13235625081071134
Mean Absolute Percentage Error (MAPE): 0.1203804951483629
Mean Percentage Error (MPE): -1.5216058735064626



Processing Conscientiousness
Mean Absolute Error (MAE): 0.5632908653846157
Mean Squared Error (MSE): 0.49955588221153907
Root Mean Squared Error (RMSE): 0.706792672720607
R-squared (R2) Score: 0.1636847730921518
Mean Absolute Percentage Error (MAPE): 0.11486202700364119
Mean Percentage Error (MPE): -1.9845813252291955



Processing Emotional Stability
Mean Absol

In [23]:
# Test load
# Read back a saved metrics pickle as a sanity check. curr_result_path is the
# variable set inside the per-trait loop, so when cells are run in order it
# points at the folder of the last trait processed.
metrics_file_path = os.path.join(curr_result_path, 'perf_metrics.pkl')
with open(metrics_file_path, 'rb') as file:
    metrics = pickle.load(file)
