In [None]:
#Importing Data

from sklearn.model_selection import train_test_split
import pandas as pd
import os


# Resolve the dataset location relative to the parent of the current working directory.
base_path = os.path.abspath(os.pardir)
file_path = os.path.join(base_path, 'datasets', 'cleaned_life_expectancy_data.csv')
cleaned = pd.read_csv(file_path)

# Separate the regression target from the feature columns.
y = cleaned['Life Expectancy']
X = cleaned.drop(columns=['Life Expectancy'])

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Training with different number of estimators

from sklearn.ensemble import AdaBoostRegressor
# Imported in this cell so it runs on a fresh kernel (Restart & Run All).
from sklearn.metrics import mean_squared_error, r2_score

# Largest ensemble size considered by the sweep.
max_estimators = 500

# Maps ensemble size -> {'mse': ..., 'r2': ...} measured on the held-out test split.
results = {}

# Boosting is sequential and the seed is fixed, so an ensemble of k estimators is
# exactly the first k stages of the largest ensemble. Fit once and read every
# intermediate ensemble off `staged_predict` instead of refitting for each size.
adaboost = AdaBoostRegressor(n_estimators=max_estimators, random_state=42)
adaboost.fit(X_train, y_train)

# If boosting terminates early, sizes beyond the last fitted estimator are not
# recorded; they would only repeat the final stage's scores.
for n_estimators, y_pred in enumerate(adaboost.staged_predict(X_test), start=1):
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[n_estimators] = {
        'mse': mse,
        'r2': r2
    }

# NOTE(review): the best size is picked by test-set R^2, so the score reported for it
# is optimistic; consider selecting on a validation split or via cross-validation.
# `max` returns the first (smallest) size on ties.
best_n_estimators = max(results, key=lambda k: results[k]['r2'])
best_result = results[best_n_estimators]

print(f"Best Number of Estimators: {best_n_estimators}")
print(f"Best Mean Squared Error: {best_result['mse']}")
print(f"Best R^2 Score: {best_result['r2']}")

Best Number of Estimators: 19
Best Mean Squared Error: 21.267089258983436
Best R^2 Score: 0.7713572716765045


In [None]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score

# Refit AdaBoost with 19 estimators, the best count reported by the recorded
# output of the estimator sweep earlier in this notebook.
adaboost = AdaBoostRegressor(random_state=42, n_estimators=19)
adaboost.fit(X_train, y_train)
y_pred = adaboost.predict(X_test)

# Hold-out metrics on the test split.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# The scorer returns negated MSE (higher is better), so flip the sign back.
cv_scores = -cross_val_score(
    adaboost,
    X,
    y,
    cv=5,
    scoring='neg_mean_squared_error',
)
print(f"Mean 5-Fold Cross-Validation MSE Score: {cv_scores.mean()}")

Mean Squared Error: 21.267089258983436
R^2 Score: 0.7713572716765045
Mean 10-Fold Cross-Validation MSE Score: 22.66319037331178


In [18]:
from sklearn.ensemble import RandomForestRegressor

# Train the Random Forest Regressor on the training split and predict the test split.
random_forest = RandomForestRegressor(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)
y_pred_rf = random_forest.predict(X_test)

# Calculate hold-out metrics on the test split.
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest Mean Squared Error: {mse_rf}")

r2_rf = r2_score(y_test, y_pred_rf)
print(f"Random Forest R^2 Score: {r2_rf}")

# Single source of truth for the fold count, so the printed label cannot drift from
# the value actually passed to cross_val_score (it previously said 10 while running 5).
cv_folds = 5

# The scorer returns negated MSE (higher is better), so flip the sign back.
cv_scores_rf = -cross_val_score(
    random_forest, X, y, cv=cv_folds, scoring='neg_mean_squared_error'
)
print(f"Random Forest Mean {cv_folds}-Fold Cross-Validation MSE Score: {cv_scores_rf.mean()}")

Random Forest Mean Squared Error: 5.747902264300438
Random Forest R^2 Score: 0.9382042347289574
Random Forest Mean 10-Fold Cross-Validation MSE Score: 7.076091229005639
