In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os


base_path = os.path.abspath(os.path.join('..'))
file_path = os.path.join(base_path, 'datasets', 'cleaned_life_expectancy_data.csv')
cleaned = pd.read_csv(file_path)

X = cleaned.drop(columns=['Life Expectancy'])
y = cleaned['Life Expectancy']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Lasso

In [2]:
from sklearn.linear_model import Lasso
import numpy as np
from sklearn.model_selection import GridSearchCV

param_grid_lasso = {
    'alpha': np.logspace(-4, 4, 100)
}

lasso = Lasso()
grid_search_lasso = GridSearchCV(lasso, param_grid_lasso, cv=10, scoring='neg_mean_squared_error')
grid_search_lasso.fit(X_train, y_train)
best_params_lasso = grid_search_lasso.best_params_

print("Best hyperparameters for Lasso with extended alpha range:")
print(best_params_lasso)

lasso = Lasso(alpha=best_params_lasso['alpha'])
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)

Best hyperparameters for Lasso with extended alpha range:
{'alpha': np.float64(0.010476157527896652)}


In [3]:
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_pred)
print(f'R2 Score: {r2}')

from sklearn.model_selection import cross_val_score
scores = cross_val_score(lasso, X_train, y_train, cv=10, scoring='neg_mean_squared_error')
mean_mse = -scores.mean()
print(f'Mean Cross-Validated MSE: {mean_mse}')


Mean Squared Error: 13.620269324638118
R2 Score: 0.8535683232922618
Mean Cross-Validated MSE: 11.957891918146078


## Ridge

In [4]:
from sklearn.linear_model import Ridge

param_grid_ridge = {
    'alpha': np.logspace(-10, 10, 100)
}

ridge = Ridge()
grid_search_ridge = GridSearchCV(ridge, param_grid_ridge, cv=10, scoring='neg_mean_squared_error')
grid_search_ridge.fit(X_train, y_train)
best_params_ridge = grid_search_ridge.best_params_

print(f"Best alpha for Ridge: {best_params_ridge['alpha']}")

ridge = Ridge(alpha=best_params_ridge['alpha'])
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

Best alpha for Ridge: 1.2618568830660184


In [5]:
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
print(f'Mean Squared Error (Ridge): {mse_ridge}')

r2_ridge = r2_score(y_test, y_pred_ridge)
print(f'R2 Score (Ridge): {r2_ridge}')

scores_ridge = cross_val_score(ridge, X_train, y_train, cv=10, scoring='neg_mean_squared_error')
mean_mse_ridge = -scores_ridge.mean()
print(f'Mean Cross-Validated MSE (Ridge): {mean_mse_ridge}')

Mean Squared Error (Ridge): 13.579136551749109
R2 Score (Ridge): 0.854010542220406
Mean Cross-Validated MSE (Ridge): 11.988993688708101


## ElasticNet

In [6]:
from sklearn.linear_model import ElasticNet

param_grid_elastic_net = {
    'alpha': np.logspace(-10, 10, 100),
    'l1_ratio': np.linspace(-10, 10, 100)
}

elastic_net = ElasticNet()
grid_search_elastic_net = GridSearchCV(elastic_net, param_grid_elastic_net, cv=10, scoring='neg_mean_squared_error')
grid_search_elastic_net.fit(X_train, y_train)
best_params_elastic_net = grid_search_elastic_net.best_params_

print("Best hyperparameters for ElasticNet:")
print(best_params_elastic_net)

elastic_net = ElasticNet(alpha=best_params_elastic_net["alpha"], l1_ratio=best_params_elastic_net["l1_ratio"])
elastic_net.fit(X_train, y_train)
y_pred_elastic_net = elastic_net.predict(X_test)

95000 fits failed out of a total of 100000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1000 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\karti\LifeExpectancy\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\karti\LifeExpectancy\.venv\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "c:\Users\karti\LifeExpectancy\.venv\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\karti\LifeExpectancy\.venv\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in vali

Best hyperparameters for ElasticNet:
{'alpha': np.float64(0.004750810162102793), 'l1_ratio': np.float64(0.9090909090909083)}


In [7]:
mse_elastic_net = mean_squared_error(y_test, y_pred_elastic_net)
print(f'Mean Squared Error (ElasticNet): {mse_elastic_net}')

r2_elastic_net = r2_score(y_test, y_pred_elastic_net)
print(f'R2 Score (ElasticNet): {r2_elastic_net}')

scores_elastic_net = cross_val_score(elastic_net, X_train, y_train, cv=10, scoring='neg_mean_squared_error')
mean_mse_elastic_net = -scores_elastic_net.mean()
print(f'Mean Cross-Validated MSE (ElasticNet): {mean_mse_elastic_net}')

Mean Squared Error (ElasticNet): 13.601766618190995
R2 Score (ElasticNet): 0.8537672461082579
Mean Cross-Validated MSE (ElasticNet): 11.963264766868145
