In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

In [2]:
os.chdir('../')

In [3]:
processed_data = pd.read_csv('data/selected/data.csv')

In [5]:
X, y = processed_data.drop('Ilg', axis=1), processed_data['Ilg']

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
modelos_regresion = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    SVR(),
    KNeighborsRegressor(),
    GaussianProcessRegressor()
]

In [8]:
from sklearn.model_selection import KFold, cross_val_score

kf = KFold(n_splits=5, shuffle=True, random_state=42)

for modelo in modelos_regresion:
    scores = cross_val_score(modelo, X_scaled, y, scoring='neg_mean_squared_error', cv=kf)
    rmse_scores = np.sqrt(-scores)
    print(f"{modelo.__class__.__name__} - RMSE: {rmse_scores.mean():.2f} ± {rmse_scores.std():.2f}")

LinearRegression - RMSE: 153.70 ± 42.38
Ridge - RMSE: 157.65 ± 47.68
Lasso - RMSE: 153.97 ± 42.88
ElasticNet - RMSE: 401.63 ± 106.80
DecisionTreeRegressor - RMSE: 210.58 ± 63.22
RandomForestRegressor - RMSE: 176.49 ± 45.98
GradientBoostingRegressor - RMSE: 174.27 ± 38.95
SVR - RMSE: 934.59 ± 148.17
KNeighborsRegressor - RMSE: 449.03 ± 146.08
GaussianProcessRegressor - RMSE: 948.91 ± 169.22
