In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

In [2]:
os.chdir('../')

In [3]:
selected_data = pd.read_csv('data/processed/data.csv')

In [4]:
X, y = selected_data.drop('Ilg', axis=1), selected_data['Ilg']

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
modelos_regresion = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    SVR(),
    KNeighborsRegressor(),
    GaussianProcessRegressor()
]

In [7]:
from sklearn.model_selection import KFold, cross_val_score

kf = KFold(n_splits=5, shuffle=True, random_state=42)

for modelo in modelos_regresion:
    scores = cross_val_score(modelo, X_scaled, y, scoring='neg_mean_squared_error', cv=kf)
    rmse_scores = np.sqrt(-scores)
    print(f"{modelo.__class__.__name__} - RMSE: {rmse_scores.mean():.2f} ± {rmse_scores.std():.2f}")

LinearRegression - RMSE: 997.99 ± 237.58
Ridge - RMSE: 993.99 ± 234.07
Lasso - RMSE: 997.00 ± 236.41
ElasticNet - RMSE: 927.62 ± 176.02
DecisionTreeRegressor - RMSE: 958.82 ± 205.24
RandomForestRegressor - RMSE: 934.30 ± 191.88
GradientBoostingRegressor - RMSE: 943.39 ± 196.41
SVR - RMSE: 937.86 ± 148.39
KNeighborsRegressor - RMSE: 937.84 ± 120.54
GaussianProcessRegressor - RMSE: 1174.33 ± 315.30
