In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

In [2]:
os.chdir('../')

In [3]:
selected_data = pd.read_csv('data/processed/data.csv')

In [4]:
X, y = selected_data.drop('Ilg', axis=1), selected_data['Ilg']

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
modelos_regresion = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    SVR(),
    KNeighborsRegressor(),
    GaussianProcessRegressor()
]

In [7]:
from sklearn.model_selection import KFold, cross_val_score

kf = KFold(n_splits=5, shuffle=True, random_state=42)

for modelo in modelos_regresion:
    scores = cross_val_score(modelo, X_scaled, y, scoring='neg_mean_squared_error', cv=kf)
    rmse_scores = np.sqrt(-scores)
    print(f"{modelo.__class__.__name__} - RMSE: {rmse_scores.mean():.2f} ± {rmse_scores.std():.2f}")

LinearRegression - RMSE: 1002.43 ± 243.10
Ridge - RMSE: 998.52 ± 239.65
Lasso - RMSE: 1001.45 ± 241.92
ElasticNet - RMSE: 932.62 ± 181.04
DecisionTreeRegressor - RMSE: 966.30 ± 214.66
RandomForestRegressor - RMSE: 948.03 ± 204.42
GradientBoostingRegressor - RMSE: 947.33 ± 201.75
SVR - RMSE: 937.69 ± 148.52
KNeighborsRegressor - RMSE: 989.11 ± 108.26
GaussianProcessRegressor - RMSE: 1120.66 ± 283.90
