In [13]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

In [2]:
os.chdir('../')

In [3]:
processed_data = pd.read_csv('data/processed/data.csv')

In [7]:
X, y = processed_data.drop('ILG', axis=1), processed_data['ILG']

In [10]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [11]:
modelos_regresion = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    SVR(),
    KNeighborsRegressor(),
    GaussianProcessRegressor()
]

In [14]:
from sklearn.model_selection import KFold, cross_val_score

kf = KFold(n_splits=5, shuffle=True, random_state=42)

for modelo in modelos_regresion:
    scores = cross_val_score(modelo, X_scaled, y, scoring='neg_mean_squared_error', cv=kf)
    rmse_scores = np.sqrt(-scores)
    print(f"{modelo.__class__.__name__} - RMSE: {rmse_scores.mean():.2f} ± {rmse_scores.std():.2f}")

LinearRegression - RMSE: 155.50 ± 43.29
Ridge - RMSE: 158.08 ± 46.73
Lasso - RMSE: 155.21 ± 43.40
ElasticNet - RMSE: 390.93 ± 94.30
DecisionTreeRegressor - RMSE: 205.54 ± 68.65
RandomForestRegressor - RMSE: 181.61 ± 48.80
GradientBoostingRegressor - RMSE: 172.32 ± 37.07
SVR - RMSE: 932.99 ± 148.09
KNeighborsRegressor - RMSE: 374.55 ± 121.20
GaussianProcessRegressor - RMSE: 814.90 ± 326.74
