# Modelos para entrenar

#### Importar librerías

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


#### Load del Dataset

In [12]:
path_xtrain = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_Xtrain.csv'
path_xtest = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_Xtest.csv'

path_ytrain = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_ytrain.csv'
path_ytest = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_ytest.csv'
X_train = pd.read_csv(path_xtrain, low_memory=False)
X_test = pd.read_csv(path_xtest, low_memory=False)
y_train = pd.read_csv(path_ytrain, low_memory=False)
y_test = pd.read_csv(path_ytest, low_memory=False)

y_train = y_train.values.ravel()
y_test = y_test.values.ravel()


#### Función para imprimir métricas

In [13]:
def print_metrics(y_train, y_pred_train, y_test, y_pred_test):
    mae_train = mean_absolute_error(y_train, y_pred_train)
    mae_test = mean_absolute_error(y_test, y_pred_test)
    
    r2_train = r2_score(y_train, y_pred_train)
    r2_test = r2_score(y_test, y_pred_test)
    
    mse_train = mean_squared_error(y_train, y_pred_train)
    mse_test = mean_squared_error(y_test, y_pred_test)
    
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)

    metrics = {
        'MAE': [mae_train, mae_test],
        'R²': [r2_train, r2_test],
        'MSE': [mse_train, mse_test],
        'RMSE': [rmse_train, rmse_test]
    }
    metrics_df = pd.DataFrame(metrics, index=['Entrenamiento', 'Prueba'])

    print(metrics_df)


## Regresión Lineal

In [30]:
linear_model = LinearRegression()
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
linear_model.fit(X_train_scaled, y_train)

y_pred_train = linear_model.predict(X_train_scaled)
y_pred_test = linear_model.predict(X_test_scaled)

print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                        MAE        R²           MSE           RMSE
Entrenamiento  54278.960428  0.624003  1.251647e+10  111877.017513
Prueba         53645.717510  0.659836  1.236765e+10  111209.930880


## Gradient Boosting con Grind Search Cross Validation

In [15]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings

warnings.filterwarnings("ignore", category=UserWarning)  
warnings.filterwarnings("ignore", category=FutureWarning)

gbr_model = GradientBoostingRegressor(random_state=42)

param_grid = {
    'n_estimators': [80, 100],        
    'learning_rate': [0.5, 0.6],    
    'max_depth': [5, 6],
    'min_samples_split': [2,3],
    'min_samples_leaf': [1,2]                       
}

grid_search = GridSearchCV(
    estimator=gbr_model, 
    param_grid=param_grid, 
    scoring='neg_mean_squared_error',  
    cv=3,                             
    verbose=2,                      
    n_jobs=-1                         
)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Mejores Hiperparámetros Encontrados:")
print(best_params)

best_gbr_model = grid_search.best_estimator_

y_pred_test = best_gbr_model.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)

print(f"MSE en conjunto de prueba: {mse_test:.4f}")
print(f"MAE en conjunto de prueba: {mae_test:.4f}")

Fitting 3 folds for each of 32 candidates, totalling 96 fits
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=80; total time=   4.8s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=3, n_estimators=80; total time=   4.7s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=3, n_estimators=80; total time=   4.7s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=80; total time=   4.9s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=80; total time=   5.1s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.8s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.8s
[CV] END learning_rate=0.5, max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=

In [16]:
y_pred_train = best_gbr_model.predict(X_train)
print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                        MAE        R²           MSE          RMSE
Entrenamiento  13927.429719  0.985859  4.707220e+08  21696.129137
Prueba         19310.329274  0.939168  2.211727e+09  47029.004466


In [17]:
gbr_model = GradientBoostingRegressor(
    n_estimators=100, 
    learning_rate=0.6, 
    max_depth=7, 
    random_state=42
)

gbr_model.fit(X_train, y_train)

y_pred_train = gbr_model.predict(X_train)
y_pred_test = gbr_model.predict(X_test)

print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                        MAE        R²           MSE          RMSE
Entrenamiento   7725.735946  0.993649  2.114227e+08  14540.381264
Prueba         16022.650226  0.923034  2.798322e+09  52899.164718


## Red Neuronal

In [26]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

nn_model = Sequential()
nn_model.add(Dense(64, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.0001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.00001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.00001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.00001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.00001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(1, activation='linear'))

optimizer = Adam(learning_rate=0.0005)
nn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

history = nn_model.fit(X_train_scaled, y_train, epochs=40, batch_size=16, validation_data=(X_test_scaled, y_test))

y_pred_train = nn_model.predict(X_train_scaled).ravel()
y_pred_test = nn_model.predict(X_test_scaled).ravel()

print_metrics(y_train, y_pred_train.ravel(), y_test, y_pred_test.ravel())


Epoch 1/40
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 676us/step - loss: 44505706496.0000 - mae: 111676.1172 - val_loss: 35176325120.0000 - val_mae: 79267.8516
Epoch 2/40
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 608us/step - loss: 29821218816.0000 - mae: 80143.5703 - val_loss: 23249221632.0000 - val_mae: 73759.3281
Epoch 3/40
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 599us/step - loss: 20605067264.0000 - mae: 72050.7344 - val_loss: 14467225600.0000 - val_mae: 57519.2266
Epoch 4/40
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 655us/step - loss: 15272458240.0000 - mae: 61561.0625 - val_loss: 13865213952.0000 - val_mae: 54774.9922
Epoch 5/40
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 630us/step - loss: 15660104704.0000 - mae: 61311.3203 - val_loss: 12630452224.0000 - val_mae: 54023.7773
Epoch 6/40
[1m1382/1382[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 

In [35]:
import joblib

# Supongamos que tienes un MinMaxScaler llamado 'scaler'
joblib.dump(scaler, 'scaler.pkl')  # Guarda el objeto de normalización en un archivo
joblib.dump(nn_model,'nn_model.pkl')
joblib.dump(best_gbr_model,'best_gbr_model.pkl')
joblib.dump(linear_model,'linear_model.pkl')
joblib.dump(X_train,'X_train.pkl')

['X_train.pkl']