# Modelos para entrenar

#### Importar librerías

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


#### Load del Dataset

In [None]:
# Absolute locations of the pre-processed rental dataset splits.
path_xtrain = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_Xtrain.csv'
path_xtest = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_Xtest.csv'
path_ytrain = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_ytrain.csv'
path_ytest = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado_ytest.csv'


def _read_split(csv_path):
    """Load one CSV split into a DataFrame using ``low_memory=False``."""
    return pd.read_csv(csv_path, low_memory=False)


# Feature matrices are kept as DataFrames.
X_train = _read_split(path_xtrain)
X_test = _read_split(path_xtest)

# Targets are flattened into 1-D NumPy arrays.
y_train = _read_split(path_ytrain).values.ravel()
y_test = _read_split(path_ytest).values.ravel()


#### Función para imprimir métricas

In [None]:
def print_metrics(y_train, y_pred_train, y_test, y_pred_test):
    """Print a table of regression metrics for the train and test splits.

    MAE, R², MSE and RMSE are computed for each split and printed as a
    DataFrame with one row per split ('Entrenamiento', 'Prueba') and one
    column per metric.

    Args:
        y_train: True target values of the training split.
        y_pred_train: Predictions for the training split.
        y_test: True target values of the test split.
        y_pred_test: Predictions for the test split.

    Returns:
        None. The table is written to stdout.
    """
    # (true values, predictions) pairs, in the same order as the row labels.
    split_labels = ['Entrenamiento', 'Prueba']
    split_pairs = [(y_train, y_pred_train), (y_test, y_pred_test)]

    mae = [mean_absolute_error(truth, pred) for truth, pred in split_pairs]
    r2 = [r2_score(truth, pred) for truth, pred in split_pairs]
    mse = [mean_squared_error(truth, pred) for truth, pred in split_pairs]
    # RMSE is derived from the MSE values rather than recomputed from the data.
    rmse = [np.sqrt(value) for value in mse]

    report = pd.DataFrame(
        {'MAE': mae, 'R²': r2, 'MSE': mse, 'RMSE': rmse},
        index=split_labels,
    )
    print(report)


## Regresión Lineal

In [None]:
# Baseline: ordinary least-squares regression with default settings.
# `fit` returns the estimator itself, so construction and training are chained.
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on both splits so train and test metrics can be compared.
y_pred_train = linear_model.predict(X_train)
y_pred_test = linear_model.predict(X_test)

print_metrics(
    y_train,
    y_pred_train,
    y_test,
    y_pred_test,
)

## Gradient Boosting con Grid Search Cross Validation

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings

# Silence user/future warnings so the verbose search log stays readable.
for ignored_category in (UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=ignored_category)

gbr_model = GradientBoostingRegressor(random_state=42)

# Two candidate values per hyperparameter: 2**5 = 32 combinations,
# each evaluated with 3-fold cross-validation below.
param_grid = dict(
    n_estimators=[80, 100],
    learning_rate=[0.5, 0.6],
    max_depth=[5, 6],
    min_samples_split=[2, 3],
    min_samples_leaf=[1, 2],
)

# Candidates are ranked by negated MSE; n_jobs=-1 uses every available core.
grid_search = GridSearchCV(
    gbr_model,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Mejores Hiperparámetros Encontrados:", best_params, sep="\n")

# Estimator built with the winning combination (refit on the whole training
# set under GridSearchCV's default `refit=True`).
best_gbr_model = grid_search.best_estimator_

# Quick check of the tuned model on the held-out test split.
y_pred_test = best_gbr_model.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)

print(f"MSE en conjunto de prueba: {mse_test:.4f}")
print(f"MAE en conjunto de prueba: {mae_test:.4f}")

In [None]:
# Full metric report for the tuned gradient boosting model.
# Both prediction sets are computed here from `best_gbr_model`, so the report
# cannot pick up a stale `y_pred_test` left in the notebook namespace by a
# different model's cell when cells are executed out of order.
y_pred_train = best_gbr_model.predict(X_train)
y_pred_test = best_gbr_model.predict(X_test)
print_metrics(y_train, y_pred_train, y_test, y_pred_test)

In [None]:
# Gradient boosting with a fixed, hand-picked configuration (no search).
fixed_gbr_params = dict(
    n_estimators=100,
    learning_rate=0.6,
    max_depth=7,
    random_state=42,
)

# `fit` returns the estimator, so it can be built and trained in one step.
gbr_model = GradientBoostingRegressor(**fixed_gbr_params).fit(X_train, y_train)

# Predict on the training split first, then on the test split.
y_pred_train, y_pred_test = (
    gbr_model.predict(features) for features in (X_train, X_test)
)

print_metrics(y_train, y_pred_train, y_test, y_pred_test)

## Red Neuronal

In [None]:
# Min-max scaling is learned from the training split only and then applied
# unchanged to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feed-forward regressor: two L2-regularised ReLU layers, each followed by
# 20% dropout, and a single linear output unit.
nn_model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(1, activation='linear'),
])

optimizer = Adam(learning_rate=0.0005)
nn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation figures are computed on the same rows as the final
# test metrics.
history = nn_model.fit(
    X_train_scaled,
    y_train,
    epochs=30,
    batch_size=16,
    validation_data=(X_test_scaled, y_test),
)

# Flatten the model output once so it is 1-D like the target arrays.
y_pred_train = nn_model.predict(X_train_scaled).ravel()
y_pred_test = nn_model.predict(X_test_scaled).ravel()

print_metrics(y_train, y_pred_train, y_test, y_pred_test)
