# Modelos para entrenar

#### Importar librerías

In [50]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np


#### División del Dataset

In [51]:
path_toia = '/Users/victoria/Desktop/alquiler_procesado.csv'
df = pd.read_csv(path_toia, low_memory=False)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/victoria/Desktop/alquiler_procesado.csv'

In [52]:
path_benja = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado.csv'
df = pd.read_csv(path_benja, low_memory=False)

In [53]:
X = df.drop(columns=['precio_pesos_constantes'])  
y = df['precio_pesos_constantes']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")



X_train shape: (174908, 69)
X_test shape: (43727, 69)
y_train shape: (174908,)
y_test shape: (43727,)


#### Hago función para imprimir métricas

In [54]:
def print_metrics(y_train, y_pred_train, y_test, y_pred_test):
    # Calcular métricas
    mae_train = mean_absolute_error(y_train, y_pred_train)
    mae_test = mean_absolute_error(y_test, y_pred_test)
    
    r2_train = r2_score(y_train, y_pred_train)
    r2_test = r2_score(y_test, y_pred_test)
    
    mse_train = mean_squared_error(y_train, y_pred_train)
    mse_test = mean_squared_error(y_test, y_pred_test)
    
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)

    # Crear DataFrame con las métricas
    metrics = {
        'MAE': [mae_train, mae_test],
        'R²': [r2_train, r2_test],
        'MSE': [mse_train, mse_test],
        'RMSE': [rmse_train, rmse_test]
    }
    metrics_df = pd.DataFrame(metrics, index=['Entrenamiento', 'Prueba'])

    print(metrics_df)


## Regresión Lineal

In [55]:
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

y_pred_train = linear_model.predict(X_train)
y_pred_test = linear_model.predict(X_test)

print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                        MAE        R²           MSE          RMSE
Entrenamiento  15138.741846  0.817592  1.953475e+09  44198.127947
Prueba         14886.800961  0.805958  1.811218e+09  42558.412291


## XGBOOST

In [56]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Crear el modelo Gradient Boosting
gbr_model = GradientBoostingRegressor(
    n_estimators=100, 
    learning_rate=0.1, 
    max_depth=3, 
    random_state=42
)

# Entrenar el modelo
gbr_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_train = gbr_model.predict(X_train)
y_pred_test = gbr_model.predict(X_test)




print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                       MAE        R²           MSE          RMSE
Entrenamiento  3433.860240  0.991065  9.568986e+07   9782.119270
Prueba         3506.521912  0.985569  1.347056e+08  11606.275768


## Red Neuronal

In [57]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Normalizar los datos si es necesario
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Crear el modelo
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(1, activation='linear'))  # Salida para regresión

# Compilar el modelo
nn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Entrenar el modelo
nn_model.fit(X_train_scaled, y_train, epochs=15, batch_size=32, validation_data=(X_test_scaled, y_test))

# Predicciones
y_pred_train = nn_model.predict(X_train_scaled)
y_pred_test = nn_model.predict(X_test_scaled)

# Métricas
print_metrics(y_train, y_pred_train.ravel(), y_test, y_pred_test.ravel())


Epoch 1/15


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 396us/step - loss: 11476112384.0000 - mae: 29848.7520 - val_loss: 8493653504.0000 - val_mae: 26975.7441
Epoch 2/15
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 389us/step - loss: 10110289920.0000 - mae: 27028.9570 - val_loss: 8038120960.0000 - val_mae: 22934.4766
Epoch 3/15
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 386us/step - loss: 9621452800.0000 - mae: 24428.1621 - val_loss: 7682275328.0000 - val_mae: 23306.1348
Epoch 4/15
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 393us/step - loss: 8967611392.0000 - mae: 23928.0000 - val_loss: 7425671680.0000 - val_mae: 22670.9355
Epoch 5/15
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 387us/step - loss: 8808158208.0000 - mae: 23209.0957 - val_loss: 7182736384.0000 - val_mae: 20759.4688
Epoch 6/15
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 383us/step - loss: 7