# Modelos para entrenar

#### Importar librerías

In [50]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import numpy as np


#### División del Dataset

In [51]:
path_toia = '/Users/victoria/Desktop/alquiler_procesado.csv'
df = pd.read_csv(path_toia, low_memory=False)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/victoria/Desktop/alquiler_procesado.csv'

In [52]:
path_benja = '/Users/benjavitale/Documents/ML/TP_F/alquiler_procesado.csv'
df = pd.read_csv(path_benja, low_memory=False)

In [53]:
X = df.drop(columns=['precio_pesos_constantes'])  
y = df['precio_pesos_constantes']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")



X_train shape: (174908, 69)
X_test shape: (43727, 69)
y_train shape: (174908,)
y_test shape: (43727,)


#### Hago función para imprimir métricas

In [54]:
def print_metrics(y_train, y_pred_train, y_test, y_pred_test):
    # Calcular métricas
    mae_train = mean_absolute_error(y_train, y_pred_train)
    mae_test = mean_absolute_error(y_test, y_pred_test)
    
    r2_train = r2_score(y_train, y_pred_train)
    r2_test = r2_score(y_test, y_pred_test)
    
    mse_train = mean_squared_error(y_train, y_pred_train)
    mse_test = mean_squared_error(y_test, y_pred_test)
    
    rmse_train = np.sqrt(mse_train)
    rmse_test = np.sqrt(mse_test)

    # Crear DataFrame con las métricas
    metrics = {
        'MAE': [mae_train, mae_test],
        'R²': [r2_train, r2_test],
        'MSE': [mse_train, mse_test],
        'RMSE': [rmse_train, rmse_test]
    }
    metrics_df = pd.DataFrame(metrics, index=['Entrenamiento', 'Prueba'])

    print(metrics_df)


## Regresión Lineal

In [55]:
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

y_pred_train = linear_model.predict(X_train)
y_pred_test = linear_model.predict(X_test)

print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                        MAE        R²           MSE          RMSE
Entrenamiento  15138.741846  0.817592  1.953475e+09  44198.127947
Prueba         14886.800961  0.805958  1.811218e+09  42558.412291


## XGBOOST

In [59]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Crear el modelo Gradient Boosting
gbr_model = GradientBoostingRegressor(
    n_estimators=150, 
    learning_rate=0.1, 
    max_depth=3, 
    random_state=42
)

# Entrenar el modelo
gbr_model.fit(X_train, y_train)

# Hacer predicciones
y_pred_train = gbr_model.predict(X_train)
y_pred_test = gbr_model.predict(X_test)




print_metrics(y_train, y_pred_train, y_test, y_pred_test)

                       MAE        R²           MSE         RMSE
Entrenamiento  2678.794905  0.995096  5.251709e+07  7246.867623
Prueba         2767.907966  0.989954  9.377503e+07  9683.751044


## Red Neuronal

In [58]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Normalizar los datos si es necesario
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Crear el modelo
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dense(1, activation='linear'))  # Salida para regresión

# Compilar el modelo
nn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Entrenar el modelo
nn_model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, y_test))

# Predicciones
y_pred_train = nn_model.predict(X_train_scaled)
y_pred_test = nn_model.predict(X_test_scaled)

# Métricas
print_metrics(y_train, y_pred_train.ravel(), y_test, y_pred_test.ravel())


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 398us/step - loss: 11319412736.0000 - mae: 29947.0625 - val_loss: 8480977408.0000 - val_mae: 26360.7441
Epoch 2/20
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 380us/step - loss: 9360728064.0000 - mae: 26216.3965 - val_loss: 8003245056.0000 - val_mae: 24254.1680
Epoch 3/20
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 372us/step - loss: 9520183296.0000 - mae: 24493.5078 - val_loss: 7643404288.0000 - val_mae: 22465.2656
Epoch 4/20
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 379us/step - loss: 9127671808.0000 - mae: 24031.2891 - val_loss: 7390640640.0000 - val_mae: 22322.8145
Epoch 5/20
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 372us/step - loss: 8125684736.0000 - mae: 23206.3320 - val_loss: 7152235520.0000 - val_mae: 22742.1191
Epoch 6/20
[1m5466/5466[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 372us/step

In [60]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Normalizar los datos
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Crear el modelo
nn_model = Sequential()
nn_model.add(Dense(128, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(1, activation='linear'))

# Compilar el modelo con tasa de aprendizaje ajustada
optimizer = Adam(learning_rate=0.0005)
nn_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Entrenar el modelo
history = nn_model.fit(X_train_scaled, y_train, epochs=30, batch_size=16, validation_data=(X_test_scaled, y_test))

# Predicciones
y_pred_train = nn_model.predict(X_train_scaled).ravel()
y_pred_test = nn_model.predict(X_test_scaled).ravel()

# Métricas
print_metrics(y_train, y_pred_train.ravel(), y_test, y_pred_test.ravel())


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m10932/10932[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 460us/step - loss: 10962220032.0000 - mae: 29481.0879 - val_loss: 8445754368.0000 - val_mae: 26372.7891
Epoch 2/30
[1m10932/10932[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 452us/step - loss: 9542863872.0000 - mae: 26167.0840 - val_loss: 7971095552.0000 - val_mae: 22915.6816
Epoch 3/30
[1m10932/10932[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 452us/step - loss: 9399122944.0000 - mae: 23765.5488 - val_loss: 7610060288.0000 - val_mae: 21267.8711
Epoch 4/30
[1m10932/10932[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 458us/step - loss: 8997250048.0000 - mae: 22364.9902 - val_loss: 7335232512.0000 - val_mae: 21165.1484
Epoch 5/30
[1m10932/10932[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 453us/step - loss: 8687687680.0000 - mae: 21959.1406 - val_loss: 7082760704.0000 - val_mae: 19821.6055
Epoch 6/30
[1m10932/10932[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0