## Predicción de precios de casas con Redes Neuronales
### Objetivo
Predecir el valor de un producto dado un historico de precios.

### Integrantes:

- Nery Fuentes - 15004337
- Luis Barreno - 24007314

### Importación de librerias

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib

### Carga de datos

In [2]:
df = pd.read_excel("HousePricePrediction.xlsx")

df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,LotConfig,BldgType,OverallCond,YearBuilt,YearRemodAdd,Exterior1st,BsmtFinSF2,TotalBsmtSF,SalePrice
0,0,60,RL,8450,Inside,1Fam,5,2003,2003,VinylSd,0.0,856.0,208500.0
1,1,20,RL,9600,FR2,1Fam,8,1976,1976,MetalSd,0.0,1262.0,181500.0
2,2,60,RL,11250,Inside,1Fam,5,2001,2002,VinylSd,0.0,920.0,223500.0
3,3,70,RL,9550,Corner,1Fam,5,1915,1970,Wd Sdng,0.0,756.0,140000.0
4,4,60,RL,14260,FR2,1Fam,5,2000,2000,VinylSd,0.0,1145.0,250000.0


In [16]:
df=df.dropna()

In [3]:
# Separar características y etiquetas
y_column="SalePrice"

X = df.drop(columns=[y_column])
y = df[y_column]

### Normalizar datos

In [4]:
# Codificar variables categóricas
categorical_columns = ['MSZoning', 'LotConfig', 'BldgType', 'Exterior1st']
encoder = OneHotEncoder(sparse_output=False, drop='first')
X_encoded = pd.DataFrame(encoder.fit_transform(X[categorical_columns]), columns=encoder.get_feature_names_out())
X = pd.concat([X.drop(columns=categorical_columns), X_encoded], axis=1)

# Escalar los datos
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [7]:
print(X_train)

[[ 1.41341051  2.41970004 -1.01312116 ... -0.40481551 -0.13985672
  -0.01851217]
 [ 0.29668566 -0.87361603 -0.33835076 ... -0.40481551 -0.13985672
  -0.01851217]
 [-0.70017817  0.06733141 -0.02918139 ... -0.40481551 -0.13985672
  -0.01851217]
 ...
 [-0.39043833 -0.16790545 -0.2997997  ... -0.40481551  7.15017482
  -0.01851217]
 [-0.19581254 -0.87361603 -0.25313262 ... -0.40481551  7.15017482
  -0.01851217]
 [-0.71085885 -0.16790545 -0.32034336 ...  2.47026108 -0.13985672
  -0.01851217]]


### Crear y entrenar el modelo

In [39]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

# Definición del modelo
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(1)
])

# Configurar optimizador
optimizer = Adam(learning_rate=0.001)

# Compilar modelo
model.compile(
    optimizer=optimizer,
    loss=MeanSquaredError(),
    metrics=[MeanAbsoluteError()]
)

# Early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Entrenamiento
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)



Epoch 1/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 39242706944.0000 - mean_absolute_error: 181313.3125 - val_loss: 37830627328.0000 - val_mean_absolute_error: 181070.6875
Epoch 2/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 39068323840.0000 - mean_absolute_error: 181710.1250 - val_loss: 37732634624.0000 - val_mean_absolute_error: 180813.8594
Epoch 3/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 38987104256.0000 - mean_absolute_error: 180630.9688 - val_loss: 37256880128.0000 - val_mean_absolute_error: 179604.9531
Epoch 4/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 36885884928.0000 - mean_absolute_error: 177021.6562 - val_loss: 35734155264.0000 - val_mean_absolute_error: 175769.1250
Epoch 5/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 36829687808.0000 - mean_absolute_error: 176375.8750 - v

### Evaluación del modelo

In [40]:
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score

# Realizar predicciones en el conjunto de prueba
y_pred = model.predict(X_test)
# Calcular métricas
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f"Métricas del modelo en el conjunto de prueba:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
r2 = r2_score(y_test, y_pred)
print(f"R^2: {r2:.2f}")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Métricas del modelo en el conjunto de prueba:
Mean Squared Error (MSE): 2666866168.39
Root Mean Squared Error (RMSE): 51641.71
Mean Absolute Error (MAE): 31636.53
R^2: 0.65


### Guardar modelo

In [41]:
# Guardar el modelo y el escalador
model.save('house_price_model.h5')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(encoder, 'encoder.pkl')



['encoder.pkl']

In [None]:
from sklearn.compose import ColumnTransformer

# Crear el ColumnTransformer para las columnas categóricas
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first'), ['MSZoning', 'LotConfig', 'BldgType', 'Exterior1st'])
    ],
    remainder='passthrough'  # Deja las demás columnas sin cambios
)

# Ajustar y transformar las características de entrenamiento
#X_train_processed = preprocessor.fit_transform(X_train)

# Guardar el preprocesador para usarlo en Flask
joblib.dump(preprocessor, 'preprocessor.pkl')

['preprocessor.pkl']

: 