In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, cross_val_score
import joblib

# 1. Load the properties CSV.
df = pd.read_csv("./properties_20250525_233557.csv")

# 2. Drop the columns that are not used, and any row without a sale price:
#    the regressor cannot be trained or scored against a missing target.
df_clean = (
    df.drop(columns=["precio_arriendo", "closets"])
    .dropna(subset=["precio_venta"])
)

# Target is the sale price; every remaining column is a candidate feature.
y = df_clean["precio_venta"]
X = df_clean.drop(columns=["precio_venta"])

# 3. Split the feature columns into categorical and numeric groups.
# Numeric columns are selected by kind ("number") instead of by exact dtype,
# so columns stored as e.g. int32 or float32 are not silently left out.
# `X` no longer contains the target, so no extra filtering is needed here.
categorical_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()
numerical_cols = X.select_dtypes(include="number").columns.tolist()

# Columns in neither list are not passed to the model (a ColumnTransformer
# drops unlisted columns by default), so make that visible instead of silent.
unused_cols = [
    col for col in X.columns
    if col not in categorical_cols and col not in numerical_cols
]
if unused_cols:
    print(f"Columns not used as features: {unused_cols}")

# 4. Preprocessing
# Numeric features: fill missing values with the column median.
numeric_transformer = SimpleImputer(strategy="median")

# Categorical features: fill missing values with the most frequent value,
# then one-hot encode. Categories not seen during fit are ignored.
categorical_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Route each column group to its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numerical_cols),
        ("cat", categorical_transformer, categorical_cols),
    ]
)

# 5. Model pipeline: preprocessing followed by a random forest regressor
#    (100 trees, fixed seed).
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(random_state=42, n_estimators=100)),
    ]
)

# 6. Hold out 20% of the rows as a test set, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# 7. Fit the full pipeline on the training split.
model.fit(X=X_train, y=y_train)

# 8. 5-fold cross-validated R² on the training split only.
cv_scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    scoring="r2",
    cv=5,
)
print(f"CV R² mean: {cv_scores.mean():.3f}, std: {cv_scores.std():.3f}")

# 9. R² of the fitted pipeline on the held-out test split.
test_score = model.score(X=X_test, y=y_test)
print(f"Test R²: {test_score:.3f}")

# 10. Persist the fitted pipeline (preprocessing + regressor) to disk.
# NOTE(review): the recorded output of this cell shows negative R² values
# (CV mean -247.404, test -2.988) -- confirm model quality before relying
# on this artifact.
joblib.dump(value=model, filename="estimacion_aptos_v1.pkl")


CV R² mean: -247.404, std: 493.983
Test R²: -2.988


['estimacion_aptos_v1.pkl']