In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
from sklearn.impute import SimpleImputer

In [None]:
# Load the housing CSV and keep only the rows that have no missing values.
housing_data = pd.read_csv('./datasets/housing.csv').dropna()

In [None]:
# Add three ratio features derived from the raw count columns:
#   - rooms_per_household:      total_rooms / households
#   - bedrooms_per_room:        total_bedrooms / total_rooms
#   - population_per_household: population / households
housing_data = housing_data.assign(
    rooms_per_household=housing_data["total_rooms"] / housing_data["households"],
    bedrooms_per_room=housing_data["total_bedrooms"] / housing_data["total_rooms"],
    population_per_household=housing_data["population"] / housing_data["households"],
)

In [None]:
# Separate the regression target from the predictors, then hold out 20% of
# the rows as a test set (fixed seed so the split is reproducible).
y = housing_data["median_house_value"]
X = housing_data.drop(columns=["median_house_value"])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Manual preprocessing, step 1: mean-impute the numeric columns.
# The imputer learns its column means from the training split only and the
# same fitted means are then applied to both splits.
numeric_features = X_train.select_dtypes(include=["float64", "int64"]).columns
imputer = SimpleImputer(strategy="mean")
imputer.fit(X_train[numeric_features])
X_train[numeric_features] = imputer.transform(X_train[numeric_features])
X_test[numeric_features] = imputer.transform(X_test[numeric_features])

In [None]:
# Manual preprocessing, step 2: one-hot encode the categorical column.
# drop="first" omits the first category so the dummy columns are not
# perfectly collinear; a dense array is requested so the result can be
# stacked with the numeric features afterwards.
categorical_feature = "ocean_proximity"
try:
    # scikit-learn >= 1.2 names the dense-output switch `sparse_output`;
    # the old `sparse` keyword was removed in 1.4.
    encoder = OneHotEncoder(drop="first", sparse_output=False)
except TypeError:
    # Older scikit-learn releases only accept the legacy `sparse` keyword.
    encoder = OneHotEncoder(drop="first", sparse=False)
# Fit the category list on the training split only, then reuse it for test.
X_train_encoded = encoder.fit_transform(X_train[[categorical_feature]])
X_test_encoded = encoder.transform(X_test[[categorical_feature]])

In [None]:
# Place the encoded categorical columns to the right of the numeric columns.
# NOTE: from here on X_train / X_test are plain NumPy arrays, not DataFrames,
# so the DataFrame-based cells above need the split cell re-run before they
# can be executed again.
X_train = np.concatenate(
    [X_train[numeric_features].to_numpy(), X_train_encoded], axis=1
)
X_test = np.concatenate(
    [X_test[numeric_features].to_numpy(), X_test_encoded], axis=1
)

In [None]:
# Expand the feature matrix with all degree-2 polynomial terms (no constant
# bias column). The expansion is fitted on the training matrix and the same
# fitted transformer is applied to the test matrix.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

In [None]:
# Fit an ordinary least-squares linear regression on the polynomial features.
model = LinearRegression().fit(X_train_poly, y_train)
# Leave the fitted estimator as the last expression so the cell displays it.
model

In [None]:
# Score the model with R^2 on both the training split and the held-out split.
train_predictions = model.predict(X_train_poly)
test_predictions = model.predict(X_test_poly)
train_score_poly = r2_score(y_train, train_predictions)
test_score_poly = r2_score(y_test, test_predictions)

In [None]:
# Show both R^2 scores, rounded to four decimals, as the cell's output.
(
    f"Training Score: {train_score_poly:.4f}",
    f"Test Score: {test_score_poly:.4f}",
)