# Housing Price Prediction – Modeling
This notebook builds regression models using the preprocessed housing dataset to predict log-transformed housing prices. 
It covers the modeling portion of the project, focusing on fitting multiple regression algorithms and tuning their hyperparameters.

In [None]:
# Imports and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Load Preprocessed Data
# Read the preprocessed housing table from the working directory, then
# preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='processed_housing.csv')
df.head(n=5)

In [None]:
# Define Target and Features
# The target is the log-transformed price; every other column of `df`
# is kept as a candidate predictor.
y = df['price_log']
X = df.drop(columns='price_log')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Preprocessing Setup
# Column groups: zipcode is treated as a category, everything else listed
# here is treated as numeric.
categorical_features = ['zipcode']
numeric_features = [
    'bedrooms', 'bathrooms', 'floors', 'waterfront', 'view',
    'condition', 'grade',
    'sale_year', 'sale_month',
    'was_renovated', 'effective_year', 'effective_age',
    'sqft_living_log', 'sqft_lot_log', 'sqft_living15_log', 'sqft_lot15_log',
]

# Numeric columns are standardized.
numeric_transformer = StandardScaler()
# handle_unknown='ignore': a category not seen during fit is encoded as
# all zeros at transform time instead of raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Columns that appear in neither group are dropped, which is
# ColumnTransformer's default `remainder` behavior.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

In [None]:
# Gradient Boosting Regressor
# GradientBoostingRegressor comes from the imports cell at the top of the
# notebook; it is intentionally not re-imported here.

# Chain the preprocessing step with the regressor so the same transforms are
# applied at fit and predict time.
# NOTE: the pipeline holds the `preprocessor` object itself (not a copy), so
# fit() below fits that shared object in place.
gb_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', GradientBoostingRegressor(random_state=42))
])

gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

# The target is `price_log`, so all three metrics are on the log-price scale.
mae = mean_absolute_error(y_test, y_pred_gb)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_gb))
r2 = r2_score(y_test, y_pred_gb)

print("Gradient Boosting Results:")
print(f"MAE: {mae:.3f}")
print(f"RMSE: {rmse:.3f}")
print(f"R²: {r2:.3f}")

In [None]:
# TODO Sangit: Implement Linear Regression model pipeline

In [None]:
# TODO Rose: Implement Decision Tree model pipeline