<a href="https://colab.research.google.com/github/sayalikamble4567/2026-BUS4-118S-Sec-02-Special-Topics-MIS/blob/main/Predict_house_prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Load the housing dataset from an Excel workbook (the worksheet itself was
# generated with ChatGPT).
# NOTE(review): the path is absolute, at the filesystem root - adjust it if the
# workbook lives elsewhere.
df = pd.read_excel('/Housing_Price_Dataset_With_Location.xlsx')
# Model inputs are the square footage and the location; the target is the price.
feature_columns = ['Square_Feet', 'Location']
X = df[feature_columns]
y = df.loc[:, 'Price']
# Preprocessing: expand the 'Location' column into one indicator column per
# category (dense output). Every other input column is passed through
# unchanged and placed after the encoded columns.
location_encoder = OneHotEncoder(sparse_output=False)
preprocessor = ColumnTransformer(
    transformers=[('Location', location_encoder, ['Location'])],
    remainder='passthrough',
)
# Chain preprocessing and the linear regression so that fit() and predict()
# apply the same transformation to whatever frame they are given.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)
# Hold out 30% of the rows for testing; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Fit the whole pipeline (encoder + regression) on the training rows only.
model.fit(X_train, y_train)
# Score the fitted model on the held-out rows.
from sklearn.metrics import mean_absolute_error, r2_score
y_pred = model.predict(X_test)
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print("MAE:", test_mae)
print("R^2:", test_r2)
# Predict the price of a single new listing: 2000 sq ft in Downtown.
# The frame uses the same column names as the training features so the
# pipeline can select them by name.
new_house = pd.DataFrame([{'Square_Feet': 2000, 'Location': 'Downtown'}])
predicted_price = model.predict(new_house)  # array with one prediction
print(f"Predicted price for a 2000 sq ft house in Downtown: ${predicted_price[0]:,.2f}")
# Display the fitted linear model: one coefficient per preprocessed column,
# followed by the intercept.
#
# The labels are taken from the fitted ColumnTransformer itself instead of
# being assembled by hand, so they always follow the real order of the columns
# the regressor was trained on (encoded columns and passthrough columns alike).
# The transformer reports names as "<transformer>__<column>", e.g.
# "Location__Location_Downtown" and "remainder__Square_Feet"; the prefix is
# stripped so the printed labels stay short.
transformed_names = model.named_steps['preprocessor'].get_feature_names_out()
feature_names = [name.split('__', 1)[-1] for name in transformed_names]
regressor = model.named_steps['regressor']
coefficients = regressor.coef_
print("\nModel Coefficients:")
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef:.2f}")
# The intercept is part of every prediction (prediction = intercept + sum of
# coefficient * feature value), so report it alongside the coefficients.
print(f"Intercept: {regressor.intercept_:.2f}")

# The square footage coefficient represents how much the house price is
# expected to increase for each additional one square foot (assuming the
# location stays the same). This value shows the direct relationship between
# size and price in the linear regression model.

# Location affects price through one-hot encoded variables, which allow the
# model to assign a separate price adjustment for Downtown, Suburb, and Rural
# homes. Because no category is dropped from the encoding, there is no single
# reference location: each location coefficient is an offset (an increase or a
# decrease) from the model's intercept, which acts as the shared baseline
# price. The final predicted price is the intercept plus the square footage
# effect plus the offset for the home's location.


MAE: 41249.78285171216
R^2: 0.8988322109499896
Predicted price for a 2000 sq ft house in Downtown: $344,624.23

Model Coefficients:
Location_Downtown: -3579.74
Location_Rural: 2058.17
Location_Suburb: 1521.56
Square_Feet: 148.21
