<a href="https://colab.research.google.com/github/roywang01-boop/AAI2026/blob/main/Part1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# --- Data ---------------------------------------------------------------
# Read the housing data, then separate the target from the two predictors.
df = pd.read_csv("house_prices.csv")
y = df["price"]
X = df[["square_footage", "location"]]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Model --------------------------------------------------------------
# "location" is categorical, so it is one-hot encoded. With
# handle_unknown="ignore", a category not seen during fitting is encoded as
# all zeros instead of raising. Every other column (here: square_footage)
# is passed through to the regressor unchanged.
location_encoder = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[("location", location_encoder, ["location"])],
    remainder="passthrough",
)

# Chain preprocessing and regression so both are fitted and applied together.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)

# Fit on the training portion only; the test portion stays untouched.
model.fit(X_train, y_train)

# Predict the price of a single hypothetical house.
# The query values are named once so that the input frame and the printed
# message cannot drift apart when someone changes the example.
query_sqft = 2000
query_location = "Downtown"

# Column names must match the columns the pipeline was fitted on.
new_house = pd.DataFrame({
    "square_footage": [query_sqft],
    "location": [query_location],
})

# predict() returns one value per input row; the frame has exactly one row.
predicted_price = model.predict(new_house)[0]

print(
    f"\nPredicted price for {query_sqft} sq ft house in {query_location}: "
    f"${predicted_price:,.2f}"
)

# Pair each fitted coefficient with a readable feature name.
# The ColumnTransformer emits the one-hot "location" columns first and
# appends the passthrough column (square_footage) at the right, so the
# names are assembled in that same order.
fitted_steps = model.named_steps
ohe = fitted_steps["preprocessor"].named_transformers_["location"]
feature_names = [*ohe.get_feature_names_out(["location"]).tolist(), "square_footage"]
coeffs = fitted_steps["regressor"].coef_

print("\nModel Coefficients:")
for name, coef in zip(feature_names, coeffs):
    print(f"{name}: {coef:.2f}")

# Plain-language interpretation of the fitted coefficients.
# square_footage is the passthrough column, which the ColumnTransformer
# places after the one-hot columns, so its coefficient is the last entry.
sqft_coef = coeffs[-1]
intercept = model.named_steps["regressor"].intercept_

print("\nExplanation:")
print(f"- The square_footage coefficient ({sqft_coef:.2f}) means that for every additional square foot,")
print(f"  the house price increases by about ${sqft_coef:.2f}, holding location constant.")
# The encoder keeps a column for every location (no category is dropped),
# so there is no baseline location. Each location coefficient is an offset
# on top of the intercept, and only differences between location
# coefficients are directly comparable.
print(f"- Location coefficients are per-location offsets added to the intercept ({intercept:,.2f}).")
print("  No location is dropped by the encoder, so there is no baseline location;")
print("  the difference between two location coefficients is the predicted price gap")
print("  between those locations at the same square footage.")


Predicted price for 2000 sq ft house in Downtown: $923,963.38

Model Coefficients:
location_Downtown: 333483.67
location_Rural: -266446.03
location_Suburb: -67037.64
square_footage: 307.64

Explanation:
- The square_footage coefficient (307.64) means that for every additional square foot,
  the house price increases by about $307.64, holding location constant.
- Location coefficients show how much more or less expensive each location is
  compared to the baseline location after encoding.
