<a href="https://colab.research.google.com/github/linhb03/Ai118Project/blob/dev/KingCountry_house_price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This project uses the "House Sales in King County, USA" dataset, which is a publicly available resource from Kaggle.

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from google.colab import files

print("Please upload your kc_house_data.csv file.")
uploaded = files.upload()
file_name = next(iter(uploaded))
df = pd.read_csv(file_name)
df = df.sample(n=2000, random_state=42)

X = df[['sqft_living', 'zipcode']].copy()
y = df['price']

X['zipcode'] = X['zipcode'].astype(str)

preprocessor = ColumnTransformer(
    transformers=[
        ('zipcode_encoder', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['zipcode'])
    ], remainder='passthrough')

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model.fit(X_train, y_train)
print("\nModel trained successfully!")

new_house = pd.DataFrame({
    'sqft_living': [2000],
    'zipcode': ['98178']
})
predicted_price = model.predict(new_house)
print(f"\nPredicted price for a 2000 sq ft house in zipcode 98178: ${predicted_price[0]:,.2f}")

feature_names = (model.named_steps['preprocessor']
                 .named_transformers_['zipcode_encoder']
                 .get_feature_names_out(['zipcode'])).tolist() + ['sqft_living']

coefficients = model.named_steps['regressor'].coef_
print("\nModel Coefficients:")
for feature, coef in zip(feature_names, coefficients):
    print(f"- {feature}: {coef:,.2f}")

Please upload your kc_house_data.csv file.


Saving kc_house_data.csv.zip to kc_house_data.csv (3).zip

Model trained successfully!

Predicted price for a 2000 sq ft house in zipcode 98178: $344,205.21

Model Coefficients:
- zipcode_98001: -224,590.01
- zipcode_98002: -177,842.32
- zipcode_98003: -194,434.14
- zipcode_98004: 670,487.03
- zipcode_98005: 141,338.56
- zipcode_98006: -4,229.79
- zipcode_98007: 26,371.12
- zipcode_98008: 82,995.28
- zipcode_98010: -177,801.97
- zipcode_98011: -118,555.41
- zipcode_98014: -155,164.11
- zipcode_98019: -217,323.27
- zipcode_98022: -149,602.02
- zipcode_98023: -233,812.74
- zipcode_98024: 249,184.37
- zipcode_98027: -79,049.58
- zipcode_98028: -113,827.60
- zipcode_98029: -16,957.80
- zipcode_98030: -285,694.15
- zipcode_98031: -222,420.96
- zipcode_98032: -176,171.33
- zipcode_98033: 161,861.27
- zipcode_98034: -17,838.41
- zipcode_98038: -244,502.70
- zipcode_98039: 1,404,471.20
- zipcode_98040: 324,446.32
- zipcode_98042: -202,484.60
- zipcode_98045: -115,245.56
- zipcode_98052: 7,513.