<a href="https://colab.research.google.com/github/khietvuarong/ML-Basics-Exercise/blob/main/Part_1_Predict_house_prices_based_on_square_footage_and_location.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
# Citations:
# Data source: California Housing dataset from scikit-learn
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html
# This notebook is an improvement based on the starter code.

import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Fetch the California Housing data from scikit-learn as a pandas DataFrame.
housing = fetch_california_housing(as_frame=True)
df = housing.frame

# Predictors: AveRooms stands in for house size, while Latitude and
# Longitude stand in for location.
FEATURE_COLUMNS = ['AveRooms', 'Latitude', 'Longitude']
X = df[FEATURE_COLUMNS]

# Target: MedHouseVal scaled by 100,000 so it is expressed in dollars.
DOLLARS_PER_UNIT = 100_000
y = df['MedHouseVal'].mul(DOLLARS_PER_UNIT)

# Hold out a fixed share of the rows for evaluation; the fixed seed keeps
# the split reproducible from run to run.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Fit a linear regression on the training split (fit returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Describe the house to price. Edit these values to try other scenarios.
# Example: 6 average rooms at downtown-like coordinates (Los Angeles area).
new_house = pd.DataFrame(
    [{'AveRooms': 6, 'Latitude': 34.05, 'Longitude': -118.25}]
)

# predict returns one value per input row; there is a single row here.
predicted_price = model.predict(new_house)
price = predicted_price[0]

# Report a negative result with a hint instead of printing it as a price.
message = (
    "Result came out negative, try different variable"
    if price < 0
    else f"Predicted house price: ${price:,.2f}"
)
print(message)

# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"\nModel R² Score: {r2:.3f}")

# Pair each feature name with its fitted coefficient and print the report
# in one go (header first, then one line per feature).
report_lines = ["\nModel Coefficients:"]
report_lines += [
    f"{feature}: {coef:.2f}" for feature, coef in zip(X.columns, model.coef_)
]
print("\n".join(report_lines))



Predicted house price: $233,724.49

Model R² Score: 0.298

Model Coefficients:
AveRooms: 13442.86
Latitude: -77833.46
Longitude: -78858.61
