In [1]:
import joblib
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

In [2]:
# 1) Load data
# Read the house-price CSV into a DataFrame. The bare `df` on the last line
# makes the notebook render the table underneath the cell.
# NOTE(review): the rendered table shows an "Unnamed: 0" header. If that is a
# row index stored in the CSV (rather than a display artifact), consider
# passing index_col=0 to read_csv -- confirm against the raw file.
csv_path = "house_price_dataset.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,Size_SqFt,Bedrooms,Age_Years,Distance_to_City_km,Garage_Size,Price_USD
0,3674,1,48,19.24,1,593734.81
1,1360,5,48,47.07,3,298020.68
2,1794,5,11,48.71,1,373774.81
3,1630,2,38,14.91,1,297743.79
4,1595,5,1,15.96,2,381918.60
...,...,...,...,...,...,...
95,2817,2,35,10.52,3,486283.45
96,1315,4,37,21.16,3,295610.68
97,3842,3,39,35.28,3,628461.25
98,955,1,19,7.78,3,211704.32


In [3]:
# 2) Features and target
# The model inputs are selected explicitly by column name, so any other
# column present in the file is ignored.
feature_cols = [
    "Size_SqFt",
    "Bedrooms",
    "Age_Years",
    "Distance_to_City_km",
    "Garage_Size",
]
target_col = "Price_USD"

# X holds the input columns (a DataFrame); y holds the value to predict (a Series).
X = df.loc[:, feature_cols]
y = df.loc[:, target_col]

In [4]:
# 3) Train / Test split
# Hold out 20% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# 4) Pipeline: scale features, then linear regression
# A Pipeline chains processing steps so they always run in the same order:
# calling fit/predict on the pipeline applies the scaler first and then the
# regression model.
pipeline_steps = [
    ("scale", MinMaxScaler()),   # rescales each feature using the range seen during fit
    ("lr", LinearRegression()),  # ordinary least-squares regression on the scaled features
]
pipe = Pipeline(steps=pipeline_steps)

In [9]:
# 5) Fit
# Train the whole pipeline on the training split only; the test split is
# kept aside for the evaluation step.
pipe.fit(X=X_train, y=y_train)

In [10]:
# 6) Evaluate the model's accuracy (these metrics will be explained next week).
# Predictions are made on the held-out test split, which the model did not
# see during fitting.
y_test_pred = pipe.predict(X_test)

# R²: score computed by r2_score from the true and predicted test prices.
r2 = r2_score(y_test, y_test_pred)

# RMSE: square root of the mean squared error, so it is in the same units as the target.
mse = mean_squared_error(y_test, y_test_pred)
rmse = np.sqrt(mse)

print(f"RMSE: {rmse:.2f}")
print(f"R²:   {r2:.3f}")

RMSE: 17033.52
R²:   0.987


In [12]:
# 7) Save pipeline for reuse
# joblib is a Python library used to save and load machine-learning models;
# it is imported together with the other dependencies in the first cell.
# The fitted pipeline object (scaler + regression) is written to a single
# file, so loading it later restores both steps together.
model_path = "house_price_pipe.joblib"  # defined once so the message always matches the file written
joblib.dump(pipe, model_path)
print(f"Saved model → {model_path}")

Saved model → house_price_pipe.joblib


In [13]:
# Load & predict (this also works from a different Python script)
# NOTE: joblib files are pickle-based, so only load files that come from a
# trusted source.
pipe = joblib.load("house_price_pipe.joblib")

# One new house, described with the same feature columns used for training.
new_house = {
    "Size_SqFt": 2500,
    "Bedrooms": 3,
    "Age_Years": 10,
    "Distance_to_City_km": 15,
    "Garage_Size": 2,
}
X_new = pd.DataFrame([new_house])  # single-row frame; columns follow the dict's key order

# predict returns one value per input row; take the only one.
pred = pipe.predict(X_new)[0]
print("Predicted Price:", pred)

Predicted Price: 472843.75557053334
