# Housing Price Prediction – Interactive Linear Regression

This notebook demonstrates:
- Scatter plot of housing data
- Linear Regression using scikit-learn
- Interactive cost function & best-fit line
- Gradient Descent visualization
- Overfitting vs Underfitting
- Real housing CSV example

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from ipywidgets import interact, FloatSlider
# Apply the 'seaborn-v0_8' style sheet to every figure created after this point.
plt.style.use('seaborn-v0_8')

## Generate Synthetic Housing Data

In [None]:
# Fix the global NumPy seed so the synthetic data set is identical on every run.
np.random.seed(42)

n_houses = 50
# Feature matrix of shape (n_houses, 1): house sizes drawn uniformly from [0, 2000).
X = 2000 * np.random.rand(n_houses, 1)
# Gaussian noise scaled by 2000, drawn after the sizes so the random stream order is fixed.
noise = 2000 * np.random.randn(n_houses)
# Linear ground truth (slope 3, offset 20000) plus the noise term.
y = 20000 + 3 * X.squeeze() + noise

## Scatter Plot of Data

In [None]:
# Draw the raw observations on an explicitly created axes object.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.set_xlabel('House Size (sq ft)')
ax.set_ylabel('Price')
ax.set_title('Housing Data')
plt.show()

## Linear Regression (Scikit-learn)

In [None]:
# Ordinary least-squares fit; fit() returns the estimator, so construction and fitting chain.
model = LinearRegression().fit(X, y)

# X has a single feature column, so coef_ holds exactly one slope.
(w_best,) = model.coef_
b_best = model.intercept_

# Final expression of the cell: the notebook displays the (slope, intercept) pair.
(w_best, b_best)

## Cost Function

In [None]:
def cost_function(w, b, X_data=None, y_data=None):
    """Return the mean squared error of the line ``y = w * x + b``.

    Parameters
    ----------
    w : float
        Slope of the candidate line.
    b : float
        Intercept of the candidate line.
    X_data : array-like, optional
        Single-feature inputs, shape ``(n,)`` or ``(n, 1)``. When omitted, the
        notebook-level ``X`` is used.
    y_data : array-like, optional
        One target per sample. When omitted, the notebook-level ``y`` is used.

    Returns
    -------
    float
        ``mean((w * x + b - y) ** 2)`` over all samples.

    Raises
    ------
    ValueError
        If the flattened inputs and targets do not have the same length.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    features = np.asarray(X if X_data is None else X_data, dtype=float).ravel()
    targets = np.asarray(y if y_data is None else y_data, dtype=float).ravel()
    if features.shape != targets.shape:
        raise ValueError(
            f"expected one target per sample, got {features.size} inputs "
            f"and {targets.size} targets"
        )
    residuals = w * features + b - targets
    return np.mean(residuals ** 2)

## Interactive Best-Fit Line & Cost Function

In [None]:

def interactive_plot(w):
    """Show a candidate regression line next to the cost curve for slope ``w``.

    The intercept is held at ``b_best``; only the slope varies.

    * Left panel: the data with the line ``w * X + b_best``.
    * Right panel: ``cost_function`` evaluated for slopes within 5 of
      ``w_best``, with the current slope marked in red.

    Parameters
    ----------
    w : float
        Candidate slope, normally supplied by the slider widget.
    """
    b = b_best
    fig, axes = plt.subplots(1, 2, figsize=(12,4))
    data_ax, cost_ax = axes

    data_ax.scatter(X, y)
    data_ax.plot(X, w * X + b, color='red')
    data_ax.set_xlabel('House Size (sq ft)')
    data_ax.set_ylabel('Price')
    data_ax.set_title(f"Regression Line (w={w:.2f})")

    w_vals = np.linspace(w_best - 5, w_best + 5, 100)
    costs = [cost_function(wi, b) for wi in w_vals]
    cost_ax.plot(w_vals, costs)
    # zorder lifts the marker above the curve so it is never hidden by the line.
    cost_ax.scatter(w, cost_function(w, b), color='red', zorder=3)
    cost_ax.set_xlabel('Slope w')
    cost_ax.set_ylabel('Mean squared error')
    cost_ax.set_title("Cost Function")

    # Keep the axis labels of the two panels from overlapping.
    fig.tight_layout()
    plt.show()

# The slider sweeps the slope over w_best +/- 5 in steps of 0.1, starting at w_best.
# The trailing semicolon stops the notebook from echoing interact()'s return value
# (the wrapped function) underneath the widget.
interact(interactive_plot, w=FloatSlider(min=w_best-5, max=w_best+5, step=0.1, value=w_best));


## Gradient Descent from Scratch

In [None]:

# Batch gradient descent on the mean squared error of y = w * x + b.
#
# The descent runs on a standardised copy of the feature (zero mean, unit variance).
# On raw square-foot values the slope gradient is several orders of magnitude larger
# than the intercept gradient, so no single learning rate can move both parameters:
# the intercept stays near its start value and the slope settles on a biased value
# instead of the least-squares slope. After standardising, both directions have the
# same curvature and one learning rate converges for both.
x_flat = X.squeeze()
x_mean, x_std = x_flat.mean(), x_flat.std()
x_scaled = (x_flat - x_mean) / x_std

w_scaled, b_scaled = 0.0, 0.0
lr = 0.1
epochs = 50
w_history = []  # slope after each epoch, expressed in original (per sq ft) units

for _ in range(epochs):
    residual = w_scaled * x_scaled + b_scaled - y
    # Exact gradients of mean(residual ** 2) with respect to slope and intercept.
    dw = 2 * np.mean(residual * x_scaled)
    db = 2 * np.mean(residual)
    w_scaled -= lr * dw
    b_scaled -= lr * db
    # Undo the scaling so the history is directly comparable with w_best.
    w_history.append(w_scaled / x_std)

# Map the fitted parameters back to the original feature units:
#   w_scaled * (x - x_mean) / x_std + b_scaled  ==  w * x + b
w = w_scaled / x_std
b = b_scaled - w * x_mean


In [None]:
# Slope estimate after each epoch, with the scikit-learn slope as a dashed reference line.
fig, ax = plt.subplots()
ax.plot(w_history)
ax.axhline(w_best, color='red', linestyle='--')
ax.set_title('Gradient Descent Convergence')
plt.show()

## Overfitting vs Underfitting

In [None]:

from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline


def _polynomial_regressor(degree):
    """Build a scale -> polynomial expansion -> least-squares pipeline.

    The feature is standardised before expansion: raw sizes reach about 2000,
    so a degree-10 term would be of order 1e33 and the design matrix would be
    too ill-conditioned for the solver to fit the high-order terms reliably.
    """
    return make_pipeline(StandardScaler(), PolynomialFeatures(degree), LinearRegression())


# NOTE(review): X and y in this notebook are generated from a straight line plus
# noise, so degree 1 is the generating model rather than an underfit. The labels
# are kept unchanged here; confirm whether they should be reworded.
models = {
    "Underfit (Degree 1)": _polynomial_regressor(1),
    "Good Fit (Degree 2)": _polynomial_regressor(2),
    "Overfit (Degree 10)": _polynomial_regressor(10),
}

# Evenly spaced sizes used to draw each fitted curve as a smooth line.
X_plot = np.linspace(0, 2000, 200).reshape(-1,1)

fig, axes = plt.subplots(1, 3, figsize=(12,4))
# The loop variable is named `pipeline` so it does not overwrite the notebook-level `model`.
for ax, (name, pipeline) in zip(axes, models.items()):
    pipeline.fit(X, y)
    ax.scatter(X, y)
    ax.plot(X_plot, pipeline.predict(X_plot), color='red')
    ax.set_title(name)
fig.tight_layout()
plt.show()


## Real Housing Data (CSV Example)

In [None]:

import pandas as pd

# Small illustrative table: sizes run from 800 to 1800 sq ft in steps of 100,
# with one price per size.
sizes_sqft = list(range(800, 1900, 100))
prices = [35000, 38000, 42000, 46000, 50000, 54000, 59000, 63000, 68000, 72000, 77000]
df = pd.DataFrame({'size_sqft': sizes_sqft, 'price': prices})

# Write without the index so the file contains only the two data columns.
df.to_csv('house_prices.csv', index=False)

# Final expression of the cell: the notebook renders the frame.
df


In [None]:

# Reload the table from disk and split it into a 2-D feature frame and a target series.
df = pd.read_csv('house_prices.csv')
X_real, y_real = df[['size_sqft']], df['price']

# fit() returns the estimator, so construction and fitting chain.
model = LinearRegression().fit(X_real, y_real)

# Observations as points, fitted line in red.
fig, ax = plt.subplots()
ax.scatter(X_real, y_real)
ax.plot(X_real, model.predict(X_real), color='red')
ax.set_title('Real Housing Data Regression')
plt.show()
