# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1. Data ingestion
# Load the housing CSV and pull the feature and target columns out as arrays.
data = pd.read_csv('data/portland_housing.csv')
X, y = data['size_sqft'].values, data['price_k'].values
m = len(y)  # total number of training examples

# 2. Preprocessing and feature scaling
# Standardise the feature (subtract the mean, divide by the standard
# deviation) so that gradient descent converges stably.
X_norm = (X - X.mean()) / X.std()
# Prepend a column of ones so theta[0] acts as the intercept term (x0 = 1).
X_bias = np.column_stack((np.ones(m), X_norm))

# 3. Batch Gradient Descent Implementation
def compute_cost(X, y, theta):
    """Return the least-squares cost J(theta) for a linear model.

    J(theta) = 1 / (2 * n) * sum((X @ theta - y) ** 2), where n is the
    number of training examples.

    Args:
        X: Design matrix with one row per example; its columns must line up
            with the entries of ``theta``.
        y: Target values, one per row of ``X``.
        theta: Parameter vector.

    Returns:
        The scalar cost (half the mean squared error).

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    # Take the example count from the data actually passed in, not from a
    # module-level variable, so the function is correct for any dataset.
    n_examples = len(y)
    residuals = X.dot(theta) - y
    return (1 / (2 * n_examples)) * np.sum(residuals ** 2)

def batch_gradient_descent(X, y, theta, alpha, iterations):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration uses the full training set to compute the gradient of
    the least-squares cost and takes one step of size ``alpha`` against it
    (the LMS update rule).

    Args:
        X: Design matrix with one row per example.
        y: Target values, one per row of ``X``.
        theta: Initial parameter vector. It is not modified in place; each
            update builds a new array.
        alpha: Learning rate (step size).
        iterations: Number of update steps to run.

    Returns:
        A ``(theta, cost_history)`` tuple: the final parameter vector and a
        list holding the cost measured after each update.
    """
    # Take the example count from the data actually passed in, not from a
    # module-level variable, so the gradient is scaled correctly for any
    # dataset.
    n_examples = len(y)
    cost_history = []
    for _ in range(iterations):
        residuals = X.dot(theta) - y
        gradients = (1 / n_examples) * X.T.dot(residuals)
        theta = theta - alpha * gradients  # step against the gradient
        cost_history.append(compute_cost(X, y, theta))
    return theta, cost_history

# 4. Analytical solution: normal equation
# The closed form is theta = (X^T X)^-1 X^T y. Rather than forming the
# explicit inverse, solve the equivalent linear system (X^T X) theta = X^T y,
# which yields the same solution with less work and better numerical accuracy.
theta_normal = np.linalg.solve(X_bias.T.dot(X_bias), X_bias.T.dot(y))

# 5. Training and comparison
alpha = 0.01  # learning rate
iterations = 1500  # number of gradient-descent updates
theta_init = np.zeros(2)  # start from theta = 0 (intercept and slope)
theta_gd, history = batch_gradient_descent(X_bias, y, theta_init, alpha, iterations)

# Print both estimates side by side so they can be compared.
print(f"Gradient Descent Result: {theta_gd}")
print(f"Normal Equation Result: {theta_normal}")

# Convergence visualization: cost recorded after each iteration
plt.plot(history)
plt.xlabel('Iterations')
plt.ylabel('Cost J(theta)')
plt.title('Convergence Analysis')
plt.show()