In [1]:
import numpy as np

# 1. Hypothesis function in vectorized form
def hypothesis(X, theta):
    return np.dot(X, theta)

# 2. Loss function (mean squared error) in vectorized form
def compute_cost(X, y, theta):
    m = len(y)
    prediction = hypothesis(X, theta)
    cost = (1 / (2 * m)) * np.sum((prediction - y) ** 2)
    return cost

# 3. Gradient descent step
def gradient_step(X, y, theta, learning_rate):
    m = len(y)
    prediction = hypothesis(X, theta)
    gradient = (1 / m) * np.dot(X.T, (prediction - y))
    theta = theta - learning_rate * gradient
    return theta

# 4. Finding optimal parameters using gradient descent
def gradient_descent(X, y, theta, learning_rate, iterations):
    for i in range(iterations):
        theta = gradient_step(X, y, theta, learning_rate)
        if i % 1000 == 0: 
            print(f"Iteration {i}: Cost = {compute_cost(X, y, theta)}")
            print(f"Current Parameters: {theta}")
    return theta

# 5. Analytical solution with regularization (Ridge Regression)
def analytical_solution(X, y, alpha=1e-5):
    n = X.shape[1]
    I = np.eye(n) 
    return np.linalg.inv(X.T @ X + alpha * I) @ X.T @ y

# 6. Normalize data with protection against division by zero
def normalize(X):
    mean = np.mean(X, axis=0)
    std_dev = np.std(X, axis=0)
    std_dev[std_dev == 0] = 1 
    return (X - mean) / std_dev

# 7. Compare results from both methods
def compare_results(X, y):
    # Normalize the feature matrix
    X = normalize(X)

    # Initializing parameters (with bias term)
    theta_init = np.random.randn(X.shape[1]) * 0.01  # Random small values for initialization

    # Finding parameters using gradient descent
    theta_gd = gradient_descent(X, y, theta_init, learning_rate=0.05, iterations=10000)

    # Finding parameters using the analytical solution
    theta_analytical = analytical_solution(X, y)

    print("Gradient Descent Parameters:", theta_gd)
    print("Analytical Solution Parameters:", theta_analytical)

# Sample data: area (sq ft), number of bathrooms, number of bedrooms
X = np.array([[2100, 3, 4],
              [1600, 2, 3],
              [2400, 4, 5],
              [1416, 2, 3],
              [3000, 4, 5]])

# Add a column of ones for the intercept term (bias)
X = np.c_[np.ones(X.shape[0]), X]

# House prices (target values)
y = np.array([400000, 330000, 369000, 232000, 539900])

# Compare the results of both methods
compare_results(X, y)

Iteration 0: Cost = 74070773872.67436
Current Parameters: [4.71081795e-03 4.69273815e+03 3.87846675e+03 3.87847435e+03]
Iteration 1000: Cost = 70190402694.72072
Current Parameters: [ 4.71081795e-03  1.71610168e+05 -4.15831751e+04 -4.15831675e+04]
Iteration 2000: Cost = 70190099886.36334
Current Parameters: [ 4.71081795e-03  1.73791969e+05 -4.26500526e+04 -4.26500450e+04]
Iteration 3000: Cost = 70190099815.86847
Current Parameters: [ 4.71081795e-03  1.73825258e+05 -4.26663309e+04 -4.26663233e+04]
Iteration 4000: Cost = 70190099815.85205
Current Parameters: [ 4.71081795e-03  1.73825766e+05 -4.26665793e+04 -4.26665717e+04]
Iteration 5000: Cost = 70190099815.85205
Current Parameters: [ 4.71081795e-03  1.73825774e+05 -4.26665831e+04 -4.26665755e+04]
Iteration 6000: Cost = 70190099815.85205
Current Parameters: [ 4.71081795e-03  1.73825774e+05 -4.26665831e+04 -4.26665755e+04]
Iteration 7000: Cost = 70190099815.85205
Current Parameters: [ 4.71081795e-03  1.73825774e+05 -4.26665831e+04 -4.26665