Got it! Here's a simple, clean Python program for linear regression using the vectorized form of gradient descent. The same code handles both the single-variable and the multivariable case.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def add_bias_column(features):
    """Prepend a column of ones (the intercept term) to a feature matrix.

    Parameters
    ----------
    features : array-like of shape (m, n)
        One row per example, one column per feature.

    Returns
    -------
    numpy.ndarray of shape (m, n + 1)
        ``features`` converted to float, with a leading column of ones.
    """
    features = np.asarray(features, dtype=float)
    return np.hstack([np.ones((features.shape[0], 1)), features])


def gradient_descent(X, y, alpha=0.01, epochs=1000):
    """Fit linear-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : array-like of shape (m, k)
        Design matrix. Include the bias column yourself
        (see ``add_bias_column``) if an intercept is wanted.
    y : array-like of shape (m,) or (m, 1)
        Target values.
    alpha : float, optional
        Learning rate.
    epochs : int, optional
        Number of full-batch update steps.

    Returns
    -------
    theta : numpy.ndarray of shape (k, 1)
        Parameters after ``epochs`` updates, starting from zeros.
    cost_history : list
        Half mean squared error for each step, measured with the
        parameters as they were *before* that step's update.

    Raises
    ------
    ValueError
        If ``X`` has no rows (the 1/m scaling would be undefined).
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    m = X.shape[0]
    if m == 0:
        raise ValueError("Cannot run gradient descent on an empty dataset")

    theta = np.zeros((X.shape[1], 1))
    cost_history = []

    for _ in range(epochs):
        errors = X @ theta - y                    # y_hat - y, shape (m, 1)
        gradient = (1 / m) * (X.T @ errors)       # (1/m) * X^T (X theta - y)
        theta -= alpha * gradient
        # `errors` was computed before the update, so this is the cost of
        # the pre-update parameters.
        cost = (1 / (2 * m)) * (errors.T @ errors)
        cost_history.append(cost[0, 0])

    return theta, cost_history


def format_equation(theta):
    """Render fitted parameters as a readable equation string.

    Negative coefficients are shown with a minus sign ("- 2.50*x1") rather
    than "+ -2.50*x1", and an intercept-only model yields no dangling "+".

    Parameters
    ----------
    theta : array-like
        Parameters with the intercept first; flattened before formatting.

    Returns
    -------
    str
        For example ``"y = 1.00 + 2.00*x1 - 0.50*x2"``.
    """
    coefficients = np.asarray(theta, dtype=float).ravel()
    terms = [f"{coefficients[0]:.2f}"]
    for index, value in enumerate(coefficients[1:], start=1):
        sign = "-" if value < 0 else "+"
        terms.append(f"{sign} {abs(value):.2f}*x{index}")
    return "y = " + " ".join(terms)


# Guarded so the helpers above can be imported without touching data.csv or
# opening a plot window. In a notebook cell or a direct script run
# __name__ is "__main__", so the names below are still module-level globals.
if __name__ == "__main__":
    # Load dataset (CSV must have the feature columns and a target column 'y')
    df = pd.read_csv("data.csv")

    # Separate input features (X) and target (y), then add the bias column
    X = add_bias_column(df.drop("y", axis=1).values)  # Shape: (m, n+1)
    y = df["y"].values.reshape(-1, 1)                 # Shape: (m, 1)
    m = X.shape[0]                                    # Number of examples

    # Hyperparameters
    alpha = 0.01       # Learning rate
    epochs = 1000      # Iterations

    # Gradient descent
    theta, cost_history = gradient_descent(X, y, alpha=alpha, epochs=epochs)

    # Final parameters
    print("Final θ (theta):", theta.ravel())
    print(f"Final equation: {format_equation(theta)}")

    # Plot cost reduction
    plt.plot(cost_history)
    plt.xlabel("Iterations")
    plt.ylabel("Cost")
    plt.title("Cost Function Reduction Over Time")
    plt.show()
