Linear Regression Using Normal Equation
Write a Python function that performs linear regression using the normal equation. The function should take a matrix X (features) and a vector y (target) as input, and return the coefficients of the linear regression model. Round your answer to four decimal places; -0.0 is a valid result when rounding a very small negative number.

Example:
Input:
X = [[1, 1], [1, 2], [1, 3]], y = [1, 2, 3]
Output:
[0.0, 1.0]
Reasoning:
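The first column of X is the intercept (bias) column of ones, so the coefficients are [intercept, slope]. The linear model is y = 0.0 + 1.0*x, which fits the input data perfectly.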

In [None]:
import numpy as np

def linear_regression_normal_equation(X: list[list[float]], y: list[float]) -> list[float]:
    """
    Fit an ordinary least-squares linear model and return its coefficients.

    The returned theta minimises ||A @ theta - y||^2, where A is the design
    matrix (X with an intercept column of ones as its first column). When
    A^T A is invertible this is exactly the normal-equation solution
    theta = (A^T A)^(-1) A^T y. When it is singular (e.g. duplicated or
    collinear features) the minimum-norm least-squares solution is returned
    instead of failing on the matrix inversion.

    :param X: List of feature lists, where each row represents a data sample.
        If the first column is already all ones it is used as the intercept
        column; otherwise a column of ones is prepended.
    :param y: List of target values, one per row of X.
    :return: List of coefficients rounded to 4 decimal places, intercept first.
    :raises ValueError: If X is not a non-empty 2-D matrix or the number of
        targets does not match the number of samples.
    """
    features = np.asarray(X, dtype=np.float64)
    targets = np.asarray(y, dtype=np.float64)

    if features.ndim != 2 or features.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D matrix (rows are samples)")
    if targets.ndim == 0 or targets.shape[0] != features.shape[0]:
        raise ValueError("y must contain exactly one target per row of X")

    # Adding a second ones column to a matrix that already has one makes the
    # columns linearly dependent, so only add the intercept when it is missing.
    has_intercept = features.shape[1] > 0 and bool(np.all(features[:, 0] == 1.0))
    if has_intercept:
        design = features
    else:
        intercept = np.ones((features.shape[0], 1))
        design = np.hstack((intercept, features))

    # lstsq solves the same minimisation as the normal equation without
    # forming an explicit inverse, and copes with rank-deficient designs.
    theta, *_ = np.linalg.lstsq(design, targets, rcond=None)

    # Round to 4 decimal places
    return np.round(theta, 4).tolist()

# Demo: a single feature where y equals x, so the fitted intercept should be
# approximately 0 and the slope approximately 1.
X, y = [[1], [2], [3]], [1, 2, 3]
print(linear_regression_normal_equation(X=X, y=y))

In [None]:
import numpy as np

class LinearRegressionNP:
    """
    Single-output linear regression (y ~ X @ w + b) trained with full-batch
    gradient descent on the mean squared error.
    """

    def __init__(self, n_features, learning_rate=1e-3):
        """
        :param n_features: Number of input features F (length of the weight vector).
        :param learning_rate: Step size used for every gradient-descent update.
        """
        # Fixed seed so the initial weights are reproducible across runs.
        rng = np.random.default_rng(0)
        self.w = rng.normal(0, 0.01, size=(n_features,))  # (F,)
        self.b = 0.0
        self.lr = learning_rate

    @staticmethod
    def _as_vector(values):
        """Return *values* as a flat float array so (N, 1) and (N,) inputs behave alike."""
        return np.asarray(values, dtype=float).reshape(-1)

    def forward(self, X):
        """
        Predict targets for X.

        :param X: Array-like of shape (N, F).
        :return: Predictions of shape (N,).
        """
        return np.asarray(X, dtype=float) @ self.w + self.b  # (N,F)@(F,) -> (N,)

    def loss(self, y_pred, y_true):
        """
        Mean squared error between predictions and targets.

        Both inputs are flattened first: subtracting an (N, 1) column from an
        (N,) vector would otherwise broadcast to an (N, N) matrix and silently
        produce a wrong value.
        """
        err = self._as_vector(y_pred) - self._as_vector(y_true)
        return np.mean(err**2)

    def backward(self, X, y_true):
        """
        Run one gradient-descent step on the whole batch and update w and b in place.

        :param X: Array-like of shape (N, F).
        :param y_true: Targets, shape (N,) or (N, 1).
        :return: The MSE measured before this step's parameter update.
        :raises ValueError: If the number of targets differs from the number of rows in X.
        """
        X = np.asarray(X, dtype=float)
        y_true = self._as_vector(y_true)
        N = X.shape[0]
        if y_true.shape[0] != N:
            raise ValueError("y_true must contain exactly one target per row of X")

        y_pred = self.forward(X)
        err = y_pred - y_true                      # (N,)
        dw = (2.0 / N) * (X.T @ err)               # (F,)
        db = (2.0 / N) * np.sum(err)               # scalar
        self.w -= self.lr * dw
        self.b -= self.lr * db
        return np.mean(err**2)

    def train(self, X, y, epochs=1000, verbose_every=100):
        """
        Run `epochs` full-batch gradient-descent steps.

        :param X: Array-like of shape (N, F).
        :param y: Targets, shape (N,) or (N, 1).
        :param epochs: Number of update steps to perform.
        :param verbose_every: Print the loss every this many epochs; 0 or None
            disables the progress output.
        """
        # Convert once up front instead of on every epoch.
        X = np.asarray(X, dtype=float)
        y = self._as_vector(y)
        for e in range(epochs):
            cur_loss = self.backward(X, y)
            # Guard against modulo-by-zero so logging can be switched off.
            if verbose_every and e % verbose_every == 0:
                print(f"epoch {e}: loss={cur_loss:.6f}")

# Demo: fit the model on noisy synthetic data generated from known parameters
if __name__ == "__main__":
    # Synthetic dataset: y = X @ [3, 5] + 2 plus Gaussian noise
    rng = np.random.default_rng(42)
    N = 200
    F = 2
    X = rng.uniform(low=-5, high=5, size=(N, F))  # (N, F)
    true_w = np.array([3.0, 5.0])
    true_b = 2.0
    noise = rng.normal(loc=0, scale=0.3, size=N)
    y = X @ true_w + true_b + noise               # (N,)

    model = LinearRegressionNP(F, learning_rate=5e-2)
    model.train(X, y, epochs=1000, verbose_every=200)

    # Report the fitted parameters
    print("\nLearned params:")
    print("w:", model.w)
    print("b:", model.b)

    # Predict for a single sample; forward expects a 2-D (1, F) batch here
    x_test = np.array([1.5, -2.0])
    print("prediction:", model.forward(x_test[np.newaxis, :])[0])