In [2]:
import numpy as np
import pandas as pd

In [3]:
def ols(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Estimate OLS coefficients by solving the normal equations.

    Finds ``beta`` such that ``(X'X) beta = X'y``.

    Parameters
    ----------
    X : np.ndarray
        Design matrix (2-D, one observation per row).
    y : np.ndarray
        Response with as many rows as ``X``; may be 1-D or a column matrix.

    Returns
    -------
    np.ndarray
        Coefficient estimates, with one row per column of ``X``.

    Raises
    ------
    np.linalg.LinAlgError
        If ``X'X`` is singular.
    """
    gram = X.T @ X
    # Solve the linear system directly instead of forming inv(X'X):
    # it is cheaper and loses less precision than an explicit inverse.
    return np.linalg.solve(gram, X.T @ y)

In [None]:
def ols_gram_schmidt(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Estimate OLS coefficients via a Gram-Schmidt QR factorisation.

    The columns of ``X`` are orthonormalised with modified Gram-Schmidt,
    giving ``X = Q R`` with ``Q'Q = I`` and ``R`` upper triangular. The
    coefficients then solve ``R beta = Q'y``.

    Parameters
    ----------
    X : np.ndarray
        Design matrix (2-D, one observation per row) with linearly
        independent columns.
    y : np.ndarray
        Response with as many rows as ``X``; may be 1-D or a column matrix.

    Returns
    -------
    np.ndarray
        Coefficient estimates, with one row per column of ``X``.

    Raises
    ------
    np.linalg.LinAlgError
        If a column of ``X`` is (numerically) a linear combination of the
        preceding columns.
    """
    X = np.asarray(X, dtype=float)
    n_obs, n_feat = X.shape
    Q = np.zeros((n_obs, n_feat))
    R = np.zeros((n_feat, n_feat))
    eps = np.finfo(float).eps

    for j in range(n_feat):
        residual = X[:, j].copy()
        # Modified Gram-Schmidt: project out each earlier direction from the
        # running residual (not the original column) for better stability.
        for i in range(j):
            R[i, j] = Q[:, i] @ residual
            residual -= R[i, j] * Q[:, i]

        R[j, j] = np.linalg.norm(residual)
        # Nothing left after projection means the column is linearly dependent.
        if R[j, j] <= eps * max(n_obs, n_feat) * np.linalg.norm(X[:, j]):
            raise np.linalg.LinAlgError("Singular matrix")
        Q[:, j] = residual / R[j, j]

    return np.linalg.solve(R, Q.T @ y)

In [4]:
def ols_svd(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Estimate OLS coefficients from the thin SVD of the design matrix.

    With ``X = U diag(S) V'``, the least-squares solution is
    ``beta = V diag(1 / S) U'y``.

    Parameters
    ----------
    X : np.ndarray
        Design matrix (2-D, one observation per row).
    y : np.ndarray
        Response with as many rows as ``X``; may be 1-D or a column matrix.

    Returns
    -------
    np.ndarray
        Coefficient estimates, with one row per column of ``X``.

    Raises
    ------
    np.linalg.LinAlgError
        If ``X`` has an exactly zero singular value.
    """
    U, S, V_T = np.linalg.svd(a=X, full_matrices=False)

    # A zero singular value makes diag(S) non-invertible; fail loudly rather
    # than silently returning inf/nan from the division below.
    if np.any(S == 0.0):
        raise np.linalg.LinAlgError("Singular matrix")

    # Dividing the columns of V by S is V @ diag(1 / S) without building or
    # inverting a dense diagonal matrix. Forming U'y first keeps the
    # intermediate products small.
    return (V_T.T / S) @ (U.T @ y)

In [10]:
def ols_qr(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Estimate OLS coefficients from the reduced QR factorisation of ``X``.

    Parameters
    ----------
    X : np.ndarray
        Design matrix (2-D, one observation per row).
    y : np.ndarray
        Response with as many rows as ``X``; may be 1-D or a column matrix.

    Returns
    -------
    np.ndarray
        Coefficient estimates, with one row per column of ``X``.

    Raises
    ------
    np.linalg.LinAlgError
        If ``R`` is singular or not square.
    """
    Q, R = np.linalg.qr(a=X)

    # X'X = (QR)'(QR) = R'Q'QR = R'R              (Q'Q = I)
    # (X'X)^-1 X'y = (R'R)^-1 (QR)'y = R^-1 R'^-1 R'Q'y = R^-1 Q'y
    # so beta solves R beta = Q'y; solve that system instead of forming R^-1.
    return np.linalg.solve(R, Q.T @ y)

In [11]:
# Seeded generator so a fresh "Restart & Run All" regenerates the same data.
rng = np.random.default_rng(seed=0)

# Synthetic regression problem: 100 observations, 10 standard-normal features
# and an independent standard-normal response.
y: np.ndarray = rng.normal(loc=0.0, scale=1.0, size=(100, 1))
X: np.ndarray = rng.normal(loc=0.0, scale=1.0, size=(100, 10))

In [12]:
# Baseline: coefficients from the normal-equations solver.
ols(X=X, y=y)

array([[-0.00805791],
       [-0.12307501],
       [-0.07806972],
       [ 0.16176667],
       [ 0.00613117],
       [ 0.03289824],
       [ 0.05440614],
       [-0.04665317],
       [ 0.02624644],
       [-0.07776617]])

In [13]:
# Same regression via the SVD-based solver, for comparison with ols(X, y).
ols_svd(X=X, y=y)

array([[-0.00805791],
       [-0.12307501],
       [-0.07806972],
       [ 0.16176667],
       [ 0.00613117],
       [ 0.03289824],
       [ 0.05440614],
       [-0.04665317],
       [ 0.02624644],
       [-0.07776617]])

In [14]:
# Same regression via the QR-based solver, for comparison with ols(X, y).
ols_qr(X=X, y=y)

array([[-0.00805791],
       [-0.12307501],
       [-0.07806972],
       [ 0.16176667],
       [ 0.00613117],
       [ 0.03289824],
       [ 0.05440614],
       [-0.04665317],
       [ 0.02624644],
       [-0.07776617]])

In [None]:
# TODO: do the same thing for ridge regression - illustrate how OLS loads heavily on directions of X'X with small eigenvalues, while ridge directly penalizes (shrinks) those small-eigenvalue directions
# TODO: this is because inverting X'X (the covariance of the design matrix) inverts its eigenvalues: a small eigenvalue becomes a much larger number, and vice versa