In [None]:
import csv

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read only the header row to recover the feature names; np.loadtxt below
# skips that same row when it parses the numeric body of the file.
# newline="" is how the csv module documentation says to open files for it.
with open("hitters.x.csv", newline="") as f:
    X_colnames = next(csv.reader(f))

# Feature matrix: n samples (rows) by d features (columns).
X = np.loadtxt("hitters.x.csv", delimiter=",", skiprows=1)
(n, d) = X.shape

# Response vector, one value per row of X (header row skipped).
y = np.loadtxt("hitters.y.csv", skiprows=1)

# Fail fast if the files disagree; later cells rely on both invariants.
assert len(X_colnames) == d, "header width does not match the number of columns in X"
assert y.shape == (n,), "y must hold exactly one response per row of X"

1., 2.

In [None]:
# Design matrix: a leading column of ones (intercept) followed by the
# features standardized column-wise to zero mean and unit standard deviation.
X_ = np.column_stack((np.ones(n), (X - X.mean(axis=0)) / X.std(axis=0)))

# Ridge penalty matrix: (d+1) x (d+1) identity with a zero in the top-left
# corner, so the intercept is the only coefficient that is not penalized.
P = np.eye(d + 1)
P[0, 0] = 0.0

3.

In [None]:
# Penalty grid: 100 values of lambda, evenly spaced in log10 from 1e-3 to 1e7.
log10_l = np.linspace(-3, 7, num=100)
l = 10.0 ** log10_l

# One regularized Gram matrix per lambda via broadcasting: M[a] = X_'X_ + l[a] * P.
M = X_.T @ X_ + l[:, None, None] * P
# Batched solve: row a of t holds the ridge coefficients for l[a].
t = (np.linalg.pinv(M) @ X_.T) @ y

In [None]:
fig, ax = plt.subplots()

# Euclidean norm of the penalized coefficients (columns 1..d) for each lambda.
ax.scatter(
    log10_l,
    np.log10(np.linalg.norm(t[:, 1:], axis=1)),
    label=r"$\log_{10}||\hat{\mathbf{\theta}}||_2$",
    s=2,
)
# Magnitude of the unpenalized intercept (column 0) for each lambda.
ax.scatter(
    log10_l,
    np.log10(np.absolute(t[:, 0])),
    label=r"$\log_{10}|\hat{\theta}_0|$",
    s=2,
)

ax.set_xlabel(r"$\log_{10}\lambda$")
ax.legend()

4.

In [None]:
def ridge(X_, y, l, P=None):
    """Closed-form ridge regression estimate.

    Computes ``pinv(X_.T @ X_ + l * P) @ X_.T @ y``.

    Parameters
    ----------
    X_ : ndarray, shape (n, p)
        Design matrix. With the default penalty, column 0 is treated as the
        intercept column and is left unpenalized.
    y : ndarray, shape (n,)
        Response vector.
    l : float
        Regularization strength (lambda).
    P : ndarray, shape (p, p), optional
        Penalty matrix. Defaults to the p x p identity with a zero in the
        top-left entry, built from the width of ``X_`` so the function does
        not depend on any notebook-level global.

    Returns
    -------
    ndarray, shape (p,)
        Estimated coefficient vector.
    """
    if P is None:
        # Penalize every coefficient except the first (intercept) one.
        P = np.eye(X_.shape[1])
        P[0, 0] = 0.0
    return np.linalg.pinv(X_.T @ X_ + l * P) @ X_.T @ y
def lsq(X_, y):
    """Ordinary least-squares estimate via the pseudo-inverse of the Gram matrix.

    Returns ``pinv(X_.T @ X_) @ X_.T @ y`` for design matrix ``X_`` and
    response vector ``y``.
    """
    gram = X_.T @ X_
    gram_pinv = np.linalg.pinv(gram)
    return gram_pinv @ X_.T @ y

In [None]:
# With a vanishing penalty the ridge estimate should coincide with least
# squares: show the absolute coefficient-wise gap.
np.absolute(lsq(X_, y) - ridge(X_, y, 1e-10))

In [None]:
# With a huge penalty, show the coefficient magnitudes; only the intercept
# (entry 0) is exempt from the penalty.
np.absolute(ridge(X_, y, l=1e10))

5.

In [None]:
def make_folds(X, y, K, seed=1234):
    """Shuffle ``(X, y)`` jointly and split the rows into K cross-validation folds.

    Rows are shuffled with a seeded generator, then cut into K equally sized
    folds. When ``n`` is not divisible by ``K`` the last ``n % K`` shuffled
    rows are dropped so that all folds have the same size and can be stacked.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Feature (design) matrix.
    y : ndarray, shape (n,)
        Response vector, one entry per row of ``X``.
    K : int
        Number of folds, ``2 <= K <= n``.
    seed : int, optional
        Seed for the shuffling generator (default 1234).

    Returns
    -------
    ((X_train, Y_train), (X_test, Y_test))
        With ``m = n // K``: ``X_train`` has shape ``(K, (K-1)*m, d)``,
        ``Y_train`` ``(K, (K-1)*m)``, ``X_test`` ``(K, m, d)`` and
        ``Y_test`` ``(K, m)``. Index ``k`` along the first axis is the split
        in which fold ``k`` is held out.

    Raises
    ------
    ValueError
        If ``K`` is not in ``[2, n]``.
    """
    X = np.asarray(X)
    (n, d) = X.shape
    if not 2 <= K <= n:
        raise ValueError(f"K must satisfy 2 <= K <= n (got K={K}, n={n})")

    # Glue y on as a last column so features and responses are shuffled together.
    rng = np.random.default_rng(seed=seed)
    D = np.hstack((X, np.reshape(y, (-1, 1))))
    rng.shuffle(D, axis=0)

    # Keep the largest prefix that divides evenly into K folds. Slicing with an
    # explicit stop (rather than a negative remainder) stays correct when
    # n % K == 0, where D[:-0] would be empty.
    fold_size = n // K
    subs = tuple(np.split(D[:fold_size * K], K, axis=0))

    # Split k: train on every fold except k, test on fold k.
    train = np.stack([np.vstack(subs[:k] + subs[k + 1:]) for k in range(K)])
    test = np.stack(subs)

    # First d columns are the features, the last column is the response.
    # Contiguous copies keep the memory layout independent of the parent array.
    return (
        (np.ascontiguousarray(train[..., :d]), np.ascontiguousarray(train[..., d])),
        (np.ascontiguousarray(test[..., :d]), np.ascontiguousarray(test[..., d])),
    )

# 5-fold split of the standardized design matrix (intercept column included).
(X_train, Y_train), (X_test, Y_test) = make_folds(X_, y, K=5)

In [None]:
# Penalty grid for cross-validation: 100 lambdas, log10-spaced from 1e-7 to 1e5.
# NOTE: this rebinds log10_l, l, M and t from the earlier ridge-path cell.
log10_l = np.linspace(-7, 5, num = 100)
l = np.power(10, log10_l)
# Regularized Gram matrix for every (lambda, fold) pair.
# First einsum: a = fold, k = training sample, i/j = coefficient index, i.e.
# X_train[fold].T @ X_train[fold]. The new leading axis then broadcasts against
# l * P, which gets a new fold axis, so M[lam, fold] = Gram[fold] + l[lam] * P.
M = np.einsum("aki,akj->aij", X_train, X_train)[np.newaxis, ...] + (l.reshape((-1, 1, 1)) * P[np.newaxis, ...])[:, np.newaxis, ...]
# Ridge coefficients per (lambda, fold): a = lambda, b = fold, j = training
# sample, i/k = coefficient index, i.e. t[a, b] = pinv(M[a, b]) @ X_train[b].T @ Y_train[b].
t = np.einsum("abik,bjk,bj->abi", np.linalg.pinv(M), X_train, Y_train)
# Held-out predictions (a = lambda, b = fold, i = test sample, j = coefficient
# index), then the squared error averaged over both test samples and folds:
# R[a] is the cross-validated mean squared error for l[a].
R = np.mean(np.square(np.einsum("bij,abj->abi", X_test, t) - Y_test[np.newaxis, ...]), axis=(-1, -2))

In [None]:
# Cross-validated risk as a function of the penalty, both on a log10 scale.
fig, ax = plt.subplots()
ax.scatter(log10_l, np.log10(R), s=2)
ax.set_xlabel(r"$\log_{10}\lambda$")
# \mathrm (with a braced subscript) is used for the upright "test" label:
# it is part of core mathtext, whereas \text is not parsed by every
# Matplotlib release and would make the figure fail to render there.
ax.set_ylabel(r"$\log_{10}\hat{R}(D_{\mathrm{test}})$")

6.

In [None]:
# Pick the penalty with the smallest cross-validated risk, then refit the
# ridge model on the complete data set with that penalty.
lopt = l[R.argmin()]
topt = ridge(X_, y, l=lopt)
topt

In [None]:
# Root-mean-squared error of the refitted model on the same data it was
# fitted to (an in-sample figure, not a held-out estimate).
np.sqrt(np.mean(np.square(X_ @ topt - y)))

In [None]:
# Coefficients of the refitted model, largest first. "t0" labels entry 0 of
# topt (the intercept); the remaining rows take their names from the CSV header.
# pandas is imported with the other dependencies in the notebook's import cell.
pd.DataFrame(topt, index=["t0", *X_colnames]).sort_values(by=0, ascending=False)