# Generalized Cross-Validation with a Single Variable
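*This notebook shows that two approaches for computing the Generalized Cross-Validation (GCV) score are equivalent when a single regularizer is used, and that they differ when multiple regularizers are used.*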

## Import Dependencies

In [1]:
import numpy as np

## Generate Random Data

In [2]:
# Seed NumPy's global random state so every draw in this notebook is repeatable.
np.random.seed(0)

# Problem size: n observations and p features; sigma is the noise scale.
n = 25
p = 3
sigma = 0.1

# Uniform [0, 1) design matrix and coefficient vector.
X = np.random.random_sample(size=(n, p))
beta = np.random.random_sample(size=p)

# Response: linear signal plus zero-mean Gaussian noise.
y = X.dot(beta) + np.random.normal(loc=0.0, scale=sigma, size=n)

## Compute LOOCV

In [3]:
def compute_loocv(X, y, Gamma):
    """Leave-one-out cross-validation error of a regularized least-squares fit.

    The fit solves ``A b = X^H y`` with ``A = Re(X^H X) + Gamma^T Gamma``.
    Each fitted residual is then divided by ``1 - h_i``, where ``h_i`` is the
    i-th diagonal entry of the hat matrix ``X A^{-1} X^H``, so no model is
    actually refit.

    Parameters
    ----------
    X : ndarray, shape (n, p)
        Design matrix; may be real or complex.
    y : ndarray, shape (n,)
        Response vector; may be real or complex.
    Gamma : ndarray, shape (k, p)
        Regularization matrix; enters only through ``Gamma^T Gamma``.

    Returns
    -------
    float
        Mean over observations of ``(|y_i - y_hat_i| / (1 - h_i))**2``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``A`` is singular.
    """
    X_H = np.conj(X.T)
    # Only the real part of the Gram matrix is kept, exactly as before.
    A = np.dot(X_H, X).real + np.dot(Gamma.T, Gamma)

    # Solve the linear systems directly instead of forming A^{-1} explicitly.
    b_hat = np.linalg.solve(A, np.dot(X_H, y))
    y_hat = np.dot(X, b_hat)

    # Leverages h_i = Re(conj(x_i) . A^{-1} x_i) for every row x_i at once.
    A_inv_Xt = np.linalg.solve(A, X.T)
    h = np.einsum('ij,ji->i', np.conj(X), A_inv_Xt).real

    return np.sum((np.abs(y - y_hat) / (1 - h))**2) / len(y)

## Compute GCV Approach 1
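We first rotate the regression problem by the unitary matrix $Q = W U^T$, where $U$ holds the left singular vectors of $X$ and $W$ is the unitary Fourier matrix with entries $W_{jk} = e^{2\pi i jk/n}/\sqrt{n}$, then compute the LOOCV of the rotated problem $(QX, Qy)$.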

In [4]:
def compute_gcv1(X, y, Gamma):
    """GCV computed as the LOOCV of a rotated regression problem.

    Builds ``Q = W U^T``, where ``U`` is the square matrix of left singular
    vectors of ``X`` and ``W`` is the n x n Fourier matrix with entries
    ``exp(2j*pi*j*k/n) / sqrt(n)``. It then applies ``Q`` to both ``X`` and
    ``y`` and returns ``compute_loocv`` of the rotated problem.

    Parameters
    ----------
    X : ndarray, shape (n, p)
        Real design matrix.
    y : ndarray, shape (n,)
        Response vector.
    Gamma : ndarray, shape (k, p)
        Regularization matrix, passed through unchanged.

    Returns
    -------
    float
        LOOCV error of the rotated problem.
    """
    n = len(y)

    # Only the left singular vectors are needed; the full (n x n) U is
    # required so that Q is square.
    U = np.linalg.svd(X, full_matrices=True)[0]

    # Fourier matrix built by broadcasting rather than a nested Python list.
    idx = np.arange(n)
    W = np.exp(2j * np.pi * (idx[:, None] * idx[None, :]) / n) / np.sqrt(n)

    Q = np.dot(W, U.T)
    X_prime = np.dot(Q, X)
    y_prime = np.dot(Q, y)
    return compute_loocv(X_prime, y_prime, Gamma)

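## Compute GCV Approach 2
We evaluate the GCV expression directly: the mean squared residual divided by $\left(\mathrm{tr}(I - H)/n\right)^2$, where $H = X (X^T X + \Gamma^T \Gamma)^{-1} X^T$ is the hat matrix.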

In [5]:
def compute_gcv2(X, y, Gamma):
    """GCV computed directly from its closed-form expression.

    Returns ``mse / (trace(I - H) / n)**2``, where
    ``H = X A^{-1} X^T`` is the hat matrix, ``A = X^T X + Gamma^T Gamma``,
    and ``mse`` is the mean squared residual of the regularized fit.

    Parameters
    ----------
    X : ndarray, shape (n, p)
        Design matrix.
    y : ndarray, shape (n,)
        Response vector.
    Gamma : ndarray, shape (k, p)
        Regularization matrix; enters only through ``Gamma^T Gamma``.

    Returns
    -------
    float
        The GCV score.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``A`` is singular.
    """
    n = len(y)
    A = np.dot(X.T, X) + np.dot(Gamma.T, Gamma)

    # Z = A^{-1} X^T via a linear solve; avoids forming A^{-1} explicitly.
    Z = np.linalg.solve(A, X.T)

    y_pred = np.dot(X, np.dot(Z, y))
    mse = np.mean((y - y_pred)**2)

    # trace(I - H) / n = 1 - trace(H) / n, and trace(H) = sum_ij X_ij * Z_ji,
    # so neither the n x n identity nor the n x n hat matrix is materialized.
    h_value = 1.0 - np.sum(X * Z.T) / n
    return mse / h_value**2

## Compare the Two Approaches to Computing GCV
Show that the two approaches are equal when using a single regularizer, but not equal for multiple regularizers.

In [6]:
# Compare for a single regularizer: Gamma is a random scalar multiple of the
# identity, so every coefficient is penalized equally.
Gamma = np.identity(p)*np.random.uniform()
gcv1 = compute_gcv1(X, y, Gamma)
gcv2 = compute_gcv2(X, y, Gamma)
print("%f ~ %f" % (gcv1, gcv2))
# Verify the claimed agreement rather than relying on the "~" in the message.
assert np.isclose(gcv1, gcv2), "GCV approaches should agree for a single regularizer"

# Compare for multiple regularizers: Gamma is diagonal with an independent
# random penalty per coefficient.
Gamma = np.diag(np.random.uniform(size=p))
gcv1 = compute_gcv1(X, y, Gamma)
gcv2 = compute_gcv2(X, y, Gamma)
print("%f != %f" % (gcv1, gcv2))
# Verify the claimed disagreement rather than relying on the "!=" in the message.
assert not np.isclose(gcv1, gcv2), "GCV approaches should differ for multiple regularizers"

0.008029 ~ 0.008029
0.007757 != 0.007772
