In [1]:
import numpy as np

# Load the data

For $k = 0, 1, 2$ we have the following files:
* Xtrk.csv - the training sequences.
* Xtek.csv - the test sequences.
* Ytrk.csv - labels for the training sequences

In [38]:
Xtr0_mat100 = np.genfromtxt("data/Xtr0_mat100.csv", delimiter='')
Ytr0 = np.genfromtxt("data/Ytr0.csv", delimiter=',', skip_header=1)

Xtr1_mat100 = np.genfromtxt("data/Xtr1_mat100.csv", delimiter='')
Ytr1 = np.genfromtxt("data/Ytr1.csv", delimiter=',', skip_header=1)

Xtr2_mat100 = np.genfromtxt("data/Xtr2_mat100.csv", delimiter='')
Ytr2 = np.genfromtxt("data/Ytr2.csv", delimiter=',', skip_header=1)


In [58]:
Xtr0_mat100.shape

(2000, 100)

# Implementing some kernels

## Gaussian Kernel

In [52]:
def gaussian(x,y, sigma):
    exp_term = np.linalg.norm(x-y)**2 /(2*sigma)
    return(np.exp(-exp_term))

# Naive computation of the gaussian kernel that can be easily improved
def gaussian_kernel(X,sigma):
    n = X.shape[0]
    K = np.eye(n) # One along the diagonals because K(x,x) = exp(0) = 1
    for i in range(n):
        for j in range(i+1,n):
            val = gaussian(X[i], X[j], sigma)
            K[i,j] = val
            K[j,i] = val
    return(K)
    

## Polynomial Kernel

## Kernel Ridge Regression

* Consider RKHS $\mathcal H$, associated to a p.d. kernel K on $\mathcal X$
* Let $y = (y_1, \dots, y_n)^T \in \mathbb R ^n$
* Let $\alpha = (\alpha_1, \dots, \alpha_n)^T \in \mathbb R ^n$
* Let $K$ be the $n\times n$ Gram Matrix such that $K_{i,j} = K(x_i, x_j)$
* We can then write
$$
(\hat f(x_1), \dots, \hat f(x_n))^T = K\alpha
$$
* The norm is $||\hat f||^2_{\mathcal H} = \alpha^T K \alpha$
* KRR $\leftrightarrow \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} (K\alpha - y)^T(K\alpha - y) + \lambda \alpha^T K \alpha$
* Solution for $\lambda > 0$:
$$
\alpha = (K+\lambda nI)^{-1}y
$$


In [108]:
def KRR(X, y, lambd, kernel = "gaussian", sigma=0.5 ):
    """
    make lambd a list so we can test multiple regularisations without having to compute the kernel multiple times
    returns the predictions for each value of lambd
    """
    assert X.shape[0] == y.shape[0]
    assert len(lambd) > 0
    
    # Compute the kernel matrix for our data
    if kernel=="gaussian":
        K = gaussian_kernel(X, sigma)
    y_preds = []
    loss = []
    for l in lambd:
        assert l >= 0
        # find the parameter alpha
        alpha = np.linalg.solve((K + l*n*np.eye(n)), y)
        # predict
        loss_lambda = MSE(y, l, alpha, K)
        print(f"The MSE for the chosen parameters (lambda = {l:.2f} and sigma = {sigma:.2f}) is : {loss_lambda:.4f}")
        y_preds += [K @ alpha]
        loss += [loss_lambda]
    return(y_preds, loss)
    

In [109]:
def MSE(y, lambd, alpha, K):
    n = y.shape[0]
    data_term = (np.linalg.norm(np.dot(K, alpha.reshape(-1,1)) - y)**2)/n
    reg_term = alpha @ K @ alpha
    return(data_term + lambd * reg_term)

In [110]:
KRR(Xtr0_mat100, Ytr0[:,1], [0, 0.1, 0.2,0.3,0.4, 0.5,1,1.5,2,2.5,3], sigma = 0.5)

The MSE for the chosen parameters (lambda = 0.00 and sigma = 0.50) is : 998.5560
The MSE for the chosen parameters (lambda = 0.10 and sigma = 0.50) is : 503.3328
The MSE for the chosen parameters (lambda = 0.20 and sigma = 0.50) is : 512.7811
The MSE for the chosen parameters (lambda = 0.30 and sigma = 0.50) is : 525.0479
The MSE for the chosen parameters (lambda = 0.40 and sigma = 0.50) is : 538.6433
The MSE for the chosen parameters (lambda = 0.50 and sigma = 0.50) is : 552.7041
The MSE for the chosen parameters (lambda = 1.00 and sigma = 0.50) is : 618.3160
The MSE for the chosen parameters (lambda = 1.50 and sigma = 0.50) is : 669.7054
The MSE for the chosen parameters (lambda = 2.00 and sigma = 0.50) is : 708.8776
The MSE for the chosen parameters (lambda = 2.50 and sigma = 0.50) is : 739.2341
The MSE for the chosen parameters (lambda = 3.00 and sigma = 0.50) is : 763.2896


([array([-1.01863407e-10,  9.99999999e-01,  1.00000000e+00, ...,
         -2.18278728e-10, -5.96628524e-10, -2.32830644e-10]),
  array([0.43943344, 0.4340629 , 0.4323965 , ..., 0.43665257, 0.43673756,
         0.43516752]),
  array([0.40155027, 0.39691609, 0.39560559, ..., 0.39893663, 0.39962271,
         0.39869787]),
  array([0.36985701, 0.36567165, 0.3645303 , ..., 0.36742687, 0.36824697,
         0.36755341]),
  array([0.34283881, 0.33899753, 0.33797011, ..., 0.34057553, 0.34142318,
         0.34085396]),
  array([0.31951253, 0.31595399, 0.3150136 , ..., 0.31739725, 0.3182362 ,
         0.317747  ]),
  array([0.23844113, 0.23581735, 0.23514116, ..., 0.23685358, 0.23755287,
         0.23724959]),
  array([0.19019411, 0.18810968, 0.18757713, ..., 0.18892539, 0.18950268,
         0.18927723]),
  array([0.15818779, 0.15645765, 0.15601755, ..., 0.15713157, 0.15761983,
         0.15743917]),
  array([0.1354025 , 0.13392336, 0.13354812, ..., 0.1344979 , 0.13492   ,
         0.13476888]),


## Kernel Logistic Regression

- Binary Classificaiton setup: $\mathcal Y = \{-1, 1\}$
- $\mathcal l_{0/1}(f(x),y) = \mathbb 1\{yf(x) < 0 \}$ (0 if $y = \text{sign}f(x)$, 1 otherwise)
- $\mathcal l_{\text{logistic}}(f(x),y) = -\log p(y|f(x)) = \log(1 + e^{-yf(x)})$ where $p(y|f(x)) = \sigma(y(f(x))$
- solve WKRR