In [55]:
%matplotlib inline
import numpy as np
from numpy.random import Generator
import matplotlib.pyplot as plt
import scipy.spatial

In [56]:
x_tr = np.load('X_train.npy')
y_tr = np.load('y_train.npy')

In [57]:
def squared_distances(X1, X2):
    """Compute the matrix of pairwise squared-distances between all points in X1 and in X2.
    """
    return scipy.spatial.distance.cdist(X1, X2, metric='seuclidean')

In [58]:
# TODO: put your code here
def kernel_matrix(X1, X2, kernel_type, param):
    # X1 : array of shape n x d
    # X2 : array of shape m x d
    if kernel_type == 'linear':
        return X1 @ X2.T
    elif kernel_type == 'polynomial':
        exponent = param
        return (X1 @ X2.T + 1) ** exponent
    elif kernel_type == 'gaussian':
        lengthscale = param
        return np.exp(-squared_distances(X1, X2) / (2 * lengthscale ** 2))
    else:
        raise ValueError(kernel_type)

In [59]:
def krls_train(x, y, reg_par, kernel_type, kernel_par):
    w = np.linalg.solve(kernel_matrix(x, x, kernel_type, kernel_par) + reg_par * x.shape[0] * np.eye(x.shape[0]), y)
    return w

def krls_predict(x_ts, x_tr, w, kernel_type, kernel_par):
    k =  kernel_matrix(x_ts, x_tr, kernel_type, kernel_par)
    return np.sign(k @ w)

In [60]:
def calc_err(Ypred, Ytrue):
    # Calculate number of correct predictions
    correct_predictions = np.sum(Ypred == Ytrue)
    
    # Total number of samples
    n = len(Ytrue)
    
    # error = [1 - (sum of correct predictions)/n] * 100
    error_percentage = (1 - correct_predictions / n) * 100
    
    return error_percentage

#test remove before sending

In [61]:
ker_par = 2.0
reg_param = 0.1
ker_type = "gaussian"

w_krls = krls_train(x_tr, y_tr, reg_param, ker_type, ker_par)
ypred_tr = krls_predict(x_tr, x_tr, w_krls, ker_type, ker_par)

error = calc_err(ypred_tr, y_tr)
accuracy = 100 - error

print(f"Training Error: {error:.2f}%")
print(f"Training Accuracy: {accuracy:.2f}%")

# Show percentage of correct predictions
n = len(y_tr)
correct = np.sum(ypred_tr == y_tr)
print(f"\nCorrect predictions: {correct}/{n} ({accuracy:.1f}%)")
print(f"Incorrect predictions: {n-correct}/{n} ({error:.1f}%)")

Training Error: 14.86%
Training Accuracy: 85.14%

Correct predictions: 613/720 (85.1%)
Incorrect predictions: 107/720 (14.9%)


In [62]:
def _make_kfold_indices(n_tot, num_folds, rng):
    perm = rng.choice(n_tot, size=n_tot, replace=False)
    return np.array_split(perm, num_folds)


def krls_kfold_valerr(x_tr, y_tr, split_idx, reg_par, kernel_type, kernel_par):
    """
    Compute train/val errors over *given* folds (split_idx is a list of index arrays).
    """
    n_tot = x_tr.shape[0]
    assert len(split_idx) > 1 and all(idx.dtype == int for idx in split_idx)
    assert sum(len(idx) for idx in split_idx) == n_tot

    tr_errs, val_errs = [], []
    for val_ids in split_idx:
        val_mask = np.zeros(n_tot, dtype=bool)
        val_mask[val_ids] = True

        kf_x_tr, kf_y_tr = x_tr[~val_mask], y_tr[~val_mask]
        kf_x_val, kf_y_val = x_tr[val_mask], y_tr[val_mask]

        w_krls = krls_train(kf_x_tr, kf_y_tr, reg_par=reg_par, kernel_type=kernel_type, kernel_par=kernel_par)
        pred_tr  = krls_predict(kf_x_tr,  kf_x_tr, w_krls, kernel_type=kernel_type, kernel_par=kernel_par)
        pred_val = krls_predict(kf_x_val, kf_x_tr, w_krls, kernel_type=kernel_type, kernel_par=kernel_par)

        tr_errs.append(calc_err(pred_tr,  kf_y_tr))
        val_errs.append(calc_err(pred_val, kf_y_val))

    return np.asarray(tr_errs), np.asarray(val_errs)

np.random.default_rng(42)
def _check_random_generator(rng: int | Generator | None) -> Generator:
    """Convert rng into a np.random.Generator instance."""
    if rng is None:
        return np.random.default_rng()
    if isinstance(rng, np.random.Generator):
        return rng
    if isinstance(rng, (int, np.integer)):
        return np.random.default_rng(rng)

def krls_kfoldcv(x_tr, y_tr, num_folds, reg_par_list, kernel_type, kernel_par_list, rng=None):
    """
    Choose the best parameters for both the regularizer and the kernel parameter according to K-Fold CV.
    """
    n_tot = x_tr.shape[0]
    assert num_folds > 1 and num_folds <= n_tot

    # Precompute shared folds once for fair comparison
    rng = _check_random_generator(rng)
    split_idx = _make_kfold_indices(n_tot, num_folds, rng)

    errors = np.zeros((len(reg_par_list), len(kernel_par_list)))
    for i, reg_par in enumerate(reg_par_list):
        for j, kernel_par in enumerate(kernel_par_list):
            _, val_error = krls_kfold_valerr(x_tr, y_tr, split_idx, reg_par, kernel_type, kernel_par)
            errors[i, j] = np.mean(val_error)

    best_i, best_j = np.unravel_index(np.argmin(errors), errors.shape)
    best_reg_par = reg_par_list[best_i]
    best_kernel_par = kernel_par_list[best_j]
    best_err = errors[best_i, best_j]

    print(f"The best error ({best_err:.5f}) was obtained with "
          f"lambda={best_reg_par}, kernel parameter={best_kernel_par}")

    return best_reg_par, best_kernel_par, best_err

In [63]:
# K_fold parameters
num_folds = 5
ker_type = 'gaussian'
reg_par_list = [0.001, 0.01, 0.1, 1.0, 10.0]
ker_par_list = [0.01 * (10**i) for i in range(5)]

best_reg_par, best_kernel_par, _ = krls_kfoldcv(x_tr, y_tr, num_folds, reg_par_list, ker_type, ker_par_list)

w_krls = krls_train(x_tr, y_tr, best_reg_par, ker_type, best_kernel_par)
y_pred = krls_predict(x_tr, x_tr, w_krls, ker_type, best_kernel_par)

error = calc_err(ypred_tr, y_tr)
n = len(y_tr)
correct = np.sum(ypred_tr == y_tr)
print(f"Training Error: {error:.2f}%")
print(f"Correct predictions: {correct}/{n} ({accuracy:.1f}%)")

The best error (14.16667) was obtained with lambda=0.001, kernel parameter=1.0
Training Error: 14.86%
Correct predictions: 613/720 (85.1%)


In [64]:
#x_ts = np.load('X_test.npy')
#y_ts = np.load('y_test.npy')

In [65]:
#y_pred = krls_predict(x_ts, y_ts, w_krls, ker_type, best_kernel_par)

#error = calc_err(ypred_tr, y_ts)
#n = len(y_ts)
#correct = np.sum(ypred_tr == y_ts)
#print(f"Training Error: {error:.2f}%")
#print(f"Correct predictions: {correct}/{n} ({accuracy:.1f}%)")