In [2]:
import numpy as np
import sklearn.metrics as skmetrics
import warnings
from utils.load_dataset import load_dataset

warnings.filterwarnings(action='ignore')
np.random.seed = 1

In [5]:
class KernelRidgeRegression(object):

    def __init__(self, kernel_func, degree=None, gamma=None):
        '''
        Class for kernel ridge regression

        Arg(s):
            kernel_func : str
                name of kernel function to use: linear, polynomial, rbf (gaussian)
            degree : int
                p-order for polynomial
            gamma : float
                standard deviation of the Gaussian
        '''

        # Define private variables
        self.__weights = None
        self.__kernel_func = kernel_func
        self.__degree = degree
        self.__gamma = gamma

    def __linear_kernel(self, X1, X2):
        '''
        Computes the linear kernel function on X1 and X2

        Arg(s):
            X1 : numpy[float32]
                N x d feature vector
            X2 : numpy[float32]
                N x d feature vector
        Returns:
            numpy[float32] : N x N kernel matrix
        '''

        # TODO: Implement linear kernel
        kernel = X1 @ X2.T
        return kernel


    def __polynomial_kernel(self, X1, X2, degree):
        '''
        Computes the p-order polynomial kernel function on X1 and X2 with c = 1

        Arg(s):
            X1 : numpy[float32]
                N x d feature vector
            X2 : numpy[float32]
                N x d feature vector
            degree : int
                p-order for polynomial
        Returns:
            numpy[float32] : N x N kernel matrix
        '''

        # TODO: Implement polynomial kernel with c = 1
        c = 1
        kernel = (X1 @ X2.T + c) ** degree
        return kernel

    def __rbf_kernel(self, X1, X2, gamma):
        '''
        Computes the RBF (Gaussian) kernel function on X1 and X2

        Arg(s):
            X1 : numpy[float32]
                N x d feature vector
            X2 : numpy[float32]
                N x d feature vector
            gamma : float
                standard deviation of the Gaussian
        Returns:
            numpy[float32] : N x N kernel matrix
        '''
        
        # TODO: Implement RBF (Gaussian) kernel
        print('over here')
        X1_sq = np.sum(X1**2, axis=1).reshape(-1, 1)
        X2_sq = np.sum(X2**2, axis=1).reshape(1, -1)
        print('a')
        dist_sq = X1_sq + X2_sq - 2 * np.dot(X1, X2.T)
        print('b')

        adjusted_gamma = 1 / (2 * gamma**2)
        print('c')
        kernel = np.exp(-adjusted_gamma * dist_sq)
        print('af')
        return kernel.astype(np.float32)

    def fit(self, X, y, weight_decay=0):
        '''
        Fits the model to X and y via normal equation in kernelized form

        Arg(s):
            X : numpy[float32]
                N x d feature vector
            y : numpy[float32]
                N x 1 ground-truth label
            weight_decay : float
                weight of weight decay term
        '''

        self.__X = X.copy()

        # TODO: Implement the fit function

        if self.__kernel_func == 'linear':
            K = self.__linear_kernel(X, X)
        elif self.__kernel_func == 'polynomial':
            K = self.__polynomial_kernel(X, X, self.__degree)
        elif self.__kernel_func == 'rbf':
            print('yeehaw')
            K = self.__rbf_kernel(X, X, self.__gamma)
        else:
            raise ValueError('Unsupported kernel function: {}'.format(self.__kernel_func))

        self.__weights = np.linalg.inv(K + weight_decay * np.eye(K.shape[0])) @ y

    def predict(self, X):
        '''
        Predicts the real value for each feature vector X

        Arg(s):
            x : numpy[float32]
                N x d feature vector
        Returns:
            numpy[float32] : N x 1 real value vector (\hat{y})
        '''

        # TODO: Implement the predict function

        if self.__kernel_func == 'linear':
            K = self.__linear_kernel(X, self.__X)
        elif self.__kernel_func == 'polynomial':
            K = self.__polynomial_kernel(X, self.__X, self.__degree)
        elif self.__kernel_func == 'rbf':
            K = self.__rbf_kernel(X, self.__X, self.__gamma)
        else:
            raise ValueError('Unsupported kernel function: {}'.format(self.__kernel_func))

        y_hat = K @ self.__weights
        return y_hat

In [6]:
data = load_dataset(1, False)

In [7]:
X = []
y = []

for entry in data:
    features = entry[:6]
    targets = entry[6][10]
    X.append(features)
    y.append(targets)

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)

In [8]:
weight_decay = 1e-3
degree = 3
gamma = 1

# Shuffle the dataset based on sample indices
shuffled_indices = np.random.permutation(X.shape[0])

# Choose the first 60% as training set, next 20% as validation and the rest as testing
train_split_idx = int(0.60 * X.shape[0])
val_split_idx = int(0.80 * X.shape[0])

train_indices = shuffled_indices[0:train_split_idx]
val_indices = shuffled_indices[train_split_idx:val_split_idx]
test_indices = shuffled_indices[val_split_idx:]

# Select the examples from X and y to construct our training, validation, testing sets
X_train, y_train = X[train_indices, :], y[train_indices]
X_val, y_val = X[val_indices, :], y[val_indices]
X_test, y_test = X[test_indices, :], y[test_indices]

# Instantiate KernelRidgeRegressionSciKit with an rbf kernel with gamma of 1 / (2 * specified gamma ** 2)
# model_scikit = KernelRidgeRegressionSciKit(alpha=weight_decay, kernel='rbf', gamma=1 / (2 * gamma ** 2))

# Instantiate our kernel ridge regression model with an rbf kernel with specified gamma of 1
model_ours = KernelRidgeRegression(kernel_func='rbf', gamma=gamma)

print('Results for Scikit-learn model')

# Train scikit-learn model
# model_scikit.fit(X_train, y_train)

# Score model using mean mean squared error on training set

# mse_scikit_train = skmetrics.mean_squared_error(y_train, model_scikit.predict(X_train))
# print('Training set mean squared error: {:.4f}'.format(mse_scikit_train))

# # Score model using mean squared error validation set

# mse_scikit_val = skmetrics.mean_squared_error(y_val, model_scikit.predict(X_val))
# print('Validation set mean squared error: {:.4f}'.format(mse_scikit_val))

# # Score model using mean squared error testing set

# mse_scikit_test = skmetrics.mean_squared_error(y_test, model_scikit.predict(X_test))
# print('Testing set mean squared error: {:.4f}'.format(mse_scikit_test))

print('Results for our model')

# Train our model
model_ours.fit(X_train, y_train, weight_decay=weight_decay)
print('hey')


# Score model using mean mean squared error on training set

mse_ours_train = skmetrics.mean_squared_error(y_train, model_ours.predict(X_train))
print('Training set mean squared error: {:.4f}'.format(mse_ours_train))

# Score model using mean squared error validation set

mse_ours_val = skmetrics.mean_squared_error(y_val, model_ours.predict(X_val))
print('Validation set mean squared error: {:.4f}'.format(mse_ours_val))

# Score model using mean squared error testing set

mse_ours_test = skmetrics.mean_squared_error(y_test, model_ours.predict(X_test))
print('Testing set mean squared error: {:.4f}'.format(mse_ours_test))


Results for Scikit-learn model
Results for our model
yeehaw
over here
a


: 