In [1]:
import numpy as np
import time
from data_utils import load_dataset

In [2]:
# Seed NumPy's legacy global random generator so any NumPy random draws are
# repeatable across runs. No code visible in this notebook draws random numbers
# directly; presumably the seed matters for load_dataset -- TODO confirm.
np.random.seed(100)

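Below is an implementation of a Gaussian-kernel radial basis function (RBF) regression model. The kernel is $k(x, z) = \exp(-\lVert x - z \rVert^2 / \theta)$, and the weights $\alpha$ are obtained by solving $(K + \lambda I)\,\alpha = y$ using a Cholesky factorization of $K + \lambda I$. The model takes two hyperparameters: the kernel scale $\theta$ (theta) and the regularization strength $\lambda$ (lambda).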

In [3]:
def _gaussian_gram(x_a, x_b, thet):
    """Return the Gaussian kernel matrix between the rows of x_a and x_b.

    Entry (i, j) equals exp(-||x_a[i] - x_b[j]||^2 / thet).
    """
    # Broadcast (n_a, 1, d) against (1, n_b, d) to form every pairwise difference.
    diff = np.expand_dims(x_a, axis=1) - np.expand_dims(x_b, axis=0)
    return np.exp(-np.sum(np.square(diff) / thet, axis=2))


def RBF_Gaussian_kernel(x_train, y_train, x_test, thet, lamb):
    """Fit a regularized Gaussian-kernel RBF model and predict at x_test.

    The weights alpha solve (K + lamb * I) alpha = y_train, where K is the
    Gaussian kernel matrix of the training inputs. The system is solved
    through the Cholesky factor of K + lamb * I rather than by forming
    explicit matrix inverses.

    Parameters
    ----------
    x_train : array-like, 2-D (n_train, d)
        Training inputs, one sample per row.
    y_train : array-like, first dimension n_train
        Training targets.
    x_test : array-like, 2-D (n_test, d)
        Inputs at which to predict.
    thet : float
        Kernel scale; squared distances are divided by this value.
    lamb : float
        Value added to the diagonal of K before factorization.

    Returns
    -------
    numpy.ndarray
        Predictions K_test @ alpha, with first dimension n_test.

    Raises
    ------
    numpy.linalg.LinAlgError
        If K + lamb * I is not positive definite (Cholesky fails).
    """
    # Accept lists as well as arrays; .shape is needed below.
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)
    x_test = np.asarray(x_test)

    K = _gaussian_gram(x_train, x_train, thet)

    # Lower-triangular factor: K + lamb * I = chol @ chol.T
    chol = np.linalg.cholesky(K + lamb * np.identity(x_train.shape[0]))

    # Two solves replace inv(R) @ inv(R).T @ y: first chol z = y, then chol.T alpha = z.
    alpha = np.linalg.solve(chol.T, np.linalg.solve(chol, y_train))

    K_test = _gaussian_gram(x_test, x_train, thet)
    return K_test.dot(alpha)

In [7]:
# These are the hyperparameter values that will be tested
# thetas:  candidate kernel scales (squared distances are divided by theta inside the exponential)
# lambdas: candidate regularization values (lambda is added to the kernel matrix diagonal before factorization)
thetas = [0.05, 0.1, 0.5, 1, 2]
lambdas = [0.001, 0.01, 0.1, 1]

In [8]:
def validation_RMSE(x_train, y_train, x_valid, y_valid, thet, lamb, testing_runtime=False):
    """Score one (thet, lamb) pair on the validation split.

    Fits the RBF model on (x_train, y_train), predicts at x_valid and returns
    the root-mean-square error against y_valid. Unless testing_runtime is
    true, the hyperparameters and the RMSE rounded to 6 decimals are printed.
    """
    residuals = y_valid - RBF_Gaussian_kernel(x_train, y_train, x_valid, thet, lamb)
    score = np.sqrt(np.mean(np.square(residuals)))

    # Silent mode is used when the call only serves as a timing probe.
    if testing_runtime:
        return score

    print("theta={}, lambda={}, RMSE={}".format(thet, lamb, round(score, 6)))
    return score
    
def estimate_best_param(x_train, y_train, x_valid, y_valid, thetas, lambdas):
    """
    Estimates the best theta and lambda values for optimal model results
    (i.e., tunes the model hyperparameters) by exhaustive grid search.

    Every (theta, lambda) pair is scored with validation_RMSE; the pair with
    the lowest validation RMSE wins. Before the search, one silent fit with
    thetas[0] and lambdas[0] is timed and scaled by the grid size to print a
    rough running-time estimate.

    Parameters
    ----------
    x_train, y_train : training inputs and targets, passed to validation_RMSE.
    x_valid, y_valid : validation inputs and targets, passed to validation_RMSE.
    thetas : non-empty sequence of candidate theta values.
    lambdas : non-empty sequence of candidate lambda values.

    Returns
    -------
    RMSE : numpy.ndarray, shape (len(thetas), len(lambdas))
        Validation RMSE for every pair; RMSE[i, j] is for thetas[i], lambdas[j].
    best_theta_idx, best_lambda_idx : indices of the minimum entry of RMSE.
    best_theta, best_lambda : the corresponding hyperparameter values.

    Raises
    ------
    IndexError
        If thetas or lambdas is empty.
    """
    RMSE = np.empty((len(thetas), len(lambdas)))

    # timing estimate: one silent fit, scaled by the number of grid points
    print("Estimating running time...")
    t0 = time.perf_counter()
    validation_RMSE(x_train, y_train, x_valid, y_valid, thetas[0], lambdas[0], testing_runtime=True)
    print("Estimated running time: {}s".format(round((time.perf_counter()-t0)*len(thetas)*len(lambdas),2)))

    # start parameter tuning
    print("Beginning parameter tuning...")
    for i, thet in enumerate(thetas):
        for j, lamb in enumerate(lambdas):
            RMSE[i, j] = validation_RMSE(x_train, y_train, x_valid, y_valid, thet, lamb)

    # argmin works on the flattened grid; unravel back to (theta, lambda) indices
    best_theta_idx, best_lambda_idx = np.unravel_index(np.argmin(RMSE), RMSE.shape)
    best_theta = thetas[best_theta_idx]
    best_lambda = lambdas[best_lambda_idx]
    print("Best: theta={}, lambda={}; with min RMSE={}".format(best_theta, best_lambda, round(RMSE[best_theta_idx,best_lambda_idx],6)))
    return RMSE, best_theta_idx, best_lambda_idx, best_theta, best_lambda

Run the model on the mauna_loa dataset:

In [9]:
# Load the 'mauna_loa' data. load_dataset is defined in data_utils (not shown here);
# it is assumed to return train/validation/test inputs followed by the matching targets.
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('mauna_loa')

# Grid-search theta and lambda using the validation split.
RMSE, best_theta_idx, best_lambda_idx, best_theta, best_lambda = estimate_best_param(x_train, y_train, x_valid, y_valid, thetas, lambdas)

# Refit on training + validation data stacked row-wise, then score on the test split.
# Note: x_train / y_train are overwritten here with the combined data.
x_train = np.vstack([x_train, x_valid])
y_train = np.vstack([y_train, y_valid])
y_predict = RBF_Gaussian_kernel(x_train, y_train, x_test, best_theta, best_lambda)
RMSE_test = np.sqrt(np.mean(np.square(y_test-y_predict)))
print("RMSE of model on test data for theta={} and lambda={}: {}".format(best_theta, best_lambda, round(RMSE_test,6)))

Estimating running time...
Estimated running time: 0.32s
Beginning parameter tuning...
theta=0.05, lambda=0.001, RMSE=1.219709
theta=0.05, lambda=0.01, RMSE=1.117309
theta=0.05, lambda=0.1, RMSE=1.082018
theta=0.05, lambda=1, RMSE=1.092211
theta=0.1, lambda=0.001, RMSE=1.416286
theta=0.1, lambda=0.01, RMSE=1.059137
theta=0.1, lambda=0.1, RMSE=0.965908
theta=0.1, lambda=1, RMSE=0.996726
theta=0.5, lambda=0.001, RMSE=0.347101
theta=0.5, lambda=0.01, RMSE=0.42772
theta=0.5, lambda=0.1, RMSE=0.473741
theta=0.5, lambda=1, RMSE=0.606334
theta=1, lambda=0.001, RMSE=0.124479
theta=1, lambda=0.01, RMSE=0.229495
theta=1, lambda=0.1, RMSE=0.339112
theta=1, lambda=1, RMSE=0.443615
theta=2, lambda=0.001, RMSE=0.201705
theta=2, lambda=0.01, RMSE=0.252404
theta=2, lambda=0.1, RMSE=0.217145
theta=2, lambda=1, RMSE=0.24922
Best: theta=1, lambda=0.001; with min RMSE=0.124479
RMSE of model on test data for theta=1 and lambda=0.001: 0.149773


Run the model on the rosenbrock dataset:

In [10]:
# Load the 'rosenbrock' data with n_train=1000 and d=2. load_dataset is defined in
# data_utils (not shown here); it is assumed to return train/validation/test inputs
# followed by the matching targets, with d presumably the input dimension -- TODO confirm.
x_train, x_valid, x_test, y_train, y_valid, y_test = load_dataset('rosenbrock', n_train=1000, d=2)

# Grid-search theta and lambda using the validation split.
RMSE, best_theta_idx, best_lambda_idx, best_theta, best_lambda = estimate_best_param(x_train, y_train, x_valid, y_valid, thetas, lambdas)

# Refit on training + validation data stacked row-wise, then score on the test split.
# Note: x_train / y_train are overwritten here with the combined data.
x_train = np.vstack([x_train, x_valid])
y_train = np.vstack([y_train, y_valid])
y_predict = RBF_Gaussian_kernel(x_train, y_train, x_test, best_theta, best_lambda)
RMSE_test = np.sqrt(np.mean(np.square(y_test-y_predict)))
print("RMSE of model on test data for theta={} and lambda={}: {}".format(best_theta, best_lambda, round(RMSE_test,6)))

Estimating running time...
Estimated running time: 2.62s
Beginning parameter tuning...
theta=0.05, lambda=0.001, RMSE=0.735463
theta=0.05, lambda=0.01, RMSE=0.738913
theta=0.05, lambda=0.1, RMSE=0.752307
theta=0.05, lambda=1, RMSE=0.808103
theta=0.1, lambda=0.001, RMSE=0.626585
theta=0.1, lambda=0.01, RMSE=0.632028
theta=0.1, lambda=0.1, RMSE=0.647735
theta=0.1, lambda=1, RMSE=0.720521
theta=0.5, lambda=0.001, RMSE=0.351507
theta=0.5, lambda=0.01, RMSE=0.381007
theta=0.5, lambda=0.1, RMSE=0.419093
theta=0.5, lambda=1, RMSE=0.513313
theta=1, lambda=0.001, RMSE=0.257236
theta=1, lambda=0.01, RMSE=0.297407
theta=1, lambda=0.1, RMSE=0.358185
theta=1, lambda=1, RMSE=0.466635
theta=2, lambda=0.001, RMSE=0.19324
theta=2, lambda=0.01, RMSE=0.241027
theta=2, lambda=0.1, RMSE=0.311707
theta=2, lambda=1, RMSE=0.436547
Best: theta=2, lambda=0.001; with min RMSE=0.19324
RMSE of model on test data for theta=2 and lambda=0.001: 0.148124
