In [None]:
from models import *

import numpy as np
import pandas as pd
import math

In [None]:
# Location of the TwinsUK workbook; the train and test splits are two sheets of this one file
data_path = 'data/TwinsUK.xls'

# Read both named sheets with a single read_excel call (a list of sheet
# names yields a dict of DataFrames keyed by sheet name)
_twins_sheets = pd.read_excel(data_path, sheet_name=['Training Set', 'Testing Set'])
tw_train_data = _twins_sheets['Training Set']
tw_test_data = _twins_sheets['Testing Set']

# Stack the training rows above the testing rows under a fresh 0..n-1 index
twins_data = pd.concat([tw_train_data, tw_test_data], ignore_index = True)

In [None]:
# Latent dimension shared by every hyperparameter search in this notebook
latent_dim = 50


def mse_scorer(latent_dim, kernel, alpha, gamma, coef0, degree, X_train, X_test):
    '''Score one KPCA configuration by its reconstruction error.

    Builds a KPCA_model from X_train and the given hyperparameters, asks it
    to reconstruct X_train and then X_test, and measures how far each
    reconstruction is from its input.

    Note: despite the function name, the value computed for each split is
    the square root of the mean squared error (i.e. an RMSE), taken over
    every element of the array.

    Parameters: latent_dim, kernel, alpha, gamma, coef0 and degree are
    forwarded unchanged to KPCA_model (presumably it fits on X_train --
    confirm in the models module); X_train and X_test are the arrays to
    reconstruct.

    Returns a tuple (train_error, test_error) of Python floats.'''

    model = KPCA_model(X_train, latent_dim, kernel, alpha, gamma, coef0, degree)

    # Reconstruct both splits first (train, then test), then score them
    splits = (X_train, X_test)
    reconstructed = [model.reconstruct(split) for split in splits]
    return tuple(
        math.sqrt(np.mean((split - recon)**2))
        for split, recon in zip(splits, reconstructed)
    )

In [None]:
# Grid-search alpha, the only hyperparameter varied for the cosine KPCA kernel
alpha_vals = [1,2,3,4,5]
cosine_min_score, cosine_alpha = float("Inf"), 0
for alpha in alpha_vals:
    # gamma, coef0 and degree are passed as 0 here; [1] picks the test-split
    # error out of the (train_error, test_error) pair returned by mse_scorer.
    # NOTE(review): the winner is chosen on the test split, so the reported
    # best score is likely optimistic -- consider a validation split.
    score = mse_scorer(latent_dim, "cosine", alpha, 0, 0, 0, tw_train_data.values,tw_test_data.values)[1]
    if not (score < cosine_min_score):
        continue  # no strict improvement (this also skips NaN scores)
    cosine_min_score, cosine_alpha = score, alpha

In [None]:
# Grid-search alpha and gamma for the RBF KPCA kernel
alpha_vals = [1,2,3,4]
gamma_vals = [0.001, 0.005,0.01,0.05,0.1]

rbf_min_score = float("Inf")
rbf_params = []

# Flatten the grid up front: alpha varies slowest, gamma fastest
rbf_grid = [(alpha, gamma) for alpha in alpha_vals for gamma in gamma_vals]
for alpha, gamma in rbf_grid:
    # coef0 and degree are passed as 0 here; [1] picks the test-split error
    # out of the (train_error, test_error) pair returned by mse_scorer.
    score = mse_scorer(latent_dim, "rbf", alpha, gamma, 0, 0, tw_train_data.values,tw_test_data.values)[1]
    if score < rbf_min_score:
        # Strict '<' keeps the first configuration that reaches the minimum
        rbf_min_score, rbf_params = score, [alpha, gamma]

In [None]:
# Grid-search alpha, gamma and coef0 for the sigmoid KPCA kernel
alpha_vals = [1,2,3,4,5]
gamma_vals = [0.001, 0.005,0.01,0.05,0.1]
coef0_vals = [0,1,2,3]

sigmoid_min_score = float("Inf")
sigmoid_params = []
for alpha in alpha_vals:
    for gamma in gamma_vals:
        for coef0 in coef0_vals:
            try:
                # degree is passed as 0 here; [1] picks the test-split error
                # out of the (train_error, test_error) pair from mse_scorer.
                score = mse_scorer(latent_dim, "sigmoid", alpha, gamma, coef0, 0, tw_train_data.values,tw_test_data.values)[1]
            except Exception as err:
                # Catch only ordinary errors so KeyboardInterrupt / SystemExit
                # still stop the search, and report what was skipped instead
                # of hiding it. As before, a failure abandons the remaining
                # coef0 values for this (alpha, gamma) pair.
                print('sigmoid search: skipping alpha =', alpha, 'gamma =', gamma,
                      'coef0 >=', coef0, '->', type(err).__name__, err)
                break
            if score < sigmoid_min_score:
                # Strict '<' keeps the first configuration that reaches the minimum
                sigmoid_min_score = score
                sigmoid_params = [alpha, gamma, coef0]

In [None]:
# Grid-search alpha, gamma, coef0 and degree for the polynomial KPCA kernel
alpha_vals = [1,2,3,4,5]
gamma_vals = [0.001, 0.005,0.01,0.05,0.1]
coef0_vals = [0,1,2,3]
degree_vals = [2.0, 3.0, 4.0, 5.0,6.0, 7.0]

poly_min_score = float("Inf")
poly_params = []

for alpha in alpha_vals:
    for gamma in gamma_vals:
        for coef0 in coef0_vals:
            for degree in degree_vals:
                try:
                    # [1] picks the test-split error out of the
                    # (train_error, test_error) pair from mse_scorer.
                    score = mse_scorer(latent_dim, "poly", alpha, gamma, coef0, degree, tw_train_data.values,tw_test_data.values)[1]
                except Exception as err:
                    # Catch only ordinary errors so KeyboardInterrupt /
                    # SystemExit still stop the search, and report what was
                    # skipped instead of hiding it. As before, a failure
                    # abandons the remaining degrees for this
                    # (alpha, gamma, coef0) combination.
                    print('poly search: skipping alpha =', alpha, 'gamma =', gamma,
                          'coef0 =', coef0, 'degree >=', degree, '->', type(err).__name__, err)
                    break
                if score < poly_min_score:
                    # Strict '<' keeps the first configuration that reaches the minimum
                    poly_min_score = score
                    poly_params = [alpha, gamma, coef0, degree]

In [None]:
def _param_or_na(params, index):
    '''Return params[index], or the string 'NA' when no value was stored there.

    A search's parameter list stays empty if no configuration ever beat the
    initial infinite score (for example, every score was NaN or every call
    failed); indexing it directly would raise IndexError.'''
    return params[index] if index < len(params) else 'NA'


# Column header shared by the four summary tables below
_column_header = 'Alpha: \t Gamma: \t Coef0: \t  Degree:'

# Cosine: only alpha was searched, the other columns are not applicable
print('================= Cosine Kernel Parameters ========================')
print('Best testing score: ', cosine_min_score)
print(_column_header)
print(cosine_alpha, '\t  NA \t \t NA \t \t NA')

# RBF: alpha and gamma were searched
print('================= RBF Kernel Parameters========================')
print('Best testing score: ', rbf_min_score)
print(_column_header)
print(_param_or_na(rbf_params, 0), '\t', _param_or_na(rbf_params, 1),'\t \t NA \t \t NA')

# Sigmoid: alpha, gamma and coef0 were searched
print('================= Sigmoid Kernel Parameters========================')
print('Best testing score: ', sigmoid_min_score)
print(_column_header)
print(_param_or_na(sigmoid_params, 0), '\t', _param_or_na(sigmoid_params, 1),'\t \t', _param_or_na(sigmoid_params, 2),'\t \t NA')

# Polynomial: all four hyperparameters were searched
print('================= Polynomial Kernel Parameters========================')
print('Best testing score: ', poly_min_score)
print(_column_header)
print(_param_or_na(poly_params, 0), '\t', _param_or_na(poly_params, 1),'\t \t', _param_or_na(poly_params, 2),'\t \t', _param_or_na(poly_params, 3))