In [14]:
import pandas as pd

from sklearn.metrics.classification import accuracy_score
from sklearn.gaussian_process import GaussianProcessClassifier as GPC
from sklearn.gaussian_process.kernels import RBF, DotProduct as DP

from sklearn.model_selection import KFold
from random import *

import warnings; warnings.simplefilter('ignore')

#  Import Data

In [15]:
train = pd.read_csv('/Users/chuyiyu/Desktop/UCI HAR Dataset/train.csv')
test = pd.read_csv('/Users/chuyiyu/Desktop/UCI HAR Dataset/test.csv')

### separating data inputs and output lables

In [5]:
trainData  = train.drop('Activity', axis=1).values
trainLabel = train.Activity.values
testData  = test.drop('Activity', axis=1).values
testLabel = test.Activity.values

# Gaussian Process Classification

### Instanciate a Gaussian Process model using RBF kernel

In [6]:
kernel_rbf = 1.0 * RBF(length_scale=1.0)

In [5]:
gpc_rbf = GPC(kernel=kernel_rbf).fit(trainData, trainLabel)

### Instanciate a Gaussian Process model using Dot Product kernel

In [7]:
kernel_dp = 1.0 * DP(sigma_0=0)

In [6]:
gpc_dp = GPC(kernel=kernel_dp).fit(trainData, trainLabel)

### Log Marginal Likelihood on Training Set

In [7]:
print("Log Marginal Likelihood : %.3f (RBF) %.3f (Dot Product)"
      % (gpc_rbf.log_marginal_likelihood(gpc_rbf.kernel_.theta),
      gpc_dp.log_marginal_likelihood(gpc_dp.kernel_.theta)))

Log Marginal Likelihood : -218.409 (RBF) -226.873 (Dot Product)


### Prediction Accuracy on Test Set

In [8]:
print("Accuracy (RBF): %.3f (train data) %.3f (test data)"
      % (accuracy_score(trainLabel, gpc_rbf.predict(trainData)),
         accuracy_score(testLabel, gpc_rbf.predict(testData))))

print("Accuracy (Dot Product): %.3f (train data) %.3f (test data)"
      % (accuracy_score(trainLabel, gpc_dp.predict(trainData)),
         accuracy_score(testLabel, gpc_dp.predict(testData))))

Accuracy (RBF): 0.999 (train data) 0.955 (test data)
Accuracy (Dot Product): 0.996 (train data) 0.961 (test data)


# Compare the Predictions using RBF and Dot Product Kernels via 10-fold Cross-validation with Log Marginal Likelihood as Error Measure

### Merging the training data and test data

In [8]:
fulldata = train.append(test)
fulldata_X = fulldata.drop('Activity', axis=1).values
fulldata_y = fulldata.Activity.values

### Cross-validation

In [20]:
n_samples, n_features = fulldata_X.shape
items = list(range(1, n_samples+1))
index = sample(items, 500) 

n_splits=10
val_acc_rbf, val_acc_dp=0, 0
cv = KFold(n_splits=n_splits)
for train, test in cv.split(fulldata_X[index] , fulldata_y[index]):
    cv_rbf=GPC(kernel=kernel_rbf).fit(fulldata_X[index][train], fulldata_y[index][train])
    cv_dp=GPC(kernel=kernel_dp).fit(fulldata_X[index][train], fulldata_y[index][train])
    val_acc_rbf += accuracy_score(fulldata_y[index][test], cv_rbf.predict(fulldata_X[index][test]))
    val_acc_dp += accuracy_score(fulldata_y[index][test], cv_dp.predict(fulldata_X[index][test]))

val_acc_rbf /= n_splits
val_acc_dp /= n_splits
print("Accuracy: %.3f (RBF) %.3f (Dot Product)" % (val_acc_rbf,val_acc_dp))

Accuracy: 0.902 (RBF) 0.916 (Dot Product)
