In [137]:
import scipy.io as scio
import matplotlib.pyplot as plt
import numpy as np
import random
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import confusion_matrix
from skbayes.rvm_ard_models import RVC

In [138]:
train_data = scio.loadmat('Proj2FeatVecsSet1.mat')['Proj2FeatVecsSet1']
output_labels = scio.loadmat('Proj2TargetOutputsSet1.mat')['Proj2TargetOutputsSet1']
print(train_data.shape, output_labels.shape)

(25000, 60) (25000, 5)


In [139]:
def encode_output_labels(output_labels=output_labels):
    "Encodes output labels to a single value"
    out_labels = []
    for row in output_labels:
        out_labels.append(np.where(row == 1)[0][0])
    return np.array(out_labels)

def generate_unknown_class_data(entries=10000):
    "Returns Data and Output Labels for Unknown Class(Nc+1)"
    unknown_class_data = []
    for i in range(entries):
        rand = []
        for j in range(train_data.shape[1]):
            rand.append(random.random())
        unknown_class_data.append(rand)
    unknown_class_label = np.full((10000), 5, dtype=int)
    return np.array(unknown_class_data), unknown_class_label

def K_Fold_Seperation(Xtrain,Ytrain,Nf):
    "Separate the Training Data into Nf Folds and Generate Estimation and Validation Sets"
    x_est_def = []
    x_val_def = []
    y_est_def = []
    y_val_def = []

    kf = KFold(n_splits=Nf)
    for train,test in kf.split(Xtrain):
        x_est_def.append(Xtrain[train])
        x_val_def.append(Xtrain[test])
        y_est_def.append(Ytrain[train])
        y_val_def.append(Ytrain[test])

    x_est_def = np.array(x_est_def)
    x_val_def = np.array(x_val_def)
    y_est_def = np.array(y_est_def)
    y_val_def = np.array(y_val_def)
    return x_est_def, x_val_def, y_est_def, y_val_def


In [140]:
# Encode Output Lables to a single value
out_labels = encode_output_labels(output_labels=output_labels)

# Generate Data for Training Unknown Class Nc+1
unknown_class_data, unknown_class_labels = generate_unknown_class_data(entries=10000)

# Merge Actual and Generated Class Nc+1 Training Data and
train_data = np.concatenate((train_data, unknown_class_data))
out_labels = np.concatenate((out_labels, unknown_class_labels))

print(train_data.shape, out_labels.shape)

(35000, 60) (35000,)


In [141]:
x_train_def, x_test_def, y_train_def, y_test_def = train_test_split(train_data, out_labels,
                                                                   test_size=0.33,
                                                                   shuffle=True)
x_train_def = x_train_def[:1000]
x_test_def = x_test_def[:1000]
y_train_def = y_train_def[:1000]
y_test_def = y_test_def[:1000]

x_est_def, x_val_def, y_est_def, y_val_def = K_Fold_Seperation(x_train_def, y_train_def, 5)
print(x_est_def.shape, x_val_def.shape, y_est_def.shape, y_val_def.shape)

(5, 800, 60) (5, 200, 60) (5, 800) (5, 200)


In [142]:
svm_parameters = {'kernel':('linear','rbf'), 'gamma':[0.125, 0.25, 0.5 ,1, 2, 4], 'decision_function_shape':['ovo','ovr']}
def svm_classify(x_train, y_train,
                 svm_parameters, Nf):
    """Trains a Support Vector Machine Classifer"""
    # Create Model with passed hyperparameters
    svc = SVC()
    cvsvm = GridSearchCV(svc, svm_parameters, cv=Nf)
    
    # Train Model 
    cvsvm.fit(x_train,y_train)
    
    # Return Trained Model and Accuracy on Test Data
    return cvsvm


rvm_parameters = {'kernel':('linear', 'rbf'), 'n_iter':[90, 100]}
def rvm_classify(x_train,y_train,
                 rvm_parameters, Nf):
    """Trains a Relevance Vector Machine Classifier"""
    
    rvm = RVC()
    cvrvm = GridSearchCV(rvm, rvm_parameters, cv=Nf)
    
    # Train Model 
    cvrvm.fit(x_train,y_train)    
    
    # Return Trained Model and Accuracy on Test Data
    return cvrvm


gp_parameters = {'n_restarts_optimizer':[0,1], 'max_iter_predict':[80, 90, 100], 'warm_start':('True','False'),
                'multi_class':('one_vs_rest','one_vs_one')}
def gp_classify(x_train,y_train,
                gp_parameters, Nf):
    """Trains a Gaussian Process Classifier"""
    
    gpc = GaussianProcessClassifier()
    cvgpc = GridSearchCV(gpc, gp_parameters, cv=Nf)
    
    # Train Model 
    cvgpc.fit(x_train, y_train)
    
    y_pred = cvgpc.predict(x_train)
    print('The Predicted Labels are: ')
    print(y_pred)

    # Return Trained Model and Accuracy on Test Data
    return cvgpc

In [143]:
def MyCrossValidate(XTrain, YTrain, Nf):
    cvsvm = svm_classify(XTrain, YTrain, svm_parameters, Nf)
    print('The parameters of the best SVM model are: ')
    print(cvsvm.best_params_)
    
    SVMEstConfMat = []
    for i in range(x_val_def.shape[0]):
        temp_pred = cvsvm.predict(x_val_def[i])
        temp_conf = confusion_matrix(temp_pred, y_val_def[i])
        print('The Confusion Matrix For Fold %d is: ' % i)
        print(temp_conf)
        SVMEstConfMat.append(temp_conf)
    SVMEstConfMat = np.array(SVMEstConfMat)
    
    y_pred = cvsvm.predict(XTrain)
    SVMConfMatrix = confusion_matrix(YTrain, y_pred)
    print('The Overall Confusion Matrix is: ')
    print(SVMConfMatrix)

    #cvrvm = rvm_classify(x_train_def, y_train_def, x_test_def, y_test_def, rvm_parameters)
    #print('The parameters of the best RVM model are: ')
    #print(cvrvm.best_params_)

    #cvgpc = gp_classify(x_train_def, y_train_def, x_test_def, y_test_def, gp_parameters)
    #print('The parameters of the best GP model are: ')
    #print(cvgpc.best_params_)
    
    #return cvsvm.best_params_,cvrvm.best_params_,cvgpc.best_params_
    #return cvsvm.best_params_,cvgpc.best_params_
    return cvsvm.best_params_


In [144]:
#svm_best_params, rvm_best_params, gp_best_params = MyCrossValidate(x_train_def, y_train_def, 5)
#svm_best_params, gp_best_params = MyCrossValidate(x_train_def, y_train_def, 5)
svm_best_params = MyCrossValidate(x_train_def, y_train_def, 5)

The parameters of the best SVM model are: 
{'decision_function_shape': 'ovo', 'gamma': 0.5, 'kernel': 'rbf'}
The Confusion Matrix For Fold 0 is: 
[[29  1  2  0  0  0]
 [ 0 29  0  0  0  0]
 [ 0  0 25  0  0  0]
 [ 0  3  0 25  0  0]
 [ 0  0  0  0 34  0]
 [ 0  0  0  0  0 52]]
The Confusion Matrix For Fold 1 is: 
[[29  0  0  0  2  0]
 [ 0 25  0  2  0  0]
 [ 0  0 25  0  0  0]
 [ 0  2  1 26  1  0]
 [ 0  0  0  0 31  0]
 [ 0  0  0  0  0 56]]
The Confusion Matrix For Fold 2 is: 
[[28  1  0  0  0  0]
 [ 0 22  0  1  0  0]
 [ 0  0 22  1  1  0]
 [ 0  2  1 28  0  0]
 [ 1  0  0  1 30  0]
 [ 0  0  0  0  0 61]]
The Confusion Matrix For Fold 3 is: 
[[26  1  0  0  0  0]
 [ 0 18  1  0  0  0]
 [ 0  0 29  0  0  0]
 [ 0  2  1 35  1  0]
 [ 0  0  0  0 26  0]
 [ 0  0  0  0  0 60]]
The Confusion Matrix For Fold 4 is: 
[[29  1  0  0  0  0]
 [ 0 32  0  0  0  0]
 [ 0  0 29  0  1  0]
 [ 0  0  0 29  1  0]
 [ 0  0  2  0 23  0]
 [ 0  0  0  0  0 53]]
The Overall Confusion Matrix is: 
[[141   0   0   0   1   0]
 [  4 126 