In [37]:
import warnings
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

In [38]:
warnings.filterwarnings("ignore")

(25000, 60) (25000, 5)


In [39]:
# Use K_Fold to seperate the training set into Nf parts, take Nf-1 parts as Estimation set
# Take the remaining 1 as Validation set
def K_Fold_Seperation(Xtrain,Ytrain,Nf):
    "Separate the Training Data into Nf Folds and Generate Estimation and Validation Sets"
    x_est_def = []
    x_val_def = []
    y_est_def = []
    y_val_def = []

    kf = KFold(n_splits=Nf)
    for train,test in kf.split(Xtrain):
        x_est_def.append(Xtrain[train])
        x_val_def.append(Xtrain[test])
        y_est_def.append(Ytrain[train])
        y_val_def.append(Ytrain[test])

    x_est_def = np.array(x_est_def)
    x_val_def = np.array(x_val_def)
    y_est_def = np.array(y_est_def)
    y_val_def = np.array(y_val_def)
    return x_est_def, x_val_def, y_est_def, y_val_def


In [41]:
x_est_def, x_val_def, y_est_def, y_val_def = K_Fold_Seperation(x_train_def, y_train_def, 5)
print(x_est_def.shape, x_val_def.shape, y_est_def.shape, y_val_def.shape)
print(x_test_def.shape, y_test_def.shape)

(5, 800, 60) (5, 200, 60) (5, 800) (5, 200)
(1000, 60) (1000,)


In [42]:
# Define parameter candidates to get the optimal parameters
# Return the optimal parameters for each algorithm
svm_parameters = {'kernel':('linear', 'poly', 'rbf'), 'gamma':[0.125, 0.5, 1, 4], 'decision_function_shape':['ovo','ovr'],
                  'tol':[1e-3, 5e-3, 1e-2]}
def svm_cv(x_train, y_train,
                 svm_parameters, Nf):
    """Trains a Support Vector Machine Classifer"""
    # Create Model with passed hyperparameters
    svc = SVC()
    cvsvm = GridSearchCV(svc, svm_parameters, cv=Nf)
    
    # Train Model 
    cvsvm.fit(x_train,y_train)
    
    # Return Trained Model and Accuracy on Test Data
    return cvsvm


rvm_parameters = {'kernel':('linear', 'poly', 'rbf'), 'degree':[2, 3], 'n_iter':[100, 200], 'tol':[0.001,0.005]}
def rvm_cv(x_train,y_train,
                 rvm_parameters, Nf):
    """Trains a Relevance Vector Machine Classifier"""
    
    rvm = RVC()
    cvrvm = GridSearchCV(rvm, rvm_parameters, cv=Nf)
    
    # Train Model 
    cvrvm.fit(x_train,y_train)    
    
    # Return Trained Model and Accuracy on Test Data
    return cvrvm


gp_parameters = {'n_restarts_optimizer':[0,1], 'max_iter_predict':[50, 100], 'warm_start':('True','False'),
                'multi_class':('one_vs_rest','one_vs_one')}
def gp_cv(x_train,y_train,
                gp_parameters, Nf):
    """Trains a Gaussian Process Classifier"""
    
    gpc = GaussianProcessClassifier()
    cvgpc = GridSearchCV(gpc, gp_parameters, cv=Nf)
    
    # Train Model 
    cvgpc.fit(x_train, y_train)

    # Return Trained Model and Accuracy on Test Data
    return cvgpc

In [43]:
# Use cross validation to get the optimal parameters and hyper-parameters
# Use the trained model to get confusion-matrix of each validation set
# And the confusion-matrix for the whole training set
# Return Ytrain, EstParameters, EstConfMatrices, and ConfMatrix
def MyCrossValidate(XTrain, YTrain, Nf):
    cvsvm = svm_cv(XTrain, YTrain, svm_parameters, Nf)
    cvrvm = rvm_cv(XTrain, YTrain, rvm_parameters, Nf)
    cvgpc = gp_cv(XTrain, YTrain, gp_parameters, Nf)
    
    SVMEstConfMat = []
    SVMY_Pred = []
    RVMEstConfMat = []
    RVMY_Pred = []
    GPEstConfMat = []
    GPY_Pred = []
    
    for i in range(x_val_def.shape[0]):
        svmtemp_pred = cvsvm.predict(x_val_def[i])
        svmtemp_conf = confusion_matrix(y_val_def[i], svmtemp_pred)
        rvmtemp_pred = cvrvm.predict(x_val_def[i])
        rvmtemp_conf = confusion_matrix(y_val_def[i], rvmtemp_pred)
        gptemp_pred = cvgpc.predict(x_val_def[i])
        gptemp_conf = confusion_matrix(y_val_def[i], gptemp_pred)
        
        SVMEstConfMat.append(svmtemp_conf)
        SVMY_Pred.append(svmtemp_pred)
        RVMEstConfMat.append(rvmtemp_conf)
        RVMY_Pred.append(rvmtemp_pred)
        GPEstConfMat.append(gptemp_conf)
        GPY_Pred.append(gptemp_pred)
        
    SVMEstConfMat = np.array(SVMEstConfMat)
    SVMY_Pred = np.array(SVMY_Pred)
    RVMEstConfMat = np.array(RVMEstConfMat)
    RVMY_Pred = np.array(RVMY_Pred)
    GPEstConfMat = np.array(GPEstConfMat)
    GPY_Pred = np.array(GPY_Pred)
    
    YTrue = y_val_def.flatten()
    SVM_All_pred = SVMY_Pred.flatten()    
    SVMConfMatrix = confusion_matrix(YTrue, SVM_All_pred)
    RVM_All_pred = RVMY_Pred.flatten()
    RVMConfMatrix = confusion_matrix(YTrue, RVM_All_pred)
    GP_All_pred = GPY_Pred.flatten()
    GPConfMatrix = confusion_matrix(YTrue, GP_All_pred)
    
    return SVMY_Pred,RVMY_Pred,GPY_Pred,cvsvm.best_params_,cvrvm.best_params_,cvgpc.best_params_,\
SVMEstConfMat,RVMEstConfMat,GPEstConfMat,SVMConfMatrix,RVMConfMatrix,GPConfMatrix


In [44]:
# Display the info acquired through Cross Validation
# Display EstParameters, EstConfMatrices, and ConfMatrix
def Dis_CV_Info(SVM_best_params,RVM_best_params,GPR_best_params,
SVMEstConfMat,RVMEstConfMat,GPEstConfMat,SVMConfMatrix,RVMConfMatrix,GPConfMatrix):
    print('The parameters of the best SVM model are: ')
    print(SVM_best_params)
    print('The parameters of the best RVM model are: ')
    print(RVM_best_params)
    print('The parameters of the best GPR model are: ')
    print(GPR_best_params)
    
    for i in range(x_val_def.shape[0]):
        print('The Confusion Matrix For Fold %d is: ' % i)
        print('SVM:')
        print(SVMEstConfMat[i])
        print('RVM:')
        print(RVMEstConfMat[i])
        print('GPR:')
        print(GPEstConfMat[i])

    print('The Overall Confusion Matrix is: ')
    print('SVM:')
    print(SVMConfMatrix)
    print('RVM:')
    print(RVMConfMatrix)
    print('GPR:')
    print(GPConfMatrix)
    return 1

In [45]:
# SVM_Y,RVM_Y,GPR_Y corresponds to Ytrain
# SVM_best_params,RVM_best_params,GPR_best_params corresponds to EstParameters
# SVMEstConfMat,RVMEstConfMat,GPEstConfMat corresponds to EstConfMatrices
# SVMConfMatrix,RVMConfMatrix,GPConfMatrix corresponds to ConfMatrix

SVM_Y,RVM_Y,GPR_Y,SVM_best_params,RVM_best_params,GPR_best_params,\
SVMEstConfMat,RVMEstConfMat,GPEstConfMat,SVMConfMatrix,RVMConfMatrix,GPConfMatrix\
= MyCrossValidate(x_train_def, y_train_def, 5)

Dis_CV_Info(SVM_best_params,RVM_best_params,GPR_best_params,
SVMEstConfMat,RVMEstConfMat,GPEstConfMat,SVMConfMatrix,RVMConfMatrix,GPConfMatrix)

The parameters of the best SVM model are: 
{'decision_function_shape': 'ovo', 'gamma': 4, 'kernel': 'poly', 'tol': 0.001}
The parameters of the best RVM model are: 
{'degree': 2, 'kernel': 'rbf', 'n_iter': 100, 'tol': 0.001}
The parameters of the best GPR model are: 
{'max_iter_predict': 50, 'multi_class': 'one_vs_rest', 'n_restarts_optimizer': 0, 'warm_start': 'True'}
The Confusion Matrix For Fold 0 is: 
SVM:
[[22  0  0  0  0  0]
 [ 0 28  0  0  0  0]
 [ 0  0 25  0  0  0]
 [ 0  0  0 32  0  0]
 [ 0  0  0  0 30  0]
 [ 0  0  0  0  0 63]]
RVM:
[[20  0  0  0  2  0]
 [ 2 16  0  9  1  0]
 [ 5  1 13  2  4  0]
 [ 5  8  0 19  0  0]
 [ 4  3  6  5 12  0]
 [ 0  0  0  0  0 63]]
GPR:
[[22  0  0  0  0  0]
 [ 0 28  0  0  0  0]
 [ 1  1 19  2  2  0]
 [ 1  1  0 30  0  0]
 [ 0  0  1  1 28  0]
 [ 0  0  0  0  0 63]]
The Confusion Matrix For Fold 1 is: 
SVM:
[[38  0  0  0  0  0]
 [ 0 31  0  0  0  0]
 [ 0  0 28  0  0  0]
 [ 0  0  0 22  0  0]
 [ 0  0  0  0 27  0]
 [ 0  0  0  0  0 54]]
RVM:
[[29  2  2  1  4  0]


1