In [92]:
import numpy as np
import matplotlib.pyplot as plt
# Read the numeric text table into an array; the path is relative to the
# notebook's working directory. Later cells index it as a 2-D array.
dataset = np.loadtxt('./tictac_multi.txt')

In [93]:
# Shuffle the rows in place. The legacy global RNG is seeded first so that a
# Restart & Run All reproduces the same row order instead of a new one each run.
np.random.seed(42)
np.random.shuffle(dataset)

In [94]:
# Columns 0-8 become the inputs; every remaining column becomes the targets.
X, Y = dataset[:, :9], dataset[:, 9:]

In [95]:
# Sanity-check the dimensions of the input and target arrays (one per line).
print(X.shape, Y.shape, sep="\n")

(6551, 9)
(6551, 9)


In [96]:
from sklearn import neighbors
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from numpy import arange
from numpy import argmax
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [97]:
# apply threshold to positive probabilities to create labels
def to_labels(pos_probs, threshold):
    """Binarise scores against a cutoff.

    Every element of ``pos_probs`` that is greater than or equal to
    ``threshold`` maps to 1 and every other element maps to 0. The result
    has the same shape as ``pos_probs`` and an integer dtype.
    """
    at_or_above_cutoff = pos_probs >= threshold
    return at_or_above_cutoff.astype('int')

In [98]:
def KNNRegressor(train_X, test_X, train_Y, test_Y):
    """Grid-search a KNN regressor on one split and score its thresholded output.

    A ``KNeighborsRegressor`` is tuned with ``GridSearchCV`` on the training
    split. Its continuous predictions for the test split are binarised with
    the cutoff (searched over ``[0, 1)`` in steps of 0.001) that gives the
    highest weighted F1 score, and error metrics plus a row-normalised
    confusion matrix of the per-row arg-max columns are printed.

    Parameters
    ----------
    train_X, test_X : array
        Feature rows of the training and test split.
    train_Y, test_Y : array
        Target rows of the training and test split. They are indexed along
        axis 1, so they must be 2-D (presumably 0/1 indicator columns --
        TODO confirm against the data file).

    Returns
    -------
    accuracy : float
        ``100 - 100 * RMSE`` between ``test_Y`` and the thresholded
        predictions. NOTE(review): this is an RMSE-derived score, not a
        classification accuracy; it is kept as-is because callers compare it.
    clf : GridSearchCV
        The fitted search object.
    """
    print(test_X.shape)
    print(test_Y.shape)
    print(train_X.shape)
    print(train_Y.shape)

    # Hyper-parameter grid explored by the search.
    params = {'n_neighbors':[5,6,7,8,9,10],
            'leaf_size':[1,2,3,5],
            'weights':['uniform', 'distance'],
            'algorithm':['auto', 'ball_tree','kd_tree','brute'],
            'n_jobs':[-1]}

    knn = neighbors.KNeighborsRegressor()
    clf = GridSearchCV(knn, param_grid=params, n_jobs= -1)
    clf.fit(train_X, train_Y)

    print("Best Hyper Parameters:\n",clf.best_params_)

    res = clf.predict(test_X)

    # Candidate cutoffs for turning the continuous predictions into labels.
    thresholds = np.arange(0, 1, 0.001)

    # NOTE(review): the cutoff is selected against test_Y itself, so every
    # score reported below is optimistically biased; consider tuning it on a
    # validation split carved out of the training data instead.
    scores = [f1_score(test_Y, to_labels(res, t), average = 'weighted') for t in thresholds]
    ix = np.argmax(scores)
    print('Threshold=%.3f, F-Score=%.5f' % (thresholds[ix], scores[ix]))

    # Use the same helper as the search above so both apply the identical rule.
    threshold_res = to_labels(res, thresholds[ix])
    print("########## Y_actual #########")
    print(test_Y)
    print("######### T_RES ############")
    print(threshold_res)

    # Compute each metric once and reuse it; the root is named rmse because
    # that is what it holds.
    mae = metrics.mean_absolute_error(test_Y, threshold_res)
    mse = metrics.mean_squared_error(test_Y, threshold_res)
    rmse = np.sqrt(mse)
    print('Mean Absolute Error:', mae)
    print('Mean Squared Error:', mse)
    print('Root Mean Squared Error:', rmse)

    accuracy = 100 - (rmse * 100)
    print("Accuracy:" + str(accuracy))

    print("#### CONFUSION MATRIX ##########")
    # Each row is reduced to the index of its largest entry before comparison;
    # the raw (un-thresholded) predictions are used for the predicted index.
    true_classes = np.argmax(test_Y, axis = 1)
    predicted_classes = res.argmax(axis = 1)
    cm = confusion_matrix(true_classes, predicted_classes)
    # Normalise each row by its total. A class that is predicted but never
    # true has a zero total; leave that row as zeros instead of dividing by
    # zero and printing NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm = np.divide(cm, row_totals,
                   out=np.zeros(cm.shape, dtype=float),
                   where=row_totals != 0)
    print(cm)

    return accuracy, clf

In [99]:
from skmultilearn.model_selection import IterativeStratification
# Cross-validation setup: the number of folds and the skmultilearn
# iterative-stratification splitter that produces them.
k_cross_fold = 10
skf = IterativeStratification(n_splits = k_cross_fold)

# Running statistics over the folds. accuracy_avg accumulates the SUM of the
# fold scores; it is divided by the fold count only when printed below.
accuracy_max, accuracy_min, accuracy_avg = 0.0, 100.0, 0.0

# Search object of the best-scoring fold (stays 0 until a fold scores above 0.0).
best_clf = 0

for train_index, test_index in skf.split(X, Y):
    train_X, train_Y = X[train_index], Y[train_index]
    test_X, test_Y = X[test_index], Y[test_index]

    accuracy, clf = KNNRegressor(train_X, test_X, train_Y, test_Y)

    # Strict comparison: on a tie the earlier fold's search object is kept.
    if accuracy > accuracy_max:
        accuracy_max, best_clf = accuracy, clf
    if accuracy < accuracy_min:
        accuracy_min = accuracy
    accuracy_avg += accuracy
    print("#######################")

print("Max accuracy = " + str(accuracy_max))
print("Min accuracy = " + str(accuracy_min))
print("Avg accuracy = " + str(accuracy_avg/k_cross_fold))

(656, 9)
(656, 9)
(5895, 9)
(5895, 9)
Best Hyper Parameters:
 {'algorithm': 'brute', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Threshold=0.501, F-Score=0.92127
########## Y_actual #########
[[0. 0. 0. ... 1. 1. 0.]
 [1. 0. 1. ... 0. 1. 0.]
 [1. 0. 0. ... 1. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
######### T_RES ############
[[0 0 0 ... 1 1 0]
 [1 0 1 ... 0 1 0]
 [1 0 0 ... 1 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Mean Absolute Error: 0.03218157181571816
Mean Squared Error: 0.03218157181571816
Root Mean Squared Error: 0.17939222897248966
Accuracy:82.06077710275103
#### CONFUSION MATRIX ##########
[[0.85987261 0.         0.03184713 0.00636943 0.03184713 0.00636943
  0.04458599 0.00636943 0.01273885]
 [0.         0.73563218 0.02298851 0.04597701 0.03448276 0.02298851
  0.03448276 0.03448276 0.06896552]
 [0.         0.         0.87096774 0.         0.03225806 0.02150538
  0.01075269 



Best Hyper Parameters:
 {'algorithm': 'brute', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Threshold=0.482, F-Score=0.92741
########## Y_actual #########
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 1. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]
######### T_RES ############
[[0 1 0 ... 1 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 ...
 [0 1 0 ... 0 0 0]
 [0 0 1 ... 1 0 0]
 [0 1 0 ... 0 0 0]]
Mean Absolute Error: 0.030514096185737978
Mean Squared Error: 0.030514096185737978
Root Mean Squared Error: 0.17468284456619654
Accuracy:82.53171554338034
#### CONFUSION MATRIX ##########
[[0.88461538 0.         0.02564103 0.         0.01923077 0.01923077
  0.02564103 0.01923077 0.00641026]
 [0.         0.82352941 0.04705882 0.01176471 0.02352941 0.
  0.03529412 0.02352941 0.03529412]
 [0.01030928 0.         0.86597938 0.03092784 0.02061856 0.05154639
  0.02061856 0.         0.        ]
 [0.         0.034482



Best Hyper Parameters:
 {'algorithm': 'brute', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Threshold=0.500, F-Score=0.94275
########## Y_actual #########
[[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 1. 0. 0.]
 ...
 [0. 1. 0. ... 1. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
######### T_RES ############
[[0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [1 0 0 ... 1 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Mean Absolute Error: 0.024907260201377845
Mean Squared Error: 0.024907260201377845
Root Mean Squared Error: 0.1578203415323191
Accuracy:84.2179658467681
#### CONFUSION MATRIX ##########
[[0.91082803 0.         0.01273885 0.01273885 0.01273885 0.01273885
  0.00636943 0.00636943 0.02547771]
 [0.         0.78823529 0.02352941 0.03529412 0.02352941 0.01176471
  0.07058824 0.         0.04705882]
 [0.         0.         0.94623656 0.         0.01075269 0.
  0.03225806 0.         0.01075269]
 [0.         0.        



Best Hyper Parameters:
 {'algorithm': 'brute', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Threshold=0.534, F-Score=0.92431
########## Y_actual #########
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 1. 0. 0.]]
######### T_RES ############
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [1 0 0 ... 0 0 0]
 [0 0 0 ... 1 0 0]]
Mean Absolute Error: 0.03232941977199252
Mean Squared Error: 0.03232941977199252
Root Mean Squared Error: 0.17980383692233187
Accuracy:82.01961630776681
#### CONFUSION MATRIX ##########
[[0.85350318 0.         0.01273885 0.01910828 0.03184713 0.00636943
  0.01910828 0.02547771 0.03184713]
 [0.01204819 0.87951807 0.03614458 0.         0.04819277 0.
  0.01204819 0.         0.01204819]
 [0.00925926 0.         0.91666667 0.01851852 0.02777778 0.
  0.01851852 0.         0.00925926]
 [0.         0.         0.     



Best Hyper Parameters:
 {'algorithm': 'brute', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Threshold=0.500, F-Score=0.93470
########## Y_actual #########
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 ...
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 1.]
 [1. 0. 1. ... 0. 0. 1.]]
######### T_RES ############
[[0 0 0 ... 0 0 0]
 [1 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 1]
 ...
 [0 1 0 ... 0 0 0]
 [0 0 1 ... 0 0 1]
 [1 0 1 ... 0 0 1]]
Mean Absolute Error: 0.028774484312705843
Mean Squared Error: 0.028774484312705843
Root Mean Squared Error: 0.16963043451192902
Accuracy:83.0369565488071
#### CONFUSION MATRIX ##########
[[0.82802548 0.01910828 0.03184713 0.01273885 0.04458599 0.01273885
  0.02547771 0.01273885 0.01273885]
 [0.         0.83333333 0.03571429 0.02380952 0.04761905 0.
  0.03571429 0.         0.02380952]
 [0.01041667 0.02083333 0.875      0.         0.08333333 0.
  0.         0.         0.01041667]
 [0.03636364 0.         0.    



Best Hyper Parameters:
 {'algorithm': 'brute', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Threshold=0.425, F-Score=0.91663
########## Y_actual #########
[[1. 1. 0. ... 1. 0. 0.]
 [0. 1. 0. ... 1. 1. 0.]
 [1. 0. 0. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 1. 0. 0.]]
######### T_RES ############
[[1 1 0 ... 1 0 0]
 [0 1 0 ... 1 1 0]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [1 0 0 ... 1 0 0]]
Mean Absolute Error: 0.03533859303090072
Mean Squared Error: 0.03533859303090072
Root Mean Squared Error: 0.18798561921301513
Accuracy:81.20143807869849
#### CONFUSION MATRIX ##########
[[0.87820513 0.00641026 0.00641026 0.00641026 0.02564103 0.01282051
  0.02564103 0.         0.03846154]
 [0.         0.82222222 0.         0.04444444 0.05555556 0.03333333
  0.03333333 0.01111111 0.        ]
 [0.         0.         0.94117647 0.         0.00980392 0.
  0.01960784 0.00980392 0.01960784]
 [0.         0.        