In [1]:
import numpy as np
dataset = np.loadtxt('./tictac_multi.txt')

In [2]:
np.random.shuffle(dataset)

In [3]:
X = dataset[:, : 9 ]
Y = dataset[:, 9 : ]

In [4]:
print(X.shape)
print(Y.shape)

(6551, 9)
(6551, 9)


In [5]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsRestClassifier

In [6]:
def KNN(train_X, test_X, train_Y, test_Y):
    print(test_X.shape)
    print(test_Y.shape)
    print(train_X.shape)
    print(train_Y.shape)
    
    #Hyper Parameters Set
    params = {'n_neighbors':[5,6,7,8,9,10],
          'leaf_size':[1,2,3,5],
          'weights':['uniform', 'distance'],
          'algorithm':['auto', 'ball_tree','kd_tree','brute'],
          'n_jobs':[-1]}
    
    knn =  KNeighborsClassifier(n_jobs = -1)
    
    #Use GridSearch
    clf = GridSearchCV(knn, param_grid=params, n_jobs=1)
    
    clf.fit(train_X, train_Y)
    
    #The best hyper parameters set
    print("Best Hyper Parameters:\n",clf.best_params_)
    
    res = clf.predict(test_X)
    res_cm = res
    #print("res:" + str(res.shape))
    res = np.asarray(res)
    res = np.reshape(res, (-1, test_Y.shape[0]))
    accuracy = 0.0
    for i in range(res.shape[1]):
        if(res[0][i] == test_Y[i][0]):
            accuracy += 1
    #print(accuracy)
    print("Accuracy = " + str(accuracy/res.shape[1]))
    print("#### CONFUSION MATRIX ##########")
    rounded_labels = np.argmax(test_Y, axis = 1)
    cm = confusion_matrix(rounded_labels, res_cm.argmax(axis = 1))
    cm = cm / cm.sum(axis=1)[:, np.newaxis] # normalizing the confusion matrix
    print(cm)
    return accuracy/res.shape[1], clf

In [7]:
from skmultilearn.model_selection import IterativeStratification
accuracy_max = 0.0
accuracy_min = 100.0
accuracy_avg = 0.0

best_clf = 0

k_cross_fold = 10

skf = IterativeStratification(n_splits = k_cross_fold)
for train_index, test_index in skf.split(X, Y):
    #print(str(train_index[0]) + " and " + str(train_index[-1])  + " and " + "shape = " + str(train_index.shape))
    #print(str(test_index[0]) + " and " + str(test_index[-1])   + " and " + "shape = " + str(test_index.shape))
    train_X, test_X = X[train_index], X[test_index]
    train_Y, test_Y = Y[train_index], Y[test_index]
    
    accuracy, clf = KNN(train_X, test_X, train_Y, test_Y)
    accuracy *= 100.0
    
    if(accuracy > accuracy_max):
        accuracy_max = accuracy
        best_clf = clf
    accuracy_min = min(accuracy_min, accuracy)
    accuracy_avg += accuracy
    
    print("#######################")
    
print("Max accuracy = " + str(accuracy_max))
print("Min accuracy = " + str(accuracy_min))
print("Avg accuracy = " + str(accuracy_avg/k_cross_fold))

(641, 9)
(641, 9)
(5910, 9)
(5910, 9)
Best Hyper Parameters:
 {'algorithm': 'ball_tree', 'leaf_size': 1, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Accuracy = 0.6583463338533542
#### CONFUSION MATRIX ##########
[[0.96153846 0.         0.01282051 0.00641026 0.01282051 0.
  0.         0.00641026 0.        ]
 [0.         0.87640449 0.01123596 0.         0.04494382 0.01123596
  0.01123596 0.02247191 0.02247191]
 [0.05154639 0.         0.92783505 0.         0.01030928 0.
  0.01030928 0.         0.        ]
 [0.02       0.         0.         0.92       0.02       0.04
  0.         0.         0.        ]
 [0.03809524 0.         0.03809524 0.00952381 0.85714286 0.02857143
  0.00952381 0.         0.01904762]
 [0.07142857 0.03571429 0.         0.         0.         0.89285714
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.02083333 0.02083333
  0.95833333 0.         0.        ]
 [0.03846154 0.03846154 0.         0.         0.         0.
  0.      

Best Hyper Parameters:
 {'algorithm': 'auto', 'leaf_size': 2, 'n_jobs': -1, 'n_neighbors': 10, 'weights': 'distance'}
Accuracy = 0.6539027982326951
#### CONFUSION MATRIX ##########
[[0.97435897 0.         0.         0.         0.         0.
  0.         0.00641026 0.01923077]
 [0.         0.96428571 0.         0.         0.01190476 0.
  0.01190476 0.         0.01190476]
 [0.         0.02941176 0.95098039 0.00980392 0.00980392 0.
  0.         0.         0.        ]
 [0.05084746 0.01694915 0.03389831 0.83050847 0.         0.
  0.03389831 0.01694915 0.01694915]
 [0.         0.         0.         0.         1.         0.
  0.         0.         0.        ]
 [0.05       0.025      0.025      0.025      0.         0.85
  0.         0.         0.025     ]
 [0.03636364 0.         0.         0.         0.         0.
  0.96363636 0.         0.        ]
 [0.         0.         0.03225806 0.         0.03225806 0.03225806
  0.         0.90322581 0.        ]
 [0.02173913 0.         0.         0.0434