In [14]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.metrics import confusion_matrix, accuracy_score, r2_score
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier,MLPRegressor

In [2]:
data_final = pd.read_csv('tictactoedatasets/tictac_final.txt', sep=" ", header=None)
data_final.columns = ['p1','p2','p3','p4','p5','p6','p7','p8','p9','y']
data_final

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,y
0,1,1,1,1,-1,-1,1,-1,-1,1
1,1,1,1,1,-1,-1,-1,1,-1,1
2,1,1,1,1,-1,-1,-1,-1,1,1
3,1,1,1,1,-1,-1,-1,0,0,1
4,1,1,1,1,-1,-1,0,-1,0,1
...,...,...,...,...,...,...,...,...,...,...
953,-1,1,1,1,-1,-1,-1,1,1,-1
954,-1,1,-1,1,1,-1,1,-1,1,-1
955,-1,1,-1,1,-1,1,1,-1,1,-1
956,-1,1,-1,-1,1,1,1,-1,1,-1


In [3]:
data_final = data_final.sample(frac=1).reset_index(drop=True)

In [4]:
svc = LinearSVC()
y_pred = cross_val_predict(svc, data_final.drop('y', axis = 1), data_final['y'], cv=5)
confusion_matrix(data_final['y'], y_pred, normalize='true'), accuracy_score(data_final['y'], y_pred)

(array([[0.95180723, 0.04819277],
        [0.        , 1.        ]]),
 0.9832985386221295)

In [7]:
knc = KNeighborsClassifier(n_neighbors= 5,weights='distance')
y_pred = cross_val_predict(knc, data_final.drop('y', axis = 1), data_final['y'], cv = 5)
confusion_matrix(data_final['y'], y_pred, normalize='true'), accuracy_score(data_final['y'], y_pred)

(array([[1., 0.],
        [0., 1.]]),
 1.0)

In [6]:
mlp = MLPClassifier(hidden_layer_sizes=(500,200, 100, 50,20), max_iter=1000)
y_pred = cross_val_predict(mlp, data_final.drop('y', axis = 1), data_final['y'], cv = 5)
confusion_matrix(data_final['y'], y_pred, normalize='true'), accuracy_score(data_final['y'], y_pred)

(array([[0.98192771, 0.01807229],
        [0.        , 1.        ]]),
 0.9937369519832986)

In [8]:
data_single = pd.read_csv('tictactoedatasets/tictac_single.txt', sep=" ", header=None)
data_single.columns = ['p1','p2','p3','p4','p5','p6','p7','p8','p9','y']
data_single

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,y
0,1,-1,0,0,0,0,0,1,0,6
1,1,0,1,0,0,0,0,-1,0,1
2,0,0,0,0,0,1,0,0,0,2
3,0,1,1,0,0,0,0,-1,0,0
4,1,1,-1,0,0,0,0,-1,0,3
...,...,...,...,...,...,...,...,...,...,...
6546,-1,0,1,1,1,-1,0,0,0,6
6547,1,0,1,-1,1,1,-1,0,-1,7
6548,-1,-1,0,1,0,0,1,1,0,2
6549,0,1,1,-1,-1,1,0,1,-1,0


In [9]:
data_single = data_single.sample(frac=1).reset_index(drop=True)

In [20]:
svc = LinearSVC()
y_pred = cross_val_predict(svc, data_single.drop('y', axis = 1), data_single['y'], cv=5)
confusion_matrix(data_single['y'], y_pred, normalize='true'), accuracy_score(data_single['y'], y_pred)

(array([[0.98017903, 0.        , 0.00255754, 0.        , 0.01726343,
         0.        , 0.        , 0.        , 0.        ],
        [0.93964497, 0.        , 0.01538462, 0.        , 0.04497041,
         0.        , 0.        , 0.        , 0.        ],
        [0.96402878, 0.00102775, 0.01130524, 0.        , 0.02363823,
         0.        , 0.        , 0.        , 0.        ],
        [0.91812865, 0.        , 0.01169591, 0.        , 0.07017544,
         0.        , 0.        , 0.        , 0.        ],
        [0.96749522, 0.00382409, 0.00956023, 0.        , 0.01912046,
         0.        , 0.        , 0.        , 0.        ],
        [0.95184136, 0.        , 0.02549575, 0.        , 0.02266289,
         0.        , 0.        , 0.        , 0.        ],
        [0.95404412, 0.00183824, 0.00735294, 0.        , 0.03676471,
         0.        , 0.        , 0.        , 0.        ],
        [0.8976378 , 0.00393701, 0.00787402, 0.        , 0.09055118,
         0.        , 0.        , 0.       

In [21]:
knc = KNeighborsClassifier(n_neighbors= 7, weights='distance')
y_pred = cross_val_predict(knc, data_single.drop('y', axis = 1), data_single['y'], cv = 5)
confusion_matrix(data_single['y'], y_pred, normalize = 'true'), accuracy_score(data_single['y'], y_pred)

(array([[9.41176471e-01, 6.39386189e-03, 1.34271100e-02, 7.03324808e-03,
         1.27877238e-02, 4.47570332e-03, 8.31202046e-03, 6.39386189e-04,
         5.75447570e-03],
        [1.89349112e-02, 8.72189349e-01, 2.48520710e-02, 1.30177515e-02,
         2.72189349e-02, 5.91715976e-03, 1.77514793e-02, 7.10059172e-03,
         1.30177515e-02],
        [3.18602261e-02, 1.64439877e-02, 9.14696814e-01, 1.02774923e-02,
         9.24974306e-03, 4.11099692e-03, 6.16649538e-03, 2.05549846e-03,
         5.13874615e-03],
        [4.87329435e-02, 2.72904483e-02, 1.75438596e-02, 8.36257310e-01,
         3.70370370e-02, 9.74658869e-03, 5.84795322e-03, 3.89863548e-03,
         1.36452242e-02],
        [3.53728489e-02, 1.72084130e-02, 1.43403442e-02, 3.82409178e-03,
         9.14913958e-01, 3.82409178e-03, 4.78011472e-03, 0.00000000e+00,
         5.73613767e-03],
        [3.11614731e-02, 3.68271955e-02, 1.98300283e-02, 1.13314448e-02,
         2.54957507e-02, 8.35694051e-01, 1.41643059e-02, 5.66572238

In [12]:
mlp = MLPClassifier(hidden_layer_sizes=(100, 20), max_iter=1000)
y_pred = cross_val_predict(mlp, data_single.drop('y', axis = 1), data_single['y'], cv = 5)
confusion_matrix(data_single['y'], y_pred, normalize  = 'true'), accuracy_score(data_single['y'], y_pred)

(array([[1497,    6,   19,    8,   11,    5,    4,    7,    7],
        [  11,  771,   26,    5,   14,    5,   10,    0,    3],
        [   9,   10,  915,   10,   11,    1,    9,    2,    6],
        [  14,   15,    0,  451,   19,    9,    2,    1,    2],
        [  17,    5,    9,    5,  997,    5,    4,    2,    2],
        [   6,    9,    3,    1,    2,  321,    1,    6,    4],
        [   5,    3,   10,    8,    0,    0,  511,    3,    4],
        [   2,    6,    2,    5,    2,    3,    1,  230,    3],
        [   9,   10,    7,    2,    5,    0,    1,    0,  425]],
       dtype=int64),
 0.933903220882308)

In [6]:
data_multi = pd.read_csv('tictactoedatasets/tictac_multi.txt', sep=" ", header=None)
data_multi.columns = ['p1','p2','p3','p4','p5','p6','p7','p8','p9','y1','y2','y3','y4','y5','y6','y7','y8','y9','x']

In [7]:
data_multi = data_multi.drop('x', axis = 1)
data_multi

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,y1,y2,y3,y4,y5,y6,y7,y8,y9
0,1,-1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1
1,1,0,1,0,0,0,0,-1,0,0,1,0,1,1,1,1,0,1
2,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1
3,0,1,1,0,0,0,0,-1,0,1,0,0,0,0,0,0,0,0
4,1,1,-1,0,0,0,0,-1,0,0,0,0,1,0,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6546,-1,0,1,1,1,-1,0,0,0,0,0,0,0,0,0,1,0,0
6547,1,0,1,-1,1,1,-1,0,-1,0,0,0,0,0,0,0,1,0
6548,-1,-1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,1
6549,0,1,1,-1,-1,1,0,1,-1,1,0,0,0,0,0,0,0,0


In [8]:
data_multi = data_multi.sample(frac=1).reset_index(drop=True)

In [40]:
knr = KNeighborsRegressor(n_neighbors= 7, weights='distance')
y_pred = cross_val_predict(knr, data_multi.drop(['y1','y2','y3','y4','y5','y6','y7','y8','y9'], axis = 1), data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], cv = 5)
r2_score(data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], y_pred)

0.819706899885367

In [41]:
y_pred_binary = []
for i in range(9):
    fpr, tpr, thresholds = metrics.roc_curve(data_multi['y'+str(i+1)], y_pred.transpose()[i],pos_label=1)
    th = thresholds[np.argmax(tpr*(1-fpr))]
    bi = [1 if x>=th else 0 for x in y_pred.transpose()[i]]
    y_pred_binary.append(bi)
y_pred_binary = np.array(y_pred_binary).transpose()

In [42]:
confusion_matrix(data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], y_pred_binary, normalize  = 'true'), 

ValueError: multilabel-indicator is not supported

In [43]:
accuracy_score(data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], y_pred_binary)

0.7756067775912074

In [39]:
y_pred_binary.shape, y_pred.shape

((6551, 9), (6551, 9))

In [18]:
knr = KNeighborsRegressor(n_neighbors= 25, weights='distance')
y_pred = cross_val_predict(knr, data_multi.drop(['y1','y2','y3','y4','y5','y6','y7','y8','y9'], axis = 1), data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], cv = 5)
r2_score(data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], y_pred)

0.8293692028435523

In [64]:
mlr = MLPRegressor(hidden_layer_sizes=(500, 300, 150,100,50,50, 20), max_iter=5000)
y_pred = cross_val_predict(mlr, data_multi.drop(['y1','y2','y3','y4','y5','y6','y7','y8','y9'], axis = 1), data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], cv = 5)
r2_score(data_multi[['y1','y2','y3','y4','y5','y6','y7','y8','y9']], y_pred)

0.8913986441570726