# Tic Tac Toe - A Classification Approach

#### Tic-Tac-Toe game is a typical binary classification problem. 
#### In classifying Tic-Tac-Toe games, there are nine positional input variables and a given winner output variable. Each input variable can carry an “X”, an “O”, or b (blank). 
#### The binary winner output variable can be either “positive” or “negative”. 
#### The input data is a set of games {g1, g2, …, gj} with their output category {c1,c2}.

#### The problem is to find the model ci = f(gj) from correct pairs of <gj, ci> consistently, where game gj is represented by nine positional variables.

#### Once the classifier is built, a correct prediction is made when a game gj can be assigned to the correct class ci , either “positive” or “negative”

#### Aim : To predict if the given set of moves will win the game or not

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn import tree,metrics

from sklearn.preprocessing import Imputer

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

## Data Loading

In [3]:
# Loading data
data = pd.read_csv("tic_tac_toe_data.csv", sep = ",")
data_copy = pd.read_csv("tic_tac_toe_data.csv", sep = ",")

In [4]:
data.head()

Unnamed: 0,first_row_left,first_row_middle,first_row_right,center_row_left,center_row_middle,center_row_right,bottom_row_left,bottom_row_middle,bottom_row_right,is_win
0,x,x,x,x,o,o,o,x,o,positive
1,x,x,x,x,o,o,o,o,x,positive
2,x,x,x,x,o,o,o,b,b,positive
3,x,x,x,x,o,o,b,o,b,positive
4,x,x,x,x,o,o,b,b,o,positive


## Data Preprocessing

In [5]:
# Lets do some Data Pre-processing ...

# Lets do label encoding such that Positive(win)=1, negative(lose)=0
mapping_for_wins = {'positive':1, 'negative':0}
data.is_win = data.is_win.map(mapping_for_wins)
data_copy.is_win = data_copy.is_win.map(mapping_for_wins)

In [6]:
data_copy.head()

Unnamed: 0,first_row_left,first_row_middle,first_row_right,center_row_left,center_row_middle,center_row_right,bottom_row_left,bottom_row_middle,bottom_row_right,is_win
0,x,x,x,x,o,o,o,x,o,1
1,x,x,x,x,o,o,o,o,x,1
2,x,x,x,x,o,o,o,b,b,1
3,x,x,x,x,o,o,b,o,b,1
4,x,x,x,x,o,o,b,b,o,1


In [7]:
# Lets encode Moves such that x=1,o=0, b=mean of the data(later)
mapping_for_moves = {'x':1, "o":0} 
data = data.drop(columns=["is_win"], axis=1)
for i in data.columns: # Applying map to all the columns except is_win.
    data[i] = data[i].map(mapping_for_moves)

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 957 entries, 0 to 956
Data columns (total 9 columns):
first_row_left       752 non-null float64
first_row_middle     707 non-null float64
first_row_right      752 non-null float64
center_row_left      707 non-null float64
center_row_middle    797 non-null float64
center_row_right     707 non-null float64
bottom_row_left      752 non-null float64
bottom_row_middle    707 non-null float64
bottom_row_right     752 non-null float64
dtypes: float64(9)
memory usage: 67.4 KB


In [9]:
data.head()

Unnamed: 0,first_row_left,first_row_middle,first_row_right,center_row_left,center_row_middle,center_row_right,bottom_row_left,bottom_row_middle,bottom_row_right
0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
1,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
2,1.0,1.0,1.0,1.0,0.0,0.0,0.0,,
3,1.0,1.0,1.0,1.0,0.0,0.0,,0.0,
4,1.0,1.0,1.0,1.0,0.0,0.0,,,0.0


In [10]:
# Extracting features and labels
features = data.values
labels = data_copy.is_win.values

In [11]:
# Filling missing values aka "b"
features = (Imputer().fit_transform(features))



In [12]:
# Changing type to int
features = features.astype(np.int)
labels = labels.astype(np.int)

In [13]:
features

array([[1, 1, 1, ..., 0, 1, 0],
       [1, 1, 1, ..., 0, 0, 1],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 1, 0, 1],
       [0, 1, 0, ..., 1, 0, 1],
       [0, 0, 1, ..., 0, 1, 1]])

In [14]:
labels

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

## Splitting Dataset into Training set and Testing set

In [15]:
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=3, shuffle=True)

# Model Fitting : Decision Tree Classifier

In [16]:
dt_clf = DecisionTreeClassifier()
dt_clf.fit(features_train, labels_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

## Model Evaluation Study

In [17]:
decision_tree_score = dt_clf.score(features_test, labels_test)

In [18]:
decision_tree_score

0.975

In [19]:
predictions = dt_clf.predict(features_test)

In [20]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(labels_test, predictions)

In [21]:
print(cm)

[[ 79   2]
 [  4 155]]


In [22]:
count_misclassified = (labels_test != predictions).sum()
print('Misclassified samples: {}'.format(count_misclassified))

Misclassified samples: 6


In [23]:
# how did our model perform?
np.where(labels_test!=predictions)

(array([ 14,  17,  77, 116, 130, 197]),)

In [24]:
accuracy = metrics.accuracy_score(labels_test, predictions)
print('Accuracy: {:.2f}'.format(accuracy))

Accuracy: 0.97


In [25]:
from sklearn.metrics import classification_report
dt_clf_report = classification_report(labels_test, predictions)

In [26]:
print(dt_clf_report)

              precision    recall  f1-score   support

           0       0.95      0.98      0.96        81
           1       0.99      0.97      0.98       159

   micro avg       0.97      0.97      0.97       240
   macro avg       0.97      0.98      0.97       240
weighted avg       0.98      0.97      0.98       240



In [1]:
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

y_scores = dt_clf.predict_proba(features_test)
fpr, tpr, threshold = roc_curve(labels_test, y_scores[:, 1])
roc_auc = auc(fpr, tpr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, color='Blue', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curve of Decision Tree Classifier')
plt.show()

NameError: name 'dt_clf' is not defined

In [28]:
# View a list of the features and their importance scores
list(zip(data.columns, dt_clf.feature_importances_))

[('first_row_left', 0.11447603471499686),
 ('first_row_middle', 0.1301481506102757),
 ('first_row_right', 0.029874637785972352),
 ('center_row_left', 0.11794784741665391),
 ('center_row_middle', 0.0630206740345565),
 ('center_row_right', 0.16430305912163168),
 ('bottom_row_left', 0.1675481465374115),
 ('bottom_row_middle', 0.0803622408844287),
 ('bottom_row_right', 0.13231920889407275)]

# Model Fitting : Random Forest Classifier

In [3]:
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier(n_estimators=100) # With 100 decision tree
rf_clf.fit(features_train, labels_train)

NameError: name 'features_train' is not defined

## Model Evaluation Study

In [30]:
random_forest_score = rf_clf.score(features_test, labels_test)
print(random_forest_score)

0.9958333333333333


In [31]:
predictions = rf_clf.predict(features_test)

In [32]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(labels_test, predictions)
print(cm)

[[ 81   0]
 [  1 158]]


In [33]:
count_misclassified = (labels_test != predictions).sum()
print('Misclassified samples: {}'.format(count_misclassified))

Misclassified samples: 1


In [34]:
# how did our model perform?
np.where(labels_test!=predictions)

(array([126]),)

In [35]:
accuracy = metrics.accuracy_score(labels_test, predictions)
print('Accuracy: {:.2f}'.format(accuracy))

Accuracy: 1.00


In [36]:
from sklearn.metrics import classification_report
rf_clf_report = classification_report(labels_test, predictions)
print(rf_clf_report)

              precision    recall  f1-score   support

           0       0.99      1.00      0.99        81
           1       1.00      0.99      1.00       159

   micro avg       1.00      1.00      1.00       240
   macro avg       0.99      1.00      1.00       240
weighted avg       1.00      1.00      1.00       240



In [2]:
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

y_scores = rf_clf.predict_proba(features_test)
fpr, tpr, threshold = roc_curve(labels_test, y_scores[:, 1])
roc_auc = auc(fpr, tpr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, color='Blue', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.title('ROC Curve of Decision Tree Classifier')
plt.show()

NameError: name 'rf_clf' is not defined

In [38]:
# View a list of the features and their importance scores
list(zip(data.columns, rf_clf.feature_importances_))

[('first_row_left', 0.1392289110810076),
 ('first_row_middle', 0.09945762565230863),
 ('first_row_right', 0.1144302986827411),
 ('center_row_left', 0.0961020162062947),
 ('center_row_middle', 0.1331360512257608),
 ('center_row_right', 0.10458047851470739),
 ('bottom_row_left', 0.11358771707700756),
 ('bottom_row_middle', 0.09117148053741392),
 ('bottom_row_right', 0.10830542102275842)]