## Assemble the data into a CSV file

#### Read the data  

In [60]:
import pandas as pd
# Location of the raw UCI Tic-Tac-Toe data file.
data_url = ('https://archive.ics.uci.edu/ml/'
            'machine-learning-databases/tic-tac-toe/tic-tac-toe.data')

# One name per board square (row-major order), followed by the class label.
column_names = [
    'top-left-square', 'top-middle-square', 'top-right-square',
    'middle-left-square', 'middle-middle-square', 'middle-right-square',
    'bottom-left-square', 'bottom-middle-square', 'bottom-right-square',
    'X_Wins',
]

# header=None: the file is read without treating its first line as column names,
# so the names are assigned explicitly afterwards.
df = pd.read_csv(data_url, header=None)
df.columns = column_names

#### Review Data 

In [61]:
# Display the last rows of the frame to check the assigned column names
# against the raw (still character-valued) data.
df.tail()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,X_Wins
953,o,x,x,x,o,o,o,x,x,negative
954,o,x,o,x,x,o,x,o,x,negative
955,o,x,o,x,o,x,x,o,x,negative
956,o,x,o,o,x,x,x,o,x,negative
957,o,o,x,x,x,o,o,x,x,negative


#### Map The Characters to Numerals 

In [62]:
# Lookup tables that translate the raw text values into integers.
X_Wins_map = {'positive': 1, 'negative': 0}
column_map = {'o': 3, 'x': 4, 'b': 2}

# Encode the class label.
df['X_Wins'] = df['X_Wins'].map(X_Wins_map)

# Every column other than the label is a board square; encode them all in one
# loop instead of one hand-written assignment per column.
square_columns = [name for name in df.columns if name != 'X_Wins']
for name in square_columns:
    df[name] = df[name].map(column_map)

# Series.map turns any value that is missing from the lookup table into NaN.
# That happens, for example, if this cell is executed a second time on the
# already-encoded frame, so fail loudly instead of passing NaN downstream.
assert not df.isnull().values.any(), (
    "Unmapped values found after encoding; reload the data before re-running this cell"
)


#### Review Data 

In [63]:
# Display the first rows after the mapping step; every cell should now hold
# an integer code instead of a character.
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,X_Wins
0,4,4,4,4,3,3,4,3,3,1
1,4,4,4,4,3,3,3,4,3,1
2,4,4,4,4,3,3,3,3,4,1
3,4,4,4,4,3,3,3,2,2,1
4,4,4,4,4,3,3,2,3,2,1


### Shuffle the dataframe and save to a CSV file

In [65]:
import numpy as np
# Fix the global NumPy seed so the row order is the same on every run.
np.random.seed(0)

# Draw a random ordering of the existing index labels and rearrange the rows to match.
shuffled_index = np.random.permutation(df.index)
df = df.reindex(shuffled_index)

# Persist the shuffled frame; index=False leaves the index labels out of the file.
df.to_csv('./Tic-Tac_Toe_data.csv', index=False)

#### Review data 

In [66]:
# Display the first rows after shuffling; the index labels are expected to
# appear out of their original order.
df.head()

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,X_Wins
879,2,4,4,4,4,3,3,3,3,0
496,2,4,3,4,4,4,3,3,2,1
14,4,4,4,3,4,3,3,3,4,1
546,2,3,4,3,2,4,4,3,4,1
55,4,4,4,2,3,4,2,3,3,1


### Split our data into 70 percent training and 30 percent test data 

In [68]:
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was deprecated in scikit-learn 0.18 and
# removed in 0.20, so importing from it fails on current releases.
from sklearn.model_selection import train_test_split

# Columns used as model inputs (the nine board squares) ...
feature_col_names = ['top-left-square','top-middle-square','top-right-square',
    'middle-left-square','middle-middle-square','middle-right-square',
    'bottom-left-square','bottom-middle-square','bottom-right-square']
# ... and the column the model has to predict.
predicted_class_names =['X_Wins']

x = df[feature_col_names].values
# Selecting with a list keeps y two-dimensional (one column).
y = df[predicted_class_names].values
split_test_size = 0.30

# random_state pins the split so the reported numbers are repeatable.
x_train , x_test, y_train, y_test = train_test_split(x,y,test_size=split_test_size, random_state=42)

### Verifying predicted value was split correctly

In [83]:
# Share of all rows that ended up in each split; the two values should be
# close to 70% and 30%.
print("{0:0.2f}% in training set".format((len(x_train)/len(df.index))* 100))
# This line reports the test split, so it is labelled as such.
print("{0:0.2f}% in test set".format((len(x_test)/len(df.index))* 100))

69.94% in training set
30.06% in training set


In [85]:
# Compare the class balance of the full data set with that of each split.
# Each count is computed once and reported as a number and as a percentage
# of the set it belongs to.
total_rows = len(df.index)
orig_pos = len(df.loc[df['X_Wins'] == 1])
orig_neg = len(df.loc[df['X_Wins'] == 0])
print("Original X_Wins/positive  : {0} ({1:0.2f}%)".format(orig_pos, (orig_pos / total_rows) * 100.0))
print("Original X_Loses/negative : {0} ({1:0.2f}%)".format(orig_neg, (orig_neg / total_rows) * 100.0))
print("")

train_pos = len(y_train[y_train == 1])
train_neg = len(y_train[y_train == 0])
print("Training X_Wins/positive  : {0} ({1:0.2f}%)".format(train_pos, (train_pos / len(y_train)) * 100.0))
print("Training X_Loses/negative : {0} ({1:0.2f}%)".format(train_neg, (train_neg / len(y_train)) * 100.0))
print("")

test_pos = len(y_test[y_test == 1])
test_neg = len(y_test[y_test == 0])
print("Test X_Wins/positive      : {0} ({1:0.2f}%)".format(test_pos, (test_pos / len(y_test)) * 100.0))
print("Test X_Loses/negative     : {0} ({1:0.2f}%)".format(test_neg, (test_neg / len(y_test)) * 100.0))

Original X_Wins/positive  : 626 (65.34%)
Original X_Loses/negative : 332 (34.66%)

Training X_Wins/positive  : 444 (66.27%)
Training X_Loses/negative : 226 (33.73%)

Test X_Wins/positive      : 182 (63.19%)
Test X_Loses/negative     : 106 (36.81%)


### Training our Model using Naive Bayes 

In [86]:
from sklearn.naive_bayes import GaussianNB

# Build an untrained Gaussian Naive Bayes classifier.
nb_model = GaussianNB()

# Fit it on the training split; ravel() flattens the label array to one
# dimension. Leaving the call as the last expression displays the fitted model.
nb_model.fit(X=x_train, y=y_train.ravel())

GaussianNB(priors=None)

### Evaluate our Training Model 

In [89]:
# Performance metrics library.
from sklearn import metrics

# Run the fitted Naive Bayes model over both splits.
nb_predict_train = nb_model.predict(x_train)
nb_predict_test = nb_model.predict(x_test)

# Fraction of correct predictions on each split.
nb_train_accuracy = metrics.accuracy_score(y_train, nb_predict_train)
nb_test_accuracy = metrics.accuracy_score(y_test, nb_predict_test)

print("Accuracy on Training data: {0:.4f}".format(nb_train_accuracy))
print("Accuracy on Test data: {0:.4f}".format(nb_test_accuracy))
print()

Accuracy on Training data: 0.7104
Accuracy on Test data: 0.7326



### Confusion Matrix 

In [90]:
# labels=[1, 0] orders the classes so that class 1 (X wins) comes first:
# it occupies the first row/column of the matrix and the first report line.
nb_confusion = metrics.confusion_matrix(y_test, nb_predict_test, labels=[1, 0])
nb_report = metrics.classification_report(y_test, nb_predict_test, labels=[1,0])

print("Confusion Matrix")
print("{0}".format(nb_confusion))
print("")

print("Classification Report")
print(nb_report)

Confusion Matrix
[[182   0]
 [ 77  29]]

Classification Report
             precision    recall  f1-score   support

          1       0.70      1.00      0.83       182
          0       1.00      0.27      0.43       106

avg / total       0.81      0.73      0.68       288



### Train Our Model using Random Forest 

In [77]:
# Import RandomForestClassifier from ensemble module
from sklearn.ensemble import RandomForestClassifier

# Create the random forest object.
# n_estimators is pinned to 10, the value the recorded model repr shows, because
# the library default changed to 100 in scikit-learn 0.22; leaving it implicit
# would make the results depend on the installed version.
# random_state fixes the randomness used while building the trees.
rf_model = RandomForestClassifier(n_estimators=10, random_state=42)

# Train the model; ravel() flattens the label array to one dimension.
rf_model.fit(x_train, y_train.ravel())

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=42,
            verbose=0, warm_start=False)

### Evaluate our Model 

#### Training data Accuracy 

In [91]:
# Predict on the same rows the forest was trained on ...
rf_predict_train = rf_model.predict(x_train)

# ... and report the fraction it gets right.
rf_train_accuracy = metrics.accuracy_score(y_train, rf_predict_train)
print("Accuracy: {0:.4f}".format(rf_train_accuracy))

Accuracy: 1.0000


#### Test data Accuracy

In [92]:
# Predict on the held-out test rows ...
rf_predict_test = rf_model.predict(x_test)

# ... and report the fraction it gets right.
rf_test_accuracy = metrics.accuracy_score(y_test, rf_predict_test)
print("Accuracy: {0:.4f}".format(rf_test_accuracy))

Accuracy: 0.9340


### Classification Report 

In [80]:
# labels=[1, 0] lists class 1 (X wins) first in both the matrix and the report.
rf_confusion = metrics.confusion_matrix(y_test, rf_predict_test, labels=[1, 0])
rf_report = metrics.classification_report(y_test, rf_predict_test, labels=[1,0])

print(rf_confusion)
print("")
print("Classification Report")
print(rf_report)

[[175   7]
 [ 12  94]]

Classification Report
             precision    recall  f1-score   support

          1       0.94      0.96      0.95       182
          0       0.93      0.89      0.91       106

avg / total       0.93      0.93      0.93       288

