In [None]:
# Import the required libraries

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold,cross_val_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset.
# header=None keeps the first record as data: with the default header handling
# pandas would use that record as the column names and drop one sample.
# The real column names are assigned right after loading.
data = pd.read_csv('/content/tic-tac-toe.txt', header=None)

In [None]:
# Name the nine board squares row by row (top, middle, bottom) and, within a
# row, left to right; the final column is the target.
data.columns = [
    f'{row}_{column}_square'
    for row in ('top', 'middle', 'bottom')
    for column in ('left', 'middle', 'right')
] + ['predict_class']

In [None]:
# Summarise the frame: number of rows, per-column non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 957 entries, 0 to 956
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   top_left_square       957 non-null    object
 1   top_middle_square     957 non-null    object
 2   top_right_square      957 non-null    object
 3   middle_left_square    957 non-null    object
 4   middle_middle_square  957 non-null    object
 5   middle_right_square   957 non-null    object
 6   bottom_left_square    957 non-null    object
 7   bottom_middle_square  957 non-null    object
 8   bottom_right_square   957 non-null    object
 9   predict_class         957 non-null    object
dtypes: object(10)
memory usage: 74.9+ KB


The columns are loaded with the `object` dtype; convert them to the `string` dtype before performing any further operations.

In [None]:
# Change the dtypes as mentioned: cast every column (the nine squares and the
# target) from object to pandas' string dtype in one frame-wide call.
data = data.astype('string')

In [None]:
# Re-inspect the frame to confirm every column now reports the string dtype.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 957 entries, 0 to 956
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   top_left_square       957 non-null    string
 1   top_middle_square     957 non-null    string
 2   top_right_square      957 non-null    string
 3   middle_left_square    957 non-null    string
 4   middle_middle_square  957 non-null    string
 5   middle_right_square   957 non-null    string
 6   bottom_left_square    957 non-null    string
 7   bottom_middle_square  957 non-null    string
 8   bottom_right_square   957 non-null    string
 9   predict_class         957 non-null    string
dtypes: string(10)
memory usage: 74.9 KB


A label encoder is used to convert the categorical values into numerical values, since the classifiers below need numeric input.

In [None]:
# Encode the target column as integer class codes; `le` keeps the fitted
# label-to-code mapping.
le = preprocessing.LabelEncoder()
le.fit(data['predict_class'])
data['predict_class'] = le.transform(data['predict_class'])

In [None]:
# Map the board symbols to codes (x -> 2, o -> 1, b -> 0) and cast the feature
# columns to real integers. Replacing with the strings '2'/'1'/'0' alone would
# leave the features as text and rely on implicit string-to-number coercion
# when the models are fitted.
symbol_codes = {'x': '2', 'o': '1', 'b': '0'}
feature_columns = data.columns.drop('predict_class')
data = data.replace(symbol_codes).astype(
    {column: 'int64' for column in feature_columns}
)

In [None]:
# Preview the first three rows to check the encoded squares and target.
data.head(3)

Unnamed: 0,top_left_square,top_middle_square,top_right_square,middle_left_square,middle_middle_square,middle_right_square,bottom_left_square,bottom_middle_square,bottom_right_square,predict_class
0,2,2,2,2,1,1,1,2,1,1
1,2,2,2,2,1,1,1,1,2,1
2,2,2,2,2,1,1,1,0,0,1


In [None]:
# Count the missing entries in each column (isna is the canonical spelling of
# isnull).
data.isna().sum()

top_left_square         0
top_middle_square       0
top_right_square        0
middle_left_square      0
middle_middle_square    0
middle_right_square     0
bottom_left_square      0
bottom_middle_square    0
bottom_right_square     0
predict_class           0
dtype: int64

In [None]:
# Separate the target (Y) from the board-square features (X) and show both shapes
Y = data['predict_class']
X = data.drop(columns=['predict_class'])
X.shape, Y.shape

((957, 9), (957,))

In [None]:
# Split the data into train and test sets (test_size=0.2) with random state = 3.
# train_test_split is imported in the notebook's import cell together with the
# other sklearn.model_selection helpers, so a fresh Run All resolves it there.

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state = 3, test_size=0.2)

X_train.shape,X_test.shape,Y_train.shape,Y_test.shape

((765, 9), (192, 9), (765,), (192,))

In [None]:
# Fit a random forest of 100 trees on the training split; random_state=0 fixes
# the seed.

model_1 = RandomForestClassifier(
    random_state=0,
    n_estimators=100,
)
model_1.fit(X=X_train, y=Y_train)

RandomForestClassifier(random_state=0)

In [None]:
# Random-forest predictions for the test split, then for the training split

Y_test_pred, Y_train_pred = model_1.predict(X=X_test), model_1.predict(X=X_train)

In [None]:
# Accuracy of the random forest on the test split and on the training split

print(
    "Testing accuracy scores are",
    accuracy_score(y_true=Y_test, y_pred=Y_test_pred),
)
print(
    "Training accuracy scores are : ",
    accuracy_score(y_true=Y_train, y_pred=Y_train_pred),
)

Testing accuracy scores are 0.96875
Training accuracy scores are :  1.0


In [None]:
# 10-fold cross-validation of the random forest on the training split,
# scored by accuracy

kfold = KFold(n_splits=10)

cross_validation = cross_val_score(
    estimator=model_1,
    X=X_train,
    y=Y_train,
    cv=kfold,
    scoring='accuracy',
)

In [None]:
# Average accuracy across the random-forest folds
print("Cross validation score mean : ", np.mean(cross_validation))

Cross validation score mean :  0.9358680792891318


In [None]:
# Fit an AdaBoost ensemble of 100 estimators on the training split;
# random_state=0 fixes the seed.

abc = AdaBoostClassifier(
    random_state=0,
    n_estimators=100,
)
abc.fit(X=X_train, y=Y_train)

AdaBoostClassifier(n_estimators=100, random_state=0)

In [None]:
# AdaBoost predictions for the test split, then for the training split
# (these rebind the names that held the random-forest predictions)

Y_test_pred, Y_train_pred = abc.predict(X=X_test), abc.predict(X=X_train)

In [None]:
# Accuracy scores for AdaBoost.
# Each label names the split scored on that line: the first line compares
# Y_test with the test predictions, the second Y_train with the train
# predictions, matching the wording of the random-forest accuracy cell.

print("Testing accuracy scores are", accuracy_score(Y_test,Y_test_pred))
print("Training accuracy scores are : ",accuracy_score(Y_train,Y_train_pred))

Training accuracy scores are 0.8697916666666666
Testing accuracy scores are :  0.8339869281045752


In [None]:
# 20-fold cross-validation of the AdaBoost model on the training split,
# scored by accuracy.
# NOTE(review): the random forest is cross-validated with 10 folds; confirm the
# different fold count is intended before comparing the two means.

kfold = KFold(n_splits=20)

cross_validation_1 = cross_val_score(
    estimator=abc,
    X=X_train,
    y=Y_train,
    cv=kfold,
    scoring='accuracy',
)



In [None]:
# Average accuracy across the AdaBoost folds
print("Cross validation score mean : ", np.mean(cross_validation_1))

Cross validation score mean :  0.8091093117408906


In [None]:
# Mean cross-validation accuracy of each model, rounded to three decimals
cv_RF = np.round(np.mean(cross_validation), decimals=3)
cv_ADA = np.round(np.mean(cross_validation_1), decimals=3)

In [None]:
# Collect the rounded scores: random forest first, then AdaBoost
list1 = [cv_RF, cv_ADA]
print(list1)

[0.936, 0.809]


In [None]:
# Write the two cross-validation scores to output.csv.
# The scores get their own frame instead of rebinding `data`, so the
# tic-tac-toe dataset stays available if earlier cells are re-run.
cv_scores = pd.DataFrame(list1)
cv_scores.to_csv('output.csv')