# Tic-Tac-Toe with TensorFlow

In [1]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd

In [2]:
features = [
    "top-left-square", "top-middle-square", "top-right-square",
    "middle-left-square", "middle-middle-square", "middle-right-square",
    "bottom-left-square", "bottom-middle-square", "bottom-right-square",
]
labels = ["label"]
df = pd.read_csv("tic-tac-toe.data", names=features+labels)
df

Unnamed: 0,top-left-square,top-middle-square,top-right-square,middle-left-square,middle-middle-square,middle-right-square,bottom-left-square,bottom-middle-square,bottom-right-square,label
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive
5,x,x,x,x,o,o,b,b,o,positive
6,x,x,x,x,o,b,o,o,b,positive
7,x,x,x,x,o,b,o,b,o,positive
8,x,x,x,x,o,b,b,o,o,positive
9,x,x,x,x,b,o,o,o,b,positive


In [3]:
df[df=="x"] = 1.
df[df=="o"] = -1.
df[df=="b"] = 0.
x_train = df[features].values.astype(np.float32)
y_train = np.zeros([len(df[labels]), 2])
for i, j in enumerate(df[labels].values):
    if j[0] == "positive":
        y_train[i][0] = 1.
    else:
        y_train[i][1] = 1.

In [4]:
x_ph = tf.placeholder(tf.float32, [None, 9])
y_ph = tf.placeholder(tf.float32, [None, 2])

In [5]:
def inference(x_ph):
    logits = tf.layers.dense(x_ph, 2)
    # logits = tf.layers.dense(hidden, 2)
    y = tf.nn.softmax(logits)
    return y

y = inference(x_ph)

In [6]:
cross_entropy = -tf.reduce_mean(y_ph * tf.log(y))

In [7]:
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_ph, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [8]:
train_op = tf.train.ProximalGradientDescentOptimizer(1e-1).minimize(cross_entropy)
init_op = tf.global_variables_initializer()
saver = tf.train.Saver()

In [9]:
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(3000):
        sess.run(train_op, feed_dict={x_ph: x_train, y_ph: y_train})
        if i % 100 == 0:
            train_loss = sess.run(cross_entropy, feed_dict={x_ph: x_train, y_ph: y_train})
            train_accuracy = sess.run(accuracy, feed_dict={x_ph: x_train, y_ph: y_train})
            tf.logging.info("Iteration: {0} Loss: {1} Accuracy: {2}".format(i, train_loss, train_accuracy))
    if not os.path.isdir("checkpoints"):
        os.mkdir("checkpoints")
    saver.save(sess, "checkpoints/tictactoe")

INFO:tensorflow:Iteration: 0 Loss: 0.477356672287 Accuracy: 0.479123175144
INFO:tensorflow:Iteration: 100 Loss: 0.27493712306 Accuracy: 0.716075181961
INFO:tensorflow:Iteration: 200 Loss: 0.254184484482 Accuracy: 0.743215024471
INFO:tensorflow:Iteration: 300 Loss: 0.239518240094 Accuracy: 0.813152372837
INFO:tensorflow:Iteration: 400 Loss: 0.226386025548 Accuracy: 0.815240085125
INFO:tensorflow:Iteration: 500 Loss: 0.214362606406 Accuracy: 0.815240085125
INFO:tensorflow:Iteration: 600 Loss: 0.203327029943 Accuracy: 0.815240085125
INFO:tensorflow:Iteration: 700 Loss: 0.193195074797 Accuracy: 0.816283941269
INFO:tensorflow:Iteration: 800 Loss: 0.183890327811 Accuracy: 0.899791240692
INFO:tensorflow:Iteration: 900 Loss: 0.175341427326 Accuracy: 0.899791240692
INFO:tensorflow:Iteration: 1000 Loss: 0.167481482029 Accuracy: 0.899791240692
INFO:tensorflow:Iteration: 1100 Loss: 0.160248726606 Accuracy: 0.899791240692
INFO:tensorflow:Iteration: 1200 Loss: 0.153586193919 Accuracy: 0.916492700577

In [10]:
def print_board(board):
    markers = []
    for i in board:
        if i == 0:
            markers.append("   ")
        elif i == 1:
            markers.append("x ")
        else:
            markers.append("o ")
    print("{0} | {1} | {2}".format(markers[0], markers[1], markers[2]))
    print(" - + - + -")
    print("{0} | {1} | {2}".format(markers[3], markers[4], markers[5]))
    print(" - + - + -")
    print("{0} | {1} | {2}".format(markers[6], markers[7], markers[8]))


def judge_game(board):
    board_mat = board.reshape([3, 3])
    # Check rows
    for row in board_mat:
        if np.all(row==1):
            return 1
        elif np.all(row==-1):
            return -1
    # Check columns
    for col in board_mat.T:
        if np.all(col==1):
            return 1
        elif np.all(col==-1):
            return -1
    # Check diag
    if np.all(np.diag(board_mat)==1) or np.all(np.diag(board_mat[::-1])==1):
        return 1
    elif np.all(np.diag(board_mat)==-1) or np.all(np.diag(board_mat[::-1])==-1):
        return -1
    return 0


rule = """
Input your move!

[0] top-left-square
[1] top-middle-square
[2] top-right-square
[3] middle-left-square
[4] middle-middle-square
[5] middle-right-square
[6] bottom-left-square
[7] bottom-middle-square
[8] bottom-right-square
"""

print(rule)

# Build graph
with tf.Graph().as_default() as g:
    x_ph = tf.placeholder(tf.float32, [None, 9])
    y = inference(x_ph)
    saver = tf.train.Saver()

board = np.zeros(9, dtype=np.int32)

with tf.Session(graph=g) as sess:
    saver.restore(sess, "checkpoints/tictactoe")
    game_result = 0
    for _ in range(9):
        print_board(board)
        game_result = judge_game(board)
        if game_result:
            break
        # Compute scores
        scores = np.zeros(9)
        for i in range(9):
            if board[i] == 0:
                board_copy = np.array([board])
                board_copy[0][i] = -1
                scores[i] = sess.run(y, feed_dict={x_ph: board_copy})[0][1]
        print("Scores: {}".format(scores))
        print("cpu move: {}".format(np.argmax(scores)))
        board[np.argmax(scores)] = 1
        print_board(board)
        game_result = judge_game(board)
        if game_result:
            break
        your_move = int(raw_input("your move: "))
        board[your_move] = -1

if game_result == 1:
    print("\n ===== x win! =====")
elif game_result == -1:
    print("\n===== o win! =====")


Input your move!

[0] top-left-square
[1] top-middle-square
[2] top-right-square
[3] middle-left-square
[4] middle-middle-square
[5] middle-right-square
[6] bottom-left-square
[7] bottom-middle-square
[8] bottom-right-square

INFO:tensorflow:Restoring parameters from checkpoints/tictactoe
    |     |    
 - + - + -
    |     |    
 - + - + -
    |     |    
Scores: [ 0.99077725  0.98722535  0.99077725  0.98722535  0.9935137   0.98722535
  0.99077725  0.98722535  0.99077725]
cpu move: 4
    |     |    
 - + - + -
    | x  |    
 - + - + -
    |     |    
your move: 3
    |     |    
 - + - + -
o  | x  |    
 - + - + -
    |     |    
Scores: [ 0.98188424  0.97499394  0.98188424  0.          0.          0.97499394
  0.98188424  0.97499394  0.98188424]
cpu move: 0
x  |     |    
 - + - + -
o  | x  |    
 - + - + -
    |     |    
your move: 8
x  |     |    
 - + - + -
o  | x  |    
 - + - + -
    |     | o 
Scores: [ 0.          0.97499394  0.98188424  0.          0.          0.97499394
