<a href="https://colab.research.google.com/github/nomomon/drl-js/blob/main/tic%20tac%20toe/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np

In [None]:
class Policy(tf.keras.Model):
    """Small fully connected network that scores a tic-tac-toe position.

    The model is called on batches of 9-element board vectors and returns
    one sigmoid value per board (shape ``(batch, 1)``), which the rest of
    the notebook uses as the score of that position.
    """

    def __init__(self):
        super().__init__()
        # No input_shape argument: a subclassed model creates its weights
        # from the first batch it is called on, and (-1, 9) is not a valid
        # per-sample shape anyway.
        self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
        self.dense3 = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)

    def call(self, inputs):
        """Run the forward pass: dense1 -> dense2 -> dense3."""
        x = self.dense1(inputs)
        # Chain the layers: dense2 must consume dense1's output, not the raw
        # inputs, otherwise dense1 is computed and then thrown away.
        x = self.dense2(x)
        return self.dense3(x)

In [None]:
# Instantiate the network and configure it for binary (win / not-win)
# classification: Adam optimizer, binary cross-entropy loss, accuracy metric.
policy = Policy()
policy.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

In [None]:
# Score a single sample position. The board is passed as an explicit
# (1, 9) NumPy array because Keras treats a plain Python list as
# "one array per model input" rather than as a batch of samples.
policy.predict(np.array([[1, 0, -1, 0, -1, 0, 1, 0, 0]], dtype=np.float32))

array([[0.90546757]], dtype=float32)

In [None]:
# Print the layer-by-layer parameter summary of the model.
policy.summary()

Model: "policy"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  40        
_________________________________________________________________
dense_1 (Dense)              multiple                  40        
_________________________________________________________________
dense_2 (Dense)              multiple                  5         
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________


In [None]:
# 0 1 2
# 3 4 5
# 6 7 8

def gameState(board):
    """Report the winner of a tic-tac-toe position.

    ``board`` is a flat sequence of 9 cells indexed row by row
    (0-2 top, 3-5 middle, 6-8 bottom); 0 marks an empty cell.

    Returns the value occupying the first completed row, column or
    diagonal (checked in that order), or 0 when no line is complete.
    """
    rows = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    columns = [(0, 3, 6), (1, 4, 7), (2, 5, 8)]
    diagonals = [(0, 4, 8), (2, 4, 6)]

    for first, second, third in rows + columns + diagonals:
        mark = board[second]
        # A line only counts when all three cells match and are not empty.
        if mark != 0 and board[first] == mark and mark == board[third]:
            return mark
    return 0

In [None]:
# Sanity check: the middle row (cells 3-5) is all 1s, so 1 is reported as the winner.
gameState([0, 0, 0, 
           1, 1, 1, 
           1, 0, 0])

1

In [None]:
def chooseAction(policy, board):
    """Pick the empty cell whose resulting position the policy scores highest.

    Args:
        policy: object with a Keras-style ``predict`` method that maps a
            batch of 9-element boards to one score per board.
        board: flat sequence of 9 cells; 0 is empty and the side to move
            is encoded as 1. The board is not modified.

    Returns:
        Index (0-8) of the chosen empty cell. When several cells share the
        best score, the highest index wins.

    Raises:
        ValueError: if the board has no empty cell.
    """
    emptyCells = [i for i, cell in enumerate(board) if cell == 0]
    if not emptyCells:
        raise ValueError("chooseAction called on a board with no empty cells")

    # Build one candidate per legal move, each from a fresh copy so the
    # hypothetical moves neither pile up nor leak into the caller's board.
    candidates = []
    for i in emptyCells:
        playBoard = list(board)
        playBoard[i] = 1
        candidates.append(playBoard)

    # Score every candidate in a single batched call instead of one
    # predict() per cell; an explicit array avoids Keras interpreting a
    # Python list as "one array per model input".
    scores = policy.predict(np.array(candidates, dtype=np.float32), verbose=0)

    # Only empty cells take part in the argmax, so an occupied cell can
    # never be returned.
    maxi = emptyCells[0]
    maxprob = float(scores[0][0])
    for i, score in zip(emptyCells, scores):
        prob = float(score[0])
        if prob >= maxprob:
            maxprob = prob
            maxi = i

    return maxi

In [None]:
# Ask the policy for a move on a sample mid-game position; the result is a cell index in 0..8.
chooseAction(policy, [0, 0, 1,
                      1, 1, 0,
                      0, 0, 1])

6

In [None]:
def getData(policy):
    """Play one self-play game with ``policy`` and return training data.

    Every move is chosen by ``chooseAction``. After each move the board is
    recorded from the mover's point of view (the mover's pieces are 1), and
    the board is then negated so the next mover also sees its pieces as 1.

    Returns:
        (X, y): ``X`` is the list of recorded boards, one per move played;
        ``y`` holds 1 for positions recorded by the eventual winner and 0
        otherwise (all 0 when the game ends without a winner).
    """
    states = []
    movers = []
    board = [0] * 9
    winner = 0

    for turn in range(9):
        # Players are tagged -1 (even turns) and +1 (odd turns).
        mover = 2 * (turn % 2) - 1

        move = chooseAction(policy, board.copy())
        board[move] = 1

        states.append(board)
        movers.append(mover)

        if gameState(board) != 0:
            winner = mover
            break

        # Flip perspective; this rebinds ``board`` to a new list, so the
        # snapshot just stored in ``states`` is left untouched.
        board = (-np.array(board)).tolist()

    labels = [int(mover == winner) for mover in movers]

    return states, labels

In [None]:
# Play one self-play game and display the recorded (boards, labels) pair.
getData(policy)

([[0, 0, 0, 0, 0, 0, 0, 0, 1],
  [1, 0, 0, 0, 0, 0, 0, 0, -1],
  [-1, 0, 0, 0, 0, 0, 0, 1, 1],
  [1, 1, 0, 0, 0, 0, 0, -1, -1],
  [-1, -1, 1, 0, 0, 0, 0, 1, 1],
  [1, 1, -1, 1, 0, 0, 0, -1, -1],
  [-1, -1, 1, -1, 0, 1, 0, 1, 1]],
 [1, 0, 1, 0, 1, 0, 1])

In [None]:
# Self-play training: generate one game with the current policy, then take
# a single pass over the positions of that game.
for i in range(100):
    X, y = getData(policy)
    # Hand Keras explicit arrays: a plain Python list is interpreted as
    # "one array per model input". Labels are shaped (n, 1) to match the
    # model's (n, 1) sigmoid output.
    policy.fit(
        np.array(X, dtype=np.float32),
        np.array(y, dtype=np.float32).reshape(-1, 1),
        epochs=1,
    )



In [None]:
from IPython.display import clear_output 

def symbol(x):
    """Map a cell value to its display character.

    1 becomes "X", -1 becomes "O", and anything else becomes the
    placeholder "?".
    """
    for value, mark in ((1, "X"), (-1, "O")):
        if x == value:
            return mark
    return "?"

def printBoard(board):
    """Clear the cell output and draw ``board`` as a 3x3 grid.

    Cells holding 1 / -1 are drawn as "X" / "O"; any other cell is drawn as
    its 1-based position number so the user can refer to it when choosing
    a move.
    """
    clear_output()
    marks = [symbol(cell) for cell in board]

    def label(pos):
        # "?" is symbol()'s placeholder for a cell that is neither X nor O.
        mark = marks[pos]
        return str(pos + 1) if mark == "?" else mark

    for r in range(3):
        print(" | ".join(label(3 * r + c) for c in range(3)))
        if r < 2:
            print("---------")

def _askPlayer():
    """Prompt until the user answers 1 or 2; return 0 to move first, 1 to move second."""
    while True:
        answer = input("which player you want to be? (1 or 2) ").strip()
        if answer in ("1", "2"):
            return (int(answer) + 1) % 2
        print("please answer 1 or 2")


def _askCell(board):
    """Prompt until the user names an empty cell (1-9); return its 0-based index."""
    while True:
        answer = input("what cell? ")
        try:
            action = int(answer) - 1
        except ValueError:
            print("please enter a number from 1 to 9")
            continue
        if 0 <= action < 9 and board[action] == 0:
            return action
        print("that cell is not available, pick an empty cell from 1 to 9")


def play(policy):
    """Play one interactive game of tic-tac-toe between a human and ``policy``.

    The human chooses to move first (1) or second (2). Throughout the game,
    including the final board, the human's pieces are drawn as "X" and the
    policy's pieces as "O". Moves are read with ``input`` and re-prompted
    until they name an empty cell; the result is printed at the end.

    Args:
        policy: model passed to ``chooseAction`` to pick the ai's moves.
    """
    player = _askPlayer()

    # Fixed encoding for the whole game: human = 1, ai = -1, empty = 0.
    # Keeping one encoding (instead of negating the board every turn) means
    # the final board is drawn from the same point of view as every prompt.
    board = [0] * 9
    humanWon = None  # stays None when the game ends in a tie

    for i in range(9):
        if i % 2 == player:
            printBoard(board)
            board[_askCell(board)] = 1
        else:
            # chooseAction encodes the side to move as 1, so give the ai a
            # negated copy of the board and record its move as -1.
            action = chooseAction(policy, [-cell for cell in board])
            board[action] = -1

        state = gameState(board)
        if state != 0:
            humanWon = (state == 1)
            break

    printBoard(board)
    if humanWon is None:
        print("\nit's a tie!")
    else:
        print("\nwinner is the", "human" if humanWon else "ai")

In [None]:
# Start an interactive game against the policy (prompts for keyboard input).
play(policy)

O | X | 3
---------
4 | X | O
---------
X | X | O

winner is the humen
