In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Input layer
board_input = keras.Input(shape=(2, 3, 3), name='board')
x = layers.Reshape((3, 3, 2))(board_input)

# Hidden layers
x = layers.Conv2D(
  256, 1,
  activation='relu',
  input_shape=(3, 3, 2),
)(x)
x = layers.BatchNormalization(axis=1)(x) 
x = layers.MaxPooling2D(
  1,
)(x)

# Output layers
x = layers.Conv2D(
  1, 1,
  activation='relu',
  input_shape=(3, 3, 2),
)(x)
play_dist = layers.Reshape((1, 3, 3), name='play_dist')(x)

model = keras.Model(
  inputs=[board_input],
  outputs=[play_dist],
)

model.compile(
  optimizer=keras.optimizers.RMSprop(
    learning_rate=0.003,
  ),
  loss={
    'play_dist': keras.losses.BinaryCrossentropy(
      from_logits=True,
    ),
  },
)

model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 board (InputLayer)          [(None, 2, 3, 3)]         0         
                                                                 
 reshape_2 (Reshape)         (None, 3, 3, 2)           0         
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 3, 256)         768       
                                                                 
 batch_normalization_1 (Batc  (None, 3, 3, 256)        12        
 hNormalization)                                                 
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 3, 3, 256)        0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 3, 3, 1)           257 

In [5]:
from src.tic_tac_toe import TicTacToe
from src.self_play import self_play_game

game = TicTacToe()
self_play_game(model, game)

winner = game.get_winner()

while len(game.plays) > 0:
  play = game.pop_last_play()
  
  winner = game.get_winner()

  # If current player is not the winner, do not train on this play
  if (winner is not None and game.turn != winner):
    continue

  board = game.board

  # flip the board if player 2 to be consistent with training data
  if game.turn == 1:
    board = np.flip(board, axis=0)

  boards_np = np.array([board])

  (player, row, col) = play
  play_matrix = [
    [0, 0, 0],
    [0, 0, 0],
    [0, 0, 0],
  ]
  play_matrix[row][col] = 1

  plays_np = np.array([[play_matrix]])

  # Train the model
  model.fit(
    x={
      'board': boards_np,
    },
    y={
      'play_dist': plays_np,
    },
    batch_size=1,
    epochs=1,
    verbose=0,
  )

In [11]:
from src.self_play import get_next_play

game = TicTacToe()

get_next_play(model, game)

{'row': 0, 'col': 0, 'probability': 0}