In [0]:
import numpy as np

#This is just the board.py code

class TicTacToe(object):

    def __init__(self):
        self.setup_board = np.zeros((3, 3), dtype='str')
        self.setup_board[self.setup_board == ''] = ' '
        self.current_board = self.setup_board
        self.player = 0
        self.player_map = {0: 'X', 1: 'O'}

    def print_board(self):
        for line in self.current_board:
            print(line)

    def flatten_board(self):
        flattened = [0] * 9
        j = 0
        for line in self.current_board:
            for i in line:
                if i == ' ':
                    flattened[j] = 0
                elif i == 'X':
                    flattened[j] = 1
                elif i == 'O':
                    flattened[j] = 2
                j += 1
        return flattened
      
    def move(self, row, column):
        if self.current_board[row, column] != ' ':
            raise Exception('Invalid placement')
        self.current_board[row, column] = self.player_map[self.player]

    def possible_actions(self):
        n_rows, n_cols = self.current_board.shape
        actions = []
        for row in range(n_rows):
            for col in range(n_cols):
                if self.current_board[row, col] == ' ':
                    actions.append((row, col))
        return actions

    def check_winner(self):
        """Checks if the game is over and return a possible winner.
        There are 3 possible scenarios
            a) The game is over and we have a winner.
            b) The game is over but it is a draw.
            c) The game is not over.
        Args:
            Takes a TicTacToe board
        Returns:
            A bool representing the game over state.
            An integer action value. (win: 1, loss: -1, draw: 0)
        """
        n_rows, n_cols = self.current_board.shape

        player_a = self.player
        player_b = (self.player + 1) % 2

        # Check for horizontal marks
        for x in range(n_rows):
            player_a_count = 0
            player_b_count = 0
            for y in range(n_cols):
                if self.current_board[x][y] == self.player_map[player_a]:
                    player_a_count += 1
                elif self.current_board[x][y] == self.player_map[player_b]:
                    player_b_count += 1
            if player_a_count == n_cols:
                return True, 1
            elif player_b_count == n_cols:
                return True, -1

        # Check for vertical marks
        for x in range(n_rows):
            player_a_count = 0
            player_b_count = 0
            for y in range(n_cols):
                if self.current_board[y][x] == self.player_map[player_a]:
                    player_a_count += 1
                elif self.current_board[y][x] == self.player_map[player_b]:
                    player_b_count += 1
            if player_a_count == n_rows:
                return True, 1
            elif player_b_count == n_rows:
                return True, -1

        # Check for major diagonal marks
        player_a_count = 0
        player_b_count = 0
        for x in range(n_rows):
            if self.current_board[x][x] == self.player_map[player_a]:
                player_a_count += 1
            elif self.current_board[x][x] == self.player_map[player_b]:
                player_b_count += 1

        if player_a_count == n_rows:
            return True, 1
        elif player_b_count == n_rows:
            return True, -1

        # Check for minor diagonal marks
        player_a_count = 0
        player_b_count = 0
        for y in range(n_rows - 1, -1, -1):
            x = 2 - y
            if self.current_board[x][y] == self.player_map[player_a]:
                player_a_count += 1
            elif self.current_board[x][y] == self.player_map[player_b]:
                player_b_count += 1

        if player_a_count == n_rows:
            return True, 1
        elif player_b_count == n_rows:
            return True, -1

        # There are still moves left so the game is not over
        actions = self.possible_actions()
        if actions:
            return False, 0

        # If there are no moves left the game is over without a winner
        return True, 0

    def next_player(self):
        self.player = (self.player + 1) % 2

    def debug_board(self, positions):
        self.current_board = self.setup_board
        for key, value in positions.items():
            for row, col in value:
                self.current_board[row, col] = key

In [0]:
#this is the play.py code

import numpy as np
from copy import deepcopy
import random


def best_move(board, model, player, rnd=0, flatten=True):
    scores = []
    moves = board.possible_actions()

    # Make predictions for each possible move
    for i in range(len(moves)):
        future = deepcopy(board)
        future.move(*moves[i])

        if flatten:
          current_board = np.array(future.flatten_board())
          current_board = current_board.reshape((-1,9))
        else:
          current_board = future.current_board
          # print(current_board)
          n_rows, n_cols = current_board.shape
          for i in range(n_rows):
            for j in range(n_cols):
              if current_board[i][j] == ' ':
                current_board[i][j] = 0
              elif current_board[i][j] == 'X':
                current_board[i][j] = 1
              elif current_board[i][j] == 'O':
                current_board[i][j] = 2
          current_board = current_board.reshape((-1, 3, 3, 1))

        prediction = model.predict(current_board)[0]
        if player == 0:
            win_prediction = prediction[1]
            loss_prediction = prediction[2]
        else:
            win_prediction = prediction[2]
            loss_prediction = prediction[1]
        draw_prediction = prediction[0]

        if win_prediction - loss_prediction > 0:
            scores.append(win_prediction - loss_prediction)
        else:
            scores.append(draw_prediction - loss_prediction)

    # Choose the best move with a random factor
    best_moves = np.flip(np.argsort(scores))
    for i in range(len(best_moves)):
        if random.random() * rnd < 0.5:
            return moves[best_moves[i]]

    # Choose a move completely at random
    return moves[random.randint(0, len(moves) - 1)]


def play_game(p1=None, p2=None, rnd=0, flatten=[True, True]):
    game_data = dict(moves=list(), board_history=list(), winner=None)
    board = TicTacToe()
    play = True
    actions = board.possible_actions()
    while play and actions:
        actions = board.possible_actions()
        m = len(actions)
        if board.player == 0 and p1 is not None:
            move_ind = best_move(board, p1, board.player, rnd, flatten=flatten[0])
            board.move(*move_ind)
        elif board.player == 1 and p2 is not None:
            move_ind = best_move(board, p2, board.player, rnd, flatten=flatten[1])
            board.move(*move_ind)
        else:
            pick = np.random.randint(0, m)
            move_ind = actions[pick]
            board.move(*move_ind)
        a_winner, reward = board.check_winner()
        if a_winner and reward != 0:
            play = False
            game_data['winner'] = board.player
        elif a_winner and reward == 0:
            play = False
            game_data['winner'] = 0.5
        game_data['moves'].append((board.player, move_ind))
        game_data['board_history'].append(deepcopy(board.current_board))
        board.next_player()
    return game_data


def gather_game_results(n_games, p1=None, p2=None, rnd=0, flatten=[True, True]):
    results = dict(x_wins=0, o_wins=0, draws=0)
    for i in range(n_games):
        sim_game = play_game(p1=p1, p2=p2, rnd=rnd, flatten=flatten)
        if sim_game['winner'] == 0:
            results['x_wins'] += 1
        elif sim_game['winner'] == 1:
            results['o_wins'] += 1
        else:
            results['draws'] += 1
    x_win_pct = results['x_wins'] / n_games
    o_win_pct = results['o_wins'] / n_games
    draw_pct = results['draws'] / n_games

    print(f'The winning percentage for X was {x_win_pct*100:.2f}% in {n_games} random simulations')
    print(f'The winning percentage for O was {o_win_pct*100:.2f}% in {n_games} random simulations')
    print(f'The percentage of draws was {draw_pct*100:.2f}% in {n_games} random simulations')

In [0]:
#flattens data and gets it in a format that our model can read

def get_training_data(games):
  X = []
  y = []
  for game in games:
    move_set = np.zeros((3, 3))
    winner_array = np.zeros(3)
    for i in range(len(game['moves'])):
      player = game['moves'][i][0]
      row = game['moves'][i][1][0]
      column = game['moves'][i][1][1]

      move_set[row, column] = player + 1
      X.append(deepcopy(move_set))

      winner_array[int(game['winner'])+1] = 1

      y.append(deepcopy(winner_array))

  X = np.array(X).reshape((-1, 3, 3, 1))
  y = np.array(y)
  trainNum = int(len(X) * 0.8)
  return (X[:trainNum], X[trainNum:], y[:trainNum], y[trainNum:])

In [5]:
gather_game_results(1000)

The winning percentage for X was 60.00% in 1000 random simulations
The winning percentage for O was 26.70% in 1000 random simulations
The percentage of draws was 13.30% in 1000 random simulations


In [0]:
#flattens data and gets it in a format that our model can read

def get_training_data_flat(games):
  X = []
  y = []
  for game in games:
    move_set = [0] * 9
    winner_array = [0] * 3
    for i in range(len(game['moves'])):
      player = game['moves'][i][0]
      row = game['moves'][i][1][0]
      column = game['moves'][i][1][1]

      index = 3*row + column
      move_set[index] = player+1
      X.append(deepcopy(move_set))

      winner_array[int(game['winner'])+1] = 1

      y.append(deepcopy(winner_array))

  X = np.array(X)
  y = np.array(y)
  trainNum = int(len(X) * 0.8)
  return (X[:trainNum], X[trainNum:], y[:trainNum], y[trainNum:])

In [0]:
# Run 100,000 random games and store it in game array

games = []
for _ in range(100_000):
  games.append(play_game())

X_train, X_test, y_train, y_test = get_training_data(games)
X_flat_train, X_flat_test, y_flat_train, y_flat_test = get_training_data_flat(games)

In [8]:
#build net

import keras
from keras.models import Sequential
from keras.layers import *
from keras.backend import reshape
from keras import optimizers
from keras.utils.np_utils import to_categorical

def dense_model(learning_rate=1e-4, decay=1e-6):
    numCells = 9
    outcomes = 3
    model = Sequential()
    model.add(Dense(400, activation='relu', input_shape=(9, )))
    model.add(Dropout(0.2))
    model.add(Dense(300, activation='relu'))
    model.add(Dense(200, activation='relu'))
    model.add(Dense(125, activation='relu'))
    model.add(Dense(75, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(25, activation='relu'))
    model.add(Dense(outcomes, activation='softmax'))
    opt = keras.optimizers.RMSprop(lr=learning_rate, decay=decay)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['acc'])
    return model

def conv_model():
    outcomes = 3
    model = Sequential()
    model.add(Conv2D(32, (2, 2), padding='same', input_shape=(3, 3, 1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(64, (2, 2), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(32, (2, 2), padding='same'))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(outcomes, activation='softmax'))
    optim = optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optim,
                  metrics=['acc'])
    return model

Using TensorFlow backend.


In [9]:
conv_model = conv_model()
conv_history = conv_model.fit(X_train, y_train, validation_data=(X_test, y_test),
                              epochs=10, batch_size=500)














Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 610565 samples, validate on 152642 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
#train model. This cell will take some time to run, depending on how many games in training, size of model, # of epochs, etc.

dense_model = dense_model()
dense_history = dense_model.fit(X_flat_train, y_flat_train, validation_data=(X_flat_test, y_flat_test),
                                epochs=10, batch_size=500)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train on 610565 samples, validate on 152642 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
dense_history

<keras.callbacks.History at 0x7f96702d7940>

In [18]:
#test the model
gather_game_results(1000, p1=conv_model, flatten=[False, None])

The winning percentage for X was 90.60% in 1000 random simulations
The winning percentage for O was 0.00% in 1000 random simulations
The percentage of draws was 9.40% in 1000 random simulations


In [19]:
gather_game_results(1000, p1=dense_model, flatten=[True, None])

The winning percentage for X was 81.50% in 1000 random simulations
The winning percentage for O was 0.00% in 1000 random simulations
The percentage of draws was 18.50% in 1000 random simulations


In [20]:
#play the model against itself, with some randomness introduced so that it's not deterministic

gather_game_results(1000, p1=dense_model, p2=conv_model, rnd=0.6, flatten=[True, False])

The winning percentage for X was 73.90% in 1000 random simulations
The winning percentage for O was 16.10% in 1000 random simulations
The percentage of draws was 10.00% in 1000 random simulations


In [21]:
gather_game_results(1000, p1=conv_model, p2=dense_model, rnd=0.6, flatten=[False, True])

The winning percentage for X was 93.40% in 1000 random simulations
The winning percentage for O was 4.80% in 1000 random simulations
The percentage of draws was 1.80% in 1000 random simulations


In [0]:
#Pickle and download the model once you have a model you like
import pickle
filename_conv = 'dnn_conv_model.pkl'
filename_dense = 'dnn_dense_model.pkl'
# filename = 'dnn_model2.pkl'
pickle.dump(conv_model, open(filename_conv, 'wb'))
pickle.dump(dense_model, open(filename_dense, 'wb'))

In [0]:
from google.colab import files
files.download('dnn_conv_model.pkl')
files.download('dnn_dense_model.pkl')

In [0]:
# import os
# os.listdir()