In [1]:
import numpy as np
from random import randint
import math
import unittest
import operator
import numbers
import sys

In [2]:
class TicTacToe:
    """
    Tic-tac-toe game engine with a random-play driver and a tabular
    Q-value estimator.

    The board is a 3x3 int8 array: 0 marks a free square, player_X (1) and
    player_O (-1) mark played squares. Actions are numbered 1..9 and map to
    (row, col) board coordinates through the class-level `actions` dict.

    `move` returns a positive reward for an accepted move (play / draw / win)
    or a negative error code for a rejected one.
    """

    # There are 5812 legal board states that can be reached before there is a winner
    # http://brianshourd.com/posts/2012-11-06-tilt-number-of-tic-tac-toe-boards.html

    __bad_move_game_is_over = -1 # move rejected: the game has already finished
    __bad_move_action_already_played = -2 # move rejected: square is occupied
    __bad_move_no_consecutive_plays = -3 # move rejected: same player moved last
    __good_move = 0 # value of a free action space on board
    __play = 0.001 # reward for playing an action
    __draw = 0.002 # reward for playing to end but no one wins
    __win = 0.01 # reward for winning a game
    __no_player = -2 # id of a non existent player i.e. used to record id of player that has not played
    __win_total = 3 # absolute sum of a row, column or diagonal owned by one player
    actions =  {1:(0,0), 2:(0,1), 3:(0,2), 4:(1,0), 5:(1,1), 6:(1,2), 7:(2,0), 8:(2,1), 9:(2,2)}
    __num_actions = 9
    player_X = 1
    player_O = -1

    def reset(self):
        """
        Return the game to its initial state, where no one has played
        and the board contains no moves.
        """
        self.__board = np.zeros((3, 3),np.int8)
        self.__last_board = np.zeros((3, 3),np.int8)
        self.__game_over = False
        # Kept under its own name so it cannot be confused with the class-level
        # draw *reward* constant (__draw).
        self.__is_draw = False
        self.__player = TicTacToe.__no_player
        self.__last_player = TicTacToe.__no_player

    def __init__(self):
        """
        Constructor has no arguments as it just sets the game
        to an initial un-played set-up.
        """
        self.reset()

    def __player_to_str(self,player):
        """Return the player id as the string "X" or "O" ("?" for anything else)."""
        if(player == TicTacToe.player_X): return "X"
        if(player == TicTacToe.player_O): return "O"
        return "?"

    def __str__(self):
        """Return a displayable version of the entire game."""
        s = ""
        s += "Game Over: " + str(self.__game_over) +"\n"
        s += "Player :" + self.__player_to_str(self.__player) + "\n"
        s += "Current Board : \n" + str(self.__board)+ "\n"
        s += "Prev Player :" + self.__player_to_str(self.__last_player) + "\n"
        s += "Prev Current Board : \n" + str(self.__last_board)+ "\n"
        s += "State" + str(self.state()) + "\n"
        return s

    def list_actions(self):
        """Return the list of all action ids (the keys of `actions`)."""
        return list(TicTacToe.actions)

    def __open_actions(self):
        """Return the action ids whose board square is still free."""
        return [key for key in self.list_actions()
                if self.__board[TicTacToe.actions[key]] == TicTacToe.__good_move]

    def __make_move(self, action, player):
        """
        Apply a move that `move` has already validated.

        Snapshots the board and the previous player first, then places the
        mark and refreshes the game-over / draw flags.
        """
        self.__last_board = np.copy(self.__board)
        self.__last_player = self.__player
        self.__player = player
        self.__board[TicTacToe.actions[action]] = player
        won = self.__game_won()
        board_full = len(self.__open_actions()) == 0
        # A full board is only a draw when the final move did not also win;
        # treating every full board as a draw mis-scores a winning last move.
        self.__is_draw = board_full and not won
        self.__game_over = won or board_full
        return

    def __is_occupied(self,action):
        """Return True when a player has already moved using the given action."""
        return self.__board[TicTacToe.actions[action]] != TicTacToe.__good_move

    def move(self, action, player):
        """
        Make the given move (action) on behalf of the given player, provided
        the game is not over and the move is legal, and update game status.

        Returns:
            0.001 for an accepted move that leaves the game running,
            0.002 for an accepted move that ends the game in a draw,
            0.01  for an accepted move that wins the game,
            -1 if the game is already over (won or drawn),
            -2 if the action's square is already played,
            -3 if `player` also made the previous move.

        Raises:
            KeyError: if `action` is not a key of `actions`.
        """
        if(self.__game_over) : return TicTacToe.__bad_move_game_is_over
        if(self.__is_occupied(action)): return TicTacToe.__bad_move_action_already_played
        if(player == self.__player): return TicTacToe.__bad_move_no_consecutive_plays

        self.__make_move(action,player)
        if(not self.__game_over):
            return TicTacToe.__play
        if(self.__is_draw):
            return TicTacToe.__draw
        return TicTacToe.__win

    def detailed_state(self):
        """
        Return a flat list: game-ended flag (1/0), last player, current
        player, then the 9 cells of the last board and the 9 cells of the
        current board (21 values in total).
        """
        flattened_state = [1 if self.__game_over else 0,
                           self.__last_player,
                           self.__player]
        flattened_state.extend(np.reshape(self.__last_board,9).tolist())
        flattened_state.extend(np.reshape(self.__board,9).tolist())
        return flattened_state

    def state(self,tostr=False):
        """
        Return the current board as a simple 9-element list, or, when
        `tostr` is true, as the concatenation of the cell values as strings.
        """
        flattened_state = np.reshape(self.__board,9).tolist()
        if not tostr:
            return flattened_state
        return ''.join(str(e) for e in flattened_state)

    def board(self):
        """Return the current board array (the internal array, not a copy)."""
        return self.__board

    def __game_won(self):
        """
        Return True when any row, column or diagonal holds three marks of
        the same player, i.e. its cell sum has absolute value 3.
        """
        grid = self.__board
        line_totals = list(np.sum(grid,axis=1)) + list(np.sum(grid,axis=0))
        line_totals.append(np.trace(grid))             # top-left to bottom-right
        line_totals.append(np.trace(np.fliplr(grid)))  # top-right to bottom-left
        return any(abs(int(total)) == TicTacToe.__win_total for total in line_totals)

    def next_player(self,current_player):
        """
        Return which player goes next given the current player
        (player_X after player_O, player_O after anything else).
        """
        if(current_player == TicTacToe.player_O):
            return  TicTacToe.player_X
        else:
            return  TicTacToe.player_O

    def random_move(self):
        """
        Return a random action (move) that is still left to make,
        or None when the board is full.
        """
        valid_moves = self.__open_actions()
        num_poss_moves = len(valid_moves)
        if(num_poss_moves > 0):
            return valid_moves[randint(0, num_poss_moves-1)]
        else:
            return None

    def play(self):
        """
        Play a random game until completion, starting with a random player.

        Returns True when the game ended in a draw, False when someone won.
        """
        self.reset()
        plyr = (TicTacToe.player_X,TicTacToe.player_O)[randint(0,1)] # Random player to start
        while(not self.__game_over):
            self.move(self.random_move(),plyr)
            plyr = self.next_player(plyr)
        return self.__is_draw

    def estimate_Q_values(self,num_simulations):
        """
        Run random-play simulations to estimate Q values for (state, action)
        pairs. Random exploration policy, which should be tractable with
        approx 6K valid board states.

        Returns a dict mapping the board-state string (see `state`) to a
        length-9 array of Q values, indexed by action id minus one.

        NOTE(review): the bootstrap term takes max over Q of the state the
        *opponent* moves from; confirm that is the intended target for a
        two-player game.
        """
        learning_rate0 = 0.05
        learning_rate_decay = 0.1
        discount_rate = 0.95
        Q = {}
        sim = 0
        while(sim < num_simulations):
            self.reset()
            plyr = (TicTacToe.player_X,TicTacToe.player_O)[randint(0,1)] # Random player to start
            # The rate only depends on the simulation index, so compute it once per game.
            learning_rate = learning_rate0 / (1 + (sim * learning_rate_decay))
            while(not self.__game_over):
                s = self.state(tostr=True)
                mv = self.random_move()
                reward = self.move(mv,plyr)
                sp = self.state(tostr=True)
                if s not in Q:
                    Q[s] = np.zeros((self.__num_actions))
                if sp not in Q:
                    Q[sp] = np.zeros((self.__num_actions))
                # Standard tabular update: keep (1 - rate) of the old estimate
                # and move `rate` of the way towards the new target.
                target = reward + discount_rate * np.max(Q[sp])
                (Q[s])[mv-1] = (1-learning_rate) * (Q[s])[mv-1] + learning_rate * target
                plyr = self.next_player(plyr)
            if (sim % 100) == 0 : print(str(sim))
            sim += 1
        return Q
        

In [None]:
# Build a fresh game and run 20000 random-play simulations. QV is the
# dictionary returned by estimate_Q_values: board-state string -> array of
# per-action Q values. The method prints the simulation index every 100 runs.
game = TicTacToe()
QV = game.estimate_Q_values(20000)


In [None]:
# List the board-state strings for which Q values were recorded.
print(QV.keys())

In [None]:
# Play random games, one dot per game, for as long as play() keeps returning
# a truthy value, capped at 100 games; then show the final board.
game = TicTacToe()
n, go = 0, True
while n < 100 and go:
    go = game.play()
    sys.stdout.write(".")
    n += 1
print("\n")
print(game.board())



In [None]:
# Show the current board of `game`, then the sum of each column (axis=0).
print(game.board())
print(np.sum(game.board(),axis=0))


In [None]:
# Scratch check of the "absolute line sum equals 3" win test on a hand-built
# board: first over the column sums (axis=0), then over the row sums (axis=1).
bd = np.array([[ 1, -1, -1],
               [-1,  1, -1],
               [-1,  1, -1]], dtype=np.int8)
pxm = np.ones((3, 3), np.int8)
wn = np.full((1, 3), 3, np.int8)
print(bd)
print(np.any(np.abs(bd.sum(axis=0)) == wn))
print(np.any(np.abs(bd.sum(axis=1)) == wn))

In [None]:
class testTicTacToe(unittest.TestCase):
    """Placeholder test case holding a single always-passing test."""

    def test_one(self):
        # Sanity check only: a tautology must compare equal to True.
        tautology = (1 == 1)
        self.assertEqual(tautology, True)

In [None]:
#
# Run a single test, selected by method name.
#
test_to_run = "test_one"
suite = unittest.TestSuite([testTicTacToe(test_to_run)])
runner = unittest.TextTestRunner()
runner.run(suite)

In [None]:
#
# Run all tests defined on the testTicTacToe test case.
#
# The loader is given the TestCase class itself. loadTestsFromModule expects
# a module, and instantiating the TestCase without a method name is not
# needed to collect its tests.
suite = unittest.TestLoader().loadTestsFromTestCase(testTicTacToe)
unittest.TextTestRunner().run(suite)

In [None]:
# Inspect a freshly constructed game: the available action ids, the 3x3
# board array, and the same board flattened to a 9-element list.
game = TicTacToe()
print(game.list_actions())
print(game.board())
print(game.state())


In [None]:
# Reset, then ask player X to move three times in a row. move() rejects a
# move from the player who made the previous move (no-consecutive-plays
# code, -3), so only the first call should place a mark.
game.reset()
print(game.move(1,TicTacToe.player_X))
print(game.move(5,TicTacToe.player_X))
print(game.move(9,TicTacToe.player_X))

In [None]:
# Draw eight random still-available actions from the current game, one per line.
print("\n".join(str(game.random_move()) for _ in range(8)))

In [None]:
# Print the full game summary produced by TicTacToe.__str__ (game-over flag,
# current and previous player, current and previous board, flattened state).
print(game)

In [None]:
# The action ids are the keys of the class-level TicTacToe.actions dict.
print(list(TicTacToe.actions))

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation

In [None]:
class playTicTacToe:
    """
    Skeleton for a network-based tic-tac-toe player.

    Only the network names and the unit count are defined so far;
    construct_network is still a stub.
    """
    __actor_network_name = "net/actor"
    __critic_network_name = "net/critic"
    __x_units = 21 # NOTE(review): presumably the length of TicTacToe.detailed_state() - confirm

    def __init__(self):
        return

    #
    # Intended to build a four layer fully connected Network.
    # 21 -> relu -> 21 -> relu -> 21 -> relu -> 21 -> relu
    #
    # Not implemented yet: currently returns None and ignores both arguments.
    # Declared static because it takes no `self`; without the decorator a
    # call through an instance would pass the instance as X_state and fail
    # with a TypeError.
    #
    @staticmethod
    def construct_network(X_state,network_name):
        return