In [8]:
from board import tick_tack_toe
import numpy as np

In [9]:
game2 = tick_tack_toe()
game2.avail_moves

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [10]:
game = tick_tack_toe()
a = game.play(1, 2)

In [11]:
list(a[0].values())

[2, 0, 0, 0, 0, 0, 0, 0, 0]

In [18]:
class Node:
    
    def __init__(self, state):
        
        self.state = state
        self.avail_moves = game.avail_moves
        self.parent = None
        self.children = []
        self.Q = 0
        self.N = 0
        self.W = 0
        self.P = 0
        self.v = 0
        
        if len(self.children) == 0:
            self.leaf_node = True        
        
class MCTS:        
    
    def __init__(self, states):
      
        self.states = states
        
    def simulate(self, state, path):
        
        #get current node
        current_node = self.states[tuple(state)]
        
        #path
        path.append(current_node.state)
        
        #exploit root or explore
        exp = np.random.choice([1, 0], p = [0.9, 0.1])
        
        if current_node.leaf_node:
            
            #use the neural network to calculate the p and v
            #P, v = current_model.predict(current_node) #check this once the neural network is programmed
            P = 0
            v = 0
            
            #update the node parameters
            self.update(current_node, P, v)
            current_node.parent = path[-1]
            self.states[tuple(current_node.state)] = current_node
            
            return P, v
        
        else:
            
            ucb = -inf
            
            for child in current_node.children:
                
                #calculate the ucb value for the child
                ucb_val = calc_ucb(child)
                
                if ucb_val > ucb:
                    
                    ucb = ucb_val
                    new_node = child       
            
            return self.simulate(new_node.state, path)
                
            
            
            
    def rand_move(self, node, player):
        
        #pic a random move from the available moves 
        #avail_moves = node.avail_moves
        #avail_moves = list(np.where(np.array([node.state]) == 0)[1])
        avail_moves = game.avail_moves

        #choose random move from available moves
        move = np.random.choice(avail_moves)       
        
        #change the state with the move
        node.state[move - 1] = player
        
        
        
        return node, move
        
        
    
    def calc_ucb(self, node):

        ucb_val = node.Q + node.P/(1 + node.N)

        return ucb_val

    def update(self, node, P = 0, v = 0):
        
        node.N += 1
        node.W = node.W + v
        node.Q = node.W/node.N
        node.P = P
        node.v = v
        

In [23]:
player = 2

#start game
game = tick_tack_toe()
root = game.play(1, player)

#root node
current_node = Node(list(root[0].values()))

#states
states = {}
states[tuple(root[0].values())] = current_node

#tree
tree = MCTS(states)

for i in range(8):
    
    if player == 2:
        player = 1
    elif player == 1:
        player = 2
    
    game_winner = ''
        
    path = []
    new_node, move = tree.rand_move(current_node, player)
    states[tuple(new_node.state)] = new_node
    tree.simulate(new_node.state, path)
    current_node.children.append(new_node)
    current_node = new_node   
    game.play(move, player)
    
    game.print_board()
    
    game_winner = game.check_winner()
    
    if game_winner == 1 or game_winner == 2:
        
        break

    |    |   
 2  | 0  | 0
____|____|___
    |    |  
 1  | 0  | 0
____|____|___
    |    |  
 0  | 0  | 0
    |    |  
    |    |   
 2  | 0  | 0
____|____|___
    |    |  
 1  | 0  | 0
____|____|___
    |    |  
 2  | 0  | 0
    |    |  
    |    |   
 2  | 0  | 1
____|____|___
    |    |  
 1  | 0  | 0
____|____|___
    |    |  
 2  | 0  | 0
    |    |  
    |    |   
 2  | 0  | 1
____|____|___
    |    |  
 1  | 0  | 0
____|____|___
    |    |  
 2  | 2  | 0
    |    |  
    |    |   
 2  | 1  | 1
____|____|___
    |    |  
 1  | 0  | 0
____|____|___
    |    |  
 2  | 2  | 0
    |    |  
    |    |   
 2  | 1  | 1
____|____|___
    |    |  
 1  | 0  | 0
____|____|___
    |    |  
 2  | 2  | 2
    |    |  


In [25]:
states

{(2, 0, 0, 0, 0, 0, 0, 0, 0): <__main__.Node at 0x248a78d7d30>,
 (2, 0, 0, 1, 0, 0, 0, 0, 0): <__main__.Node at 0x248a78d7d30>,
 (2, 0, 0, 1, 0, 0, 2, 0, 0): <__main__.Node at 0x248a78d7d30>,
 (2, 0, 1, 1, 0, 0, 2, 0, 0): <__main__.Node at 0x248a78d7d30>,
 (2, 0, 1, 1, 0, 0, 2, 2, 0): <__main__.Node at 0x248a78d7d30>,
 (2, 1, 1, 1, 0, 0, 2, 2, 0): <__main__.Node at 0x248a78d7d30>,
 (2, 1, 1, 1, 0, 0, 2, 2, 2): <__main__.Node at 0x248a78d7d30>}