In [208]:
import numpy as np
import json
import random

list_index_win = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8], [0, 4, 8], [2, 4, 6]]

def get_reward(state):
    for win_combination in list_index_win:
        if sum(state[win_combination]) == 3: #*self.player:
            return 1
        elif sum(state[win_combination]) == -3: #*self.player:
            return -1
        elif len(np.where(state==0)[0]) == 0:
            # Draw
            return 0
    return None

def get_children(state, player):
    children = []
    available_positions = np.where(state==0)[0]
    for position in available_positions:
        new_state = state.copy()
        new_state[position] = player
        children.append(Node(new_state, -player))
    return children
class Node:
    def __init__(self, state, player):
        self.state = state
        self.player = player
        self.reward = get_reward(state)
        self.children = [] if self.reward else get_children(state, player)
    def __repr__(self):
        return f"State:{self.state}, player {self.player}, reward {self.reward}"


# recursion
def minimax(node:Node, depth, maximize):
    if depth ==0 or len(node.children)==0:
        return node.reward
    if maximize:
        maxeva=-float("inf")
        for children in node.children:
            eva = minimax(children, depth-1, False)
            maxeva = max(eva, maxeva)
        return maxeva
    else:
        mineva=+float("inf")
        for children in node.children:
            eva = minimax(children, depth-1, True)
            mineva = min(eva, mineva)

        return mineva
    

    
class TicTacToePlayerToJson:
    def __init__(self, state, player):
        self.tree = Node(state, player)
        self.state_dict = {}
        
    def minimax_save_dict(self, node, depth, maximize):
        if depth ==0 or len(node.children)==0:
            self.state_dict[str(node.state)] = {"minimax": node.reward, "next": []}
            return node.reward
        if maximize:
            maxeva=-float("inf")
            for children in node.children:
                eva = self.minimax_save_dict(children, depth-1, False)
                maxeva = max(eva, maxeva)
            self.state_dict[str(node.state)] = {"minimax": maxeva, "next": [str(children.state) for children in node.children]}
            return maxeva
        else:
            mineva=+float("inf")
            for children in node.children:
                eva = self.minimax_save_dict(children, depth-1, True)
                mineva = min(eva, mineva)
            self.state_dict[str(node.state)] = {"minimax": mineva, "next": [str(children.state) for children in node.children]}

            return mineva
    def choose_best(self):
        minimax_scores = []
        for children in self.tree.children:
            # maximize if player 1, minimize otherwise
            maximize = self.tree.player==1
            minimax_scores.append(self.minimax_save_dict(children,10, maximize))
        if maximize:
            # check position that is different
            pos = np.argwhere(self.tree.state != self.tree.children[np.argmax(minimax_scores)].state)[0][0]
            return pos, (self.tree.children[np.argmax(minimax_scores)], minimax_scores, self.tree.children)
        else:
            pos = np.argwhere(self.tree.state != self.tree.children[np.argmin(minimax_scores)].state)[0][0]
            return pos, (self.tree.children[np.argmin(minimax_scores)], minimax_scores, self.tree.children)
        
class TicTacToeLookUpSolver():
    def __init__(self, path):
        self.minimax_dict = self.load_minimax_dict(path)
        
    def load_minimax_dict(self, path):
        with open(path) as json_file:
            minimax_dict = json.load(json_file)
        return minimax_dict
        
    def convert_to_str(self, state):
        return str(np.array(state))
    
    def convert_str_to_position(self, state_string):
        return np.array(state_string.replace("[","").replace("]","").split(), dtype=int)
    
    def play(self, state, debug=False, difficulty=10):
        state_str = self.convert_to_str(state)
        # Perfect play, the minimax value of the position would be the minimax value of the child
        candidates = [next_state for next_state in self.minimax_dict[state_str]["next"] if self.minimax_dict[self.convert_to_str(next_state)]["minimax"]==self.minimax_dict[state_str]["minimax"]]
        pos = [np.argwhere(state != self.convert_str_to_position(next_state))[0][0] for next_state in candidates]
        if debug:
            print("Candidates:", candidates)
            print("Minimax", self.minimax_dict[state_str]["minimax"])
        if random.randint(0,10-difficulty)==0:
            ret = random.choice(pos)

        else:
            print("Randomly chosen")
            ret = random.choice(np.argwhere(np.array(state)==0))[0]
            
        
        return ret
        
        
                
# example                   
#player = TicTacToePlayer(np.array([1,0,0,0,0,0,0,0,0]), 1)
#player.choose_best()  
#pos, (node, minimax_scores, children) = player.choose_best()
#print(pos, node, minimax_scores, children)


In [209]:
random.randint(0,0)

0

In [201]:
random.choice(np.argwhere(np.array([0,1,0, 0])==0))[0]

0

In [136]:
np.array([0,0,0])==0

array([ True,  True,  True])

In [135]:
random.randint(1,1)

1

In [210]:
playerAI = TicTacToeLookUpSolver(path="../tictactoe_solver.json")

In [211]:
#player.minimax_dict['[0 0 0 0 0 0 0 0 0]']

In [212]:
playerAI.play([0,0,0,0,0,0,0,0,0])

4

In [213]:
from src.game import Connect2, Game, TicTacToe

In [214]:
random.choice(np.argwhere(np.array(game.board)==0))[0]

1

In [216]:
game=TicTacToe()
game.reset()
while True:

    game.play_mcts(playerAI.play(state=game.board, difficulty=0))
    game.render_table()
    input_player_1 = int(input("your action"))
    game.play_mcts(input_player_1)
    game.render_table()
    

Randomly chosen
[0, 0, 0, 0, 0, 0, 0, 0, 0]
<class 'list'>
[[0 1 0]
 [0 0 0]
 [0 0 0]] Next player: -1


your action 4


[[ 0  1  0]
 [ 0 -1  0]
 [ 0  0  0]] Next player: 1
Randomly chosen
[0, 1, 0, 0, -1, 0, 0, 0, 0]
<class 'list'>
[[ 0  1  0]
 [ 0 -1  0]
 [ 1  0  0]] Next player: -1


your action 8


[[ 0  1  0]
 [ 0 -1  0]
 [ 1  0 -1]] Next player: 1
Randomly chosen
[0, 1, 0, 0, -1, 0, 1, 0, -1]
<class 'list'>
[[ 0  1  0]
 [ 0 -1  1]
 [ 1  0 -1]] Next player: -1


your action 0


[[-1  1  0]
 [ 0 -1  1]
 [ 1  0 -1]] Next player: 1
Randomly chosen
[-1, 1, 0, 0, -1, 1, 1, 0, -1]
<class 'list'>
[[-1  1  0]
 [ 1 -1  1]
 [ 1  0 -1]] Next player: -1


KeyboardInterrupt: Interrupted by user

In [113]:
game=TicTacToe()
game.reset()
while True:
    input_player_1 = int(input("your action"))
    game.play_mcts(input_player_1)
    game.render_table()
    game.play_mcts(playerAI.play(game.board))
    game.render_table()

your action 4


[[0 0 0]
 [0 1 0]
 [0 0 0]] Next player: -1
[[ 0  0  0]
 [ 0  1  0]
 [ 0  0 -1]] Next player: 1


your action 1


[[ 0  1  0]
 [ 0  1  0]
 [ 0  0 -1]] Next player: -1
[[ 0  1  0]
 [ 0  1  0]
 [ 0 -1 -1]] Next player: 1


your action 6


[[ 0  1  0]
 [ 0  1  0]
 [ 1 -1 -1]] Next player: -1
[[ 0  1 -1]
 [ 0  1  0]
 [ 1 -1 -1]] Next player: 1


your action 0


[[ 1  1 -1]
 [ 0  1  0]
 [ 1 -1 -1]] Next player: -1
[[ 1  1 -1]
 [ 0  1 -1]
 [ 1 -1 -1]] Next player: 1


KeyboardInterrupt: Interrupted by user

In [26]:
# outcome, we want a dict with states and 

In [32]:
player = TicTacToePlayerToJson(np.array([0,0,0,0,0,0,0,0,0]), 1)

In [33]:
player.state_dict

{}

In [57]:
player.choose_best()

AttributeError: 'TicTacToeLookUpSolver' object has no attribute 'choose_best'

In [56]:
import pickle
with open(f"tree_tictactoe_old.pickle", "wb") as f:
    pickle.dump(player, f)

In [18]:
import json

In [35]:
with open('tictactoe.json', 'w') as outfile:
    json.dump(player.state_dict, outfile)

In [19]:
json(player.state_dict).dumps("tictacplayer.json")

TypeError: 'module' object is not callable

In [14]:
player.choose_best()

(2,
 (State:[ 1 -1  1  0  0  0  0  0  0], player -1, reward None,
  [0, 0, 0, 0, 0, 0, 0],
  [State:[ 1 -1  1  0  0  0  0  0  0], player -1, reward None,
   State:[ 1 -1  0  1  0  0  0  0  0], player -1, reward None,
   State:[ 1 -1  0  0  1  0  0  0  0], player -1, reward None,
   State:[ 1 -1  0  0  0  1  0  0  0], player -1, reward None,
   State:[ 1 -1  0  0  0  0  1  0  0], player -1, reward None,
   State:[ 1 -1  0  0  0  0  0  1  0], player -1, reward None,
   State:[ 1 -1  0  0  0  0  0  0  1], player -1, reward None]))

In [2]:
import pickle

In [41]:
import numpy as np

list_index_win = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8], [0, 4, 8], [2, 4, 6]]

class Node:
    def __init__(self, state, player):
        self.state = state
        self.player = player
        self.children = self.get_children(state)
    def __repr__(self):
        return f"State:{self.state}, player {self.player}, reward {self.reward}"
    def get_children(self, state):
        children = []
        available_positions = np.where(state==0)[0]
        self.reward = self.get_reward()
        if self.reward:
            # empty children
            pass
        elif len(available_positions)==0 and self.reward is None:
            self.reward = 0
        else: 
            for position in available_positions:
                new_state = state.copy()
                new_state[position] = self.player
                children.append(Node(new_state, -self.player))
        return children
                
    def get_reward(self):
        for win_combination in list_index_win:
            if sum(self.state[win_combination]) == 3: #*self.player:
                return 1
            elif sum(self.state[win_combination]) == -3: #*self.player:
                return -1
        return None

# recursion
def minimax(node:Node, depth, maximize):
    if depth ==0 or len(node.children)==0:
        return node.reward
    if maximize:
        maxeva=-float("inf")
        for children in node.children:
            eva = minimax(children, depth-1, False)
            maxeva = max(eva, maxeva)
        return maxeva
    else:
        mineva=+float("inf")
        for children in node.children:
            eva = minimax(children, depth-1, True)
            mineva = min(eva, mineva)

        return mineva
    
class TicTacToePlayer:
    def __init__(self, state, player):
        self.tree = Node(state, player)
    def choose_best(self):
        minimax_scores = []
        for children in self.tree.children:
            # maximize if player 1, minimize otherwise
            maximize = self.tree.player==1
            minimax_scores.append(minimax(children, 10, maximize))
        if maximize:
            # check position that is different
            pos = np.argwhere(self.tree.state != self.tree.children[np.argmax(minimax_scores)].state)[0][0]
            return pos, (self.tree.children[np.argmax(minimax_scores)], minimax_scores, self.tree.children)
        else:
            pos = np.argwhere(self.tree.state != self.tree.children[np.argmin(minimax_scores)].state)[0][0]
            return pos, (self.tree.children[np.argmin(minimax_scores)], minimax_scores, self.tree.children)
                
                    
            
        
    
        
    

In [8]:
np.argwhere(np.array([-1,1]) != np.array([-1,0]))[0][0]

1

In [42]:
%time player = TicTacToePlayer(np.array([0,0,0,0,0,0,0,0,0]), 1)

CPU times: user 22.3 s, sys: 570 ms, total: 22.9 s
Wall time: 23.1 s


In [32]:
player.choose_best()

(0,
 (State:[1 0 0 0 0 0 0 0 0], player -1, reward None,
  [1, 1, 1, 1, 0, 1, 1, 1, 1],
  [State:[1 0 0 0 0 0 0 0 0], player -1, reward None,
   State:[0 1 0 0 0 0 0 0 0], player -1, reward None,
   State:[0 0 1 0 0 0 0 0 0], player -1, reward None,
   State:[0 0 0 1 0 0 0 0 0], player -1, reward None,
   State:[0 0 0 0 1 0 0 0 0], player -1, reward None,
   State:[0 0 0 0 0 1 0 0 0], player -1, reward None,
   State:[0 0 0 0 0 0 1 0 0], player -1, reward None,
   State:[0 0 0 0 0 0 0 1 0], player -1, reward None,
   State:[0 0 0 0 0 0 0 0 1], player -1, reward None]))

In [135]:
state = np.array([0,0,0,0,1,0,0,0,0])
state.reshape(3,3)

array([[0, 0, 0],
       [0, 1, 0],
       [0, 0, 0]])

In [136]:
tree = Node(state, -1)

In [43]:
with open(f"tree_tictactoe_old.pickle", "wb") as f:
    pickle.dump(player, f)

In [137]:
tree.children

[State:[-1  0  0  0  1  0  0  0  0], player 1, reward None,
 State:[ 0 -1  0  0  1  0  0  0  0], player 1, reward None,
 State:[ 0  0 -1  0  1  0  0  0  0], player 1, reward None,
 State:[ 0  0  0 -1  1  0  0  0  0], player 1, reward None,
 State:[ 0  0  0  0  1 -1  0  0  0], player 1, reward None,
 State:[ 0  0  0  0  1  0 -1  0  0], player 1, reward None,
 State:[ 0  0  0  0  1  0  0 -1  0], player 1, reward None,
 State:[ 0  0  0  0  1  0  0  0 -1], player 1, reward None]

In [138]:
for children in tree.children[0].children:
    print(children, minimax(children, 10, children.player==1))

State:[-1  1  0  0  1  0  0  0  0], player -1, reward None 0
State:[-1  0  1  0  1  0  0  0  0], player -1, reward None 0
State:[-1  0  0  1  1  0  0  0  0], player -1, reward None 0
State:[-1  0  0  0  1  1  0  0  0], player -1, reward None 0
State:[-1  0  0  0  1  0  1  0  0], player -1, reward None 0
State:[-1  0  0  0  1  0  0  1  0], player -1, reward None 0
State:[-1  0  0  0  1  0  0  0  1], player -1, reward None 0


In [98]:
tree.children[0].minmax_score

In [10]:
np.where(np.array([0,0])==0)[0]

array([0, 1])

In [5]:
max(2,1)

2