In [1]:
import random
import copy
import numpy as np
import pyspiel
from open_spiel.python.algorithms import mcts, random_agent
from othello_game import OthelloGame, OthelloState
from MCTS import MCTS
from BasicModel import BasicModel
from Arena import Arena
from Coach import Coach

In [5]:
game = pyspiel.load_game("othello_nxn")

def get_alpha_zero_player():
    class AlphaZeroPlayer:
        def __init__(self, game):
            self.game = game
            self.nnet = BasicModel()
            self.mcts = MCTS(game, self.nnet, numMCTSSims=500)

        def step(self, state):
            probs = self.mcts.getActionProb(state, temp=1)
            # action = np.random.choice(len(probs), p=probs)
            action = np.argmax(probs)
            return action

    return AlphaZeroPlayer(game)

def get_random_player():
    class RandomPlayer:
        def __init__(self, game):
            self.game = game

        def step(self, state):
            legal_actions = state.legal_actions()
            action = random.choice(legal_actions)
            return action

    return RandomPlayer(game)

def get_mcts_player():
    class MCTSPlayer:
        def __init__(self, game):
            self.game = game
            evaluator = mcts.RandomRolloutEvaluator(n_rollouts=1)
            self.mcts = mcts.MCTSBot(game, uct_c=2, max_simulations=10, evaluator=evaluator)

        def step(self, state):
            action = self.mcts.step(state)
            return action

    return MCTSPlayer(game)

arena = Arena(game, get_mcts_player, get_random_player)
arena.playGames(20, verbose=True)

Arena.playGames (1): 100%|██████████| 10/10 [00:06<00:00,  1.61it/s]
Arena.playGames (2): 100%|██████████| 10/10 [00:05<00:00,  1.73it/s]


(16, 3, 1)

In [2]:
game = pyspiel.load_game("othello_nxn")
nnet = BasicModel()
coach = Coach(game, nnet)
coach.self_play()

[[array([[ 0,  0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0,  0],
         [ 0,  0, -1,  1,  0,  0],
         [ 0,  0,  1, -1,  0,  0],
         [ 0,  0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0,  0]], dtype=int8),
  0,
  array([0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.25252524, 0.        ,
         0.        , 0.        , 0.        , 0.24242425, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.25252524, 0.        , 0.        ,
         0.        , 0.        , 0.25252524, 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        ], dtype=float32),
  -1.0],
 [array([[ 0,  0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0,  0],
         [ 0,  0,  1, -1,  0,  0],
         [ 0,  0, -1,  1,  0,  0],
         [ 0,  0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0,  0]], dtype=int8),
  0,
  array([0. 

In [71]:
game = pyspiel.load_game("othello_nxn")
state = game.new_initial_state()
initial_state = copy.deepcopy(state)

nnet = BasicModel()
mcts = MCTS(game, nnet, numMCTSSims=500)

# mcts.getActionProb(state)

while not state.is_terminal():
    if state.current_player() == 0:
        probs = mcts.getActionProb(state)
        state.apply_action(np.argmax(probs))
        # state.apply_action(np.argmin(probs))
        # print("Es")
        # for key, value in mcts.Es.items():
        #     if value != 0:
        #         print(key)
        #         print(value)
        # print("Qsa")
        # for key, value in mcts.Qsa.items():
        #     print(key[0])
        #     print(key[1])
        #     print(value)
        # break
    else:
        action = random.choice(state.legal_actions())
        state.apply_action(action)

print(state.returns())
# print(np.array(state.observation_tensor(0)).reshape((2, 4, 4)))

[1.0, -1.0]


In [60]:
# s = """. . . .
# . O X .
# . X O .
# . . . ."""

s = str(initial_state)

children = mcts.tree[s]

while len(children) > 0:
    next_state, action = random.choice(list(children))
    print(next_state)
    print(action)
    print(mcts.Qsa[(s, action)])
    if next_state in mcts.tree:
        children = mcts.tree[next_state]
    else:
        children = []
    s = next_state

. . . . . .
. . . . . .
. . O X . .
. . X X . .
. . . X . .
. . . . . .
27
0.04188034188034187
. . . . . .
. . . . . .
. . O X . .
. . X O . .
. . . X O .
. . . . . .
28
-0.047222222222222214
. . . . . .
. . . . . .
. . O X . .
. . X O . .
. . . X X X
. . . . . .
29
0.06313131313131314
. . . . . .
. . . . . .
. . O X . .
. . X O . .
. . . X O X
. . . . . O
35
-0.0


In [3]:
np.array(state.observation_tensor(0)).reshape(2, 4, 4)

array([[[0., 1., 0., 0.],
        [0., 1., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 0.]]])