In [1]:
# Move two directory levels up from the current working directory so that the
# `alpha_one` package imported below can be resolved.
# NOTE(review): `%cd` is relative to the *current* working directory, so re-running
# this cell without restarting the kernel moves another two levels up.
%cd ../..
# Reload edited project modules automatically before each cell execution.
%reload_ext autoreload
%autoreload 2

/mnt/d/ownCloud/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/tum-adlr-ws20-9


In [2]:
import pyspiel
import math
import numpy as np
import matplotlib.pyplot as plt
from alpha_one.model.model_manager import OpenSpielCheckpointManager
from alpha_one.train import MCTSConfig
from alpha_one.utils.state_to_value import state_to_value
from alpha_one.utils.mcts_II import initialize_bot_alphaone, ii_mcts_agent
from alpha_one.utils.determinized_mcts import initialize_bot, compute_mcts_policy
from alpha_one.game.information_set import InformationSetGenerator

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# Game under evaluation; every later cell works on this `game` object.
game_name = "leduc_poker"
game = pyspiel.load_game(game_name)

# Later cells read the helper's *result* under the name `state_to_value`, which
# shadows the helper imported at the top of the notebook. Resolve the helper under
# an alias here so that re-running this cell keeps working instead of calling the
# previously computed result.
from alpha_one.utils.state_to_value import state_to_value as compute_state_to_value
state_to_value = compute_state_to_value(game_name)

# Load models

In [None]:
# Observation model used by the alphaone agent (first entry of the model list passed
# to `initialize_bot_alphaone`).
# NOTE(review): the notebook loads the game "leduc_poker", but this checkpoint lives
# under 'alphaone/kuhn_poker' — confirm this is the intended model.
model_manager = OpenSpielCheckpointManager('alphaone/kuhn_poker', 'LP-local-1-observation_model')
# presumably -1 selects the most recent checkpoint — TODO confirm
observation_model = model_manager.load_checkpoint(-1)

In [None]:
# Game model used by the alphaone agent (second entry of the model list passed to
# `initialize_bot_alphaone`). Note that `model_manager` is rebound by this cell.
# NOTE(review): the notebook loads the game "leduc_poker", but this checkpoint lives
# under 'alphaone/kuhn_poker' — confirm this is the intended model.
model_manager = OpenSpielCheckpointManager('alphaone/kuhn_poker', 'LP-local-1-game_model')
# presumably -1 selects the most recent checkpoint — TODO confirm
game_model = model_manager.load_checkpoint(-1)

In [None]:
# Model used by the determinized-MCTS opponent. Note that `model_manager` is rebound
# by this cell.
# NOTE(review): both the directory ('dmcts/kuhn_poker') and the run name ('KP-local-1')
# point at Kuhn poker, while the notebook loads the game "leduc_poker" — confirm the
# model matches the game being played.
model_manager = OpenSpielCheckpointManager('dmcts/kuhn_poker', 'KP-local-1')
# presumably -1 selects the most recent checkpoint — TODO confirm
dmcts_model = model_manager.load_checkpoint(-1)

# Set up players

In [None]:
# Search settings shared by both agents' MCTS configurations.
max_mcts_simulations = 100
UCT_C = math.sqrt(2)  # presumably the UCT exploration constant — confirm against MCTSConfig

In [None]:
# MCTS configuration for the alphaone agent. The meaning of the positional
# arguments (0, None, None, None) is defined by MCTSConfig and is not visible here.
alphaone_mcts_config = MCTSConfig(
    UCT_C,
    max_mcts_simulations,
    0,
    None,
    None,
    None,
    alpha_one=True,
    state_to_value=state_to_value,
)

# The bot receives the observation model and the game model, in that order.
alphaone_bot = initialize_bot_alphaone(
    game,
    [observation_model, game_model],
    alphaone_mcts_config,
)

In [None]:
# MCTS configuration for the determinized-MCTS opponent; it shares the search
# constants with the alphaone agent. The meaning of the positional arguments
# (0, None, None, None) is defined by MCTSConfig and is not visible here.
dmcts_mcts_config = MCTSConfig(
    UCT_C,
    max_mcts_simulations,
    0,
    None,
    None,
    None,
    determinized_MCTS=True,
    omniscient_observer=True,
)

In [None]:
# Seat assignment: agent type -> player id.
player_setup = {'d-mcts': 1, 'alphaone': 0}

# Inverse lookup (player id -> agent type), used to dispatch on the current player.
player_setup_reverse = dict(zip(player_setup.values(), player_setup.keys()))

# Play games

In [None]:
NUM_GAMES = 100  # number of evaluation games played between the two agents


def play_action(state, information_set_generator, action):
    """Apply ``action`` to the game and keep the information-set generator in sync.

    The action is registered with the generator, applied to ``state`` (which is
    mutated in place), and the resulting state is registered as an observation.
    The same three steps, in this order, are used for chance and player moves.
    """
    action = int(action)  # hand a plain Python int (not a numpy integer) to the game
    information_set_generator.register_action(action)
    state.apply_action(action)
    information_set_generator.register_observation(state)


correct_guess = 0    # alphaone turns on which the guessed state matched the true state
incorrect_guess = 0  # alphaone turns on which it did not
game_returns = []    # `state.returns()` of every finished game
for _ in range(NUM_GAMES):
    state = game.new_initial_state()
    information_set_generator = InformationSetGenerator(game)
    while not state.is_terminal():
        if state.is_chance_node():
            # Sample the chance outcome from the distribution the game defines
            # rather than uniformly over the legal actions.
            outcomes, probabilities = zip(*state.chance_outcomes())
            action = np.random.choice(outcomes, p=probabilities)
        elif player_setup_reverse[state.current_player()] == 'd-mcts':
            policy = compute_mcts_policy(game, dmcts_model, state,
                                         information_set_generator,
                                         dmcts_mcts_config)
            action = np.argmax(policy)  # play greedily w.r.t. the search policy
        else:
            _, game_node_policy, guess_state, _ = ii_mcts_agent(information_set_generator,
                                                                alphaone_mcts_config,
                                                                alphaone_bot,
                                                                game)
            # Track how often the state guessed by the agent equals the true state
            # (compared through their string representations).
            if str(guess_state) == str(state):
                correct_guess += 1
            else:
                incorrect_guess += 1

            action = np.argmax(game_node_policy)  # play greedily w.r.t. the search policy

        play_action(state, information_set_generator, action)

    game_returns.append(state.returns())
            
            

In [None]:
# Stack the per-game returns (one row per game) and average over the games.
game_returns = np.array(game_returns)
average_return = game_returns.mean(axis=0)

print("Average return:")
print("---------------")
for player_id in (0, 1):
    print(f"  {player_setup_reverse[player_id]}: {average_return[player_id]}")

# Fraction of alphaone turns on which the guessed state matched the true state.
total_guesses = correct_guess + incorrect_guess
print(f" correct guess probability: {correct_guess / total_guesses}")