In [None]:
# Move the working directory two levels up from wherever the kernel started.
# NOTE(review): the path is relative, so re-running this cell without restarting
# the kernel moves up two more levels each time — presumably it is meant to land
# on the repository root so that `alpha_one` is importable; confirm.
%cd ../..
# (Re)load IPython's autoreload extension; mode 2 reloads all modules before
# each cell is executed, so edits to imported project code are picked up live.
%reload_ext autoreload
%autoreload 2

In [None]:
import pyspiel
import numpy as np

from alpha_one.model.model_manager import AlphaOneCheckpointManager
from alpha_one.utils.play import VerboseGameMachine
from alpha_one.utils.mcts_II import IIGMCTSConfig
from alpha_one.utils.state_to_value import state_to_value
from alpha_one.utils.statemask import get_state_mask
from alpha_one.model.agent import DirectInferenceAgent, IIGMCTSAgent

# 1. Game Setup

In [None]:
# Name of the game to load through pyspiel, and the identifier of the training
# run; both are passed to the checkpoint manager in the next cell.
game_name = 'leduc_poker'
run_name = 'LP-13'

# Game object shared by the MCTS agent and the verbose game machine below.
game = pyspiel.load_game(game_name)

In [None]:
# Checkpoint manager keyed by the game name and run name.
model_manager = AlphaOneCheckpointManager(game_name, run_name)
# Index -1 presumably selects the most recent checkpoint — TODO confirm against
# AlphaOneCheckpointManager. The call yields two models, unpacked here as the
# observation model and the game model.
observation_model, game_model = model_manager.load_checkpoint(-1)

# 2. Model Setup: MCTS Configuration and Agents

In [None]:
# Length of the observation-history window. Reused below when constructing the
# direct-inference agent and when building the padded observation history.
n_previous_observations = 3

In [None]:
# Configuration for the AlphaOne information-set MCTS agent.
# The history length is taken from the shared `n_previous_observations` variable
# instead of repeating the literal, so the MCTS agent, the direct-inference
# agent and the manually built observation history cannot drift apart when the
# value is changed in one place.
alpha_one_mcts_config = IIGMCTSConfig(
    uct_c=5,
    max_mcts_simulations=100,
    temperature=1,
    optimism=0.1,
    n_previous_observations=n_previous_observations,
    use_reward_policy=True,
    alpha_one=True,
    # Game-specific state-to-value mapping; also used later to build the state mask.
    state_to_value=state_to_value(game_name))

In [None]:
# Agent that is built from the observation model alone.
direct_inference_agent = DirectInferenceAgent(
    observation_model,
    n_previous_observations=n_previous_observations,
)

# Agent that is built from the game, both models and the MCTS configuration.
alpha_one_agent = IIGMCTSAgent.from_config(
    game,
    observation_model,
    game_model,
    alpha_one_mcts_config,
)

# 3. Investigate specific Game Scenarios

In [None]:
# Game driver for the loaded game; used below to replay a scenario step by step
# and to access the information-set generator for the resulting position.
game_machine = VerboseGameMachine(game)

In [None]:
# Reset to a fresh game, then replay a fixed action sequence to reach the
# position under investigation.
# NOTE(review): presumably the sequence interleaves chance actions (card deals)
# with player actions — confirm against the game's action encoding.
game_machine.new_game()

scenario_prefix = (0, 1, 2, 1)
for action in scenario_prefix:
    game_machine.play_action(action)

# The final action stays as the cell's last expression so that anything it
# returns is still displayed as the cell output.
game_machine.play_action(5)

In [None]:
# Show the player actions reported by the game machine for the position reached
# by the replay above (presumably the legal moves of the player to act — confirm).
game_machine.list_player_actions()

In [None]:
# Padded observation history for the current position, wrapped in a list —
# presumably a batch of size one for the model; confirm against `inference`.
obs = [game_machine.information_set_generator.get_padded_observation_history(n_previous_observations)]

# Information set computed by the generator for the current position.
information_set = game_machine.information_set_generator.calculate_information_set()
# Build the state mask from the config's state-to-value mapping; only the first
# element of the returned pair is kept, the second is discarded.
state_mask, _ = get_state_mask(alpha_one_mcts_config.state_to_value, information_set)

In [None]:
# Query the observation model directly with the single observation and its
# state mask; the result is unpacked into a value and a policy.
value, policy = observation_model.inference(obs, [state_mask])

In [None]:
# Take the first policy row (only one observation was passed in) and index it
# with the state mask — presumably restricting the output to the states in the
# current information set; confirm the mask's dtype/shape.
policy[0][state_mask]

In [None]:
# Ask the MCTS agent for its move in the same position, for comparison with the
# direct model output above.
alpha_one_agent.next_move(game_machine.information_set_generator)

In [None]:
# Inspect the state policy the agent recorded during its most recent move
# (must run after the `next_move` cell above).
alpha_one_agent.get_last_state_policy()