In [1]:
# Move the working directory one level up; the cell output shows the resulting directory
# (presumably the repository root, so that the `alpha_one` package is importable — confirm).
# NOTE(review): this is not idempotent — re-running the cell moves up one more level each time.
%cd ..
# (Re)load the autoreload extension and use mode 2 so edited modules are reloaded before each cell runs.
%reload_ext autoreload
%autoreload 2

/mnt/d/ownCloud/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/tum-adlr-ws20-9


In [2]:
import pyspiel

from alpha_one.model.model_manager import OpenSpielCheckpointManager, AlphaOneCheckpointManager
from alpha_one.model.agent import HybridAlphaOneDMCTSAgent, DMCTSAgent, IIGMCTSAgent
from alpha_one.utils.state_to_value import state_to_value
from alpha_one.utils.statemask import get_state_mask
from alpha_one.utils.play import VerboseGameMachine
from alpha_one.utils.mcts import MCTSConfig
from alpha_one.utils.mcts_II import IIGMCTSConfig

Instructions for updating:
non-resource variables are not supported in the long term


This notebook inspects two hybrid agent setups:

 1) AlphaOne observation model + D-MCTS
 2) AlphaOne with the omniscient (cheating) model used in place of the game model

# 1. Hybrid: AlphaOne observation model + D-MCTS

In [3]:
# Game name handed to the checkpoint managers, state_to_value and the game machine below.
game_name = 'leduc_poker'

## 1.1. Load Models

In [4]:
# Names of the training runs whose checkpoints are loaded in the next cell.
run_name_d_mcts = 'LP-DMCTS'
run_name_alpha_one = 'LP-local-43'

In [5]:
# One checkpoint manager per training run.
model_manager_d_mcts = OpenSpielCheckpointManager(game_name, run_name_d_mcts)
model_manager_alpha_one = AlphaOneCheckpointManager(game_name, run_name_alpha_one)

# Index -1 presumably selects the most recent checkpoint — confirm against the manager implementation.
d_mcts_model = model_manager_d_mcts.load_checkpoint(-1)
# Only the observation model of the AlphaOne run is loaded for this section.
observation_model = model_manager_alpha_one.load_observation_model_checkpoint(-1)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Restoring parameters from /home/tobias/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/model_saves/leduc_poker/LP-DMCTS/checkpoint-12
INFO:tensorflow:Restoring parameters from /home/tobias/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/model_saves/leduc_poker/LP-local-43-observation_model/checkpoint-11


## 1.2. Setup Agents

In [6]:
# Passed to HybridAlphaOneDMCTSAgent below; presumably the number of past observations
# the observation model looks at — confirm against the agent implementation.
n_previous_observations = 3
# Game-specific object built by state_to_value; consumed by the hybrid agent here and by
# the IIGMCTSConfig in section 2.
state_to_value_dict = state_to_value(game_name)

In [7]:
# Search settings shared by both agents constructed in the next cell.
mcts_config = MCTSConfig(
    uct_c=5,
    max_mcts_simulations=100,
    temperature=1,  # Differences in policies are more visible with temperature = 1
    determinized_MCTS=True,
    use_reward_policy=False)

In [8]:
# Both agents share the D-MCTS model and the search config; the hybrid agent
# additionally receives the AlphaOne observation model and its inputs.
hybrid_alpha_one_dmcts_agent = HybridAlphaOneDMCTSAgent(
    d_mcts_model,
    observation_model,
    mcts_config,
    state_to_value_dict,
    n_previous_observations,
)
d_mcts_agent = DMCTSAgent(d_mcts_model, mcts_config)

## 1.3. Play Game


In [9]:
# Fixed action sequence that leads to the position the agents are queried on below.
opening_actions = (0, 4, 2, 2, 1, 5)

game_machine = VerboseGameMachine(game_name)
game_machine.new_game()
for action in opening_actions:
    game_machine.play_action(action)

In [10]:
# Ask the hybrid agent for a move at the current position; the result displays as
# a tuple of (action, array of per-action probabilities).
hybrid_alpha_one_dmcts_agent.next_move(game_machine.get_information_set_generator())

(2, array([0.        , 0.68678541, 0.31321459]))

In [11]:
# Same position, plain D-MCTS agent, for comparison with the hybrid agent's output.
d_mcts_agent.next_move(game_machine.get_information_set_generator())

(1, array([0.        , 0.67972869, 0.32027131]))

# 2. Hybrid AlphaOne + Omniscient Model as game model

In [36]:
# Section 2 also needs the pyspiel game object, which is passed to IIGMCTSAgent.from_config below.
game_name = 'leduc_poker'
game = pyspiel.load_game(game_name)

## 2.1. Load Models

In [44]:
# Names of the training runs whose checkpoints are loaded in the next cell.
run_name_alpha_one = 'LP-local-43'
run_name_omniscient = 'LP-local-6'

In [50]:
# One checkpoint manager per training run.
model_manager_omniscient = OpenSpielCheckpointManager(game_name, run_name_omniscient)
model_manager_alpha_one = AlphaOneCheckpointManager(game_name, run_name_alpha_one)

# Index -1 presumably selects the most recent checkpoint — confirm against the manager implementation.
omniscient_model = model_manager_omniscient.load_checkpoint(-1)
# The AlphaOne checkpoint yields two models. NOTE: this rebinds `observation_model` from section 1.
observation_model, game_model = model_manager_alpha_one.load_checkpoint(-1)

INFO:tensorflow:Restoring parameters from /home/tobias/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/model_saves/leduc_poker/LP-local-6/checkpoint-8
INFO:tensorflow:Restoring parameters from /home/tobias/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/model_saves/leduc_poker/LP-local-43-observation_model/checkpoint-11
INFO:tensorflow:Restoring parameters from /home/tobias/Uni/Semester Ma 5/Advanced Deep Learning for Robotics (IN2349)/Project/model_saves/leduc_poker/LP-local-43-game_model/checkpoint-11


## 2.2. Setup Agents

In [76]:
# Search settings shared by both IIG-MCTS agents constructed in the next cell.
alpha_one_mcts_config = IIGMCTSConfig(
    uct_c=10,
    max_mcts_simulations=100,
    temperature=1,
    use_reward_policy=True,
    alpha_one=True,
    state_to_value=state_to_value_dict,
    # Reuse the notebook-level setting (defined alongside state_to_value_dict) instead of
    # repeating the literal, so both sections stay in sync when it is tuned.
    n_previous_observations=n_previous_observations,
    optimism=0.1)

In [77]:
# The two agents differ only in the third argument: the hybrid agent gets the
# omniscient model where the regular AlphaOne agent gets its own game model.
hybrid_alpha_one_agent = IIGMCTSAgent.from_config(
    game,
    observation_model,
    omniscient_model,
    alpha_one_mcts_config,
)
alpha_one_agent = IIGMCTSAgent.from_config(
    game,
    observation_model,
    game_model,
    alpha_one_mcts_config,
)

## 2.3. Play Game

In [78]:
# Fixed action sequence that leads to the position the agents are queried on below.
opening_actions = (0, 4, 2, 2, 1, 5)

game_machine = VerboseGameMachine(game_name)
game_machine.new_game()
for action in opening_actions:
    game_machine.play_action(action)

In [89]:
# Ask the hybrid agent (omniscient model in the game-model slot) for a move; the result
# displays as a tuple of (action, array of per-action probabilities).
hybrid_alpha_one_agent.next_move(game_machine.get_information_set_generator())

(1, array([0.        , 0.26522589, 0.73477411]))

In [90]:
# Same position, regular AlphaOne agent (own game model), for comparison with the hybrid agent's output.
alpha_one_agent.next_move(game_machine.get_information_set_generator())

(2, array([0.        , 0.01591915, 0.98408085]))