In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('..')

import isolation
import sample_players
import run_match
import my_baseline_player as custom

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Peek at the module's private set of move actions (members and their integer
# values are listed in the cell output).
isolation.isolation._ACTIONSET

{<Action.SSE: -27>,
 <Action.SSW: -25>,
 <Action.ESE: -15>,
 <Action.WSW: -11>,
 <Action.ENE: 11>,
 <Action.WNW: 15>,
 <Action.NNE: 25>,
 <Action.NNW: 27>}

## Idea: build a table keyed by "state-action" pairs, with each pair's winning frequency as the value

In [3]:
# Start with an empty opening book; a later cell rebinds `book` to the result
# of get_empty_book().
book = dict()

In [4]:
class ProbePlayer(custom.CustomPlayer):
    """Instrumented player used to collect statistics about the game.

    On top of whatever `custom.CustomPlayer` sets up, each instance keeps:

    * ``book_depth`` -- largest search depth whose chosen action is recorded.
    * ``context['book']`` -- dict mapping ``state.ply_count`` to a
      ``(board, locs, action)`` tuple for the recorded decisions.
    """

    def __init__(self, player_id):
        super().__init__(player_id)
        self.book_depth = 4
        # Replaces any context the base class created with a fresh, empty book.
        self.context = {'book': {}}

    def get_action(self, state):
        """Search `state` with ever-increasing depth, queueing every result.

        The loop has no exit condition of its own; it relies on being
        interrupted from outside (presumably the match runner enforces the
        time limit -- TODO confirm). `alpha_beta_search` and `queue` are not
        defined in this class; presumably they are inherited -- verify
        against `custom.CustomPlayer`.
        """
        recorded_so_far = self.context['book']
        print('Player {} has played like this: {}'.format(self.player_id,
                                                           recorded_so_far))
        depth = 0
        while True:
            depth += 1
            action = self.alpha_beta_search(state, depth)
            # Only shallow searches are written to the book; a deeper result
            # for the same ply overwrites the shallower one.
            if not depth > self.book_depth:
                self.context['book'][state.ply_count] = (state.board, state.locs, action)
            # Always publish the latest (deepest) answer found so far.
            self.queue.put(action)

In [5]:
state = isolation.isolation.Isolation()

In [6]:
state

Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None))

In [7]:
state.board

41523161203939122082683632224299007

In [8]:
# Custom vs Custom: pit two ProbePlayer agents against each other and time it.
from time import time

# Time limit handed to the match runner (unit not visible here -- TODO confirm).
run_match.TIME_LIMIT = 150

num_rounds = 1
num_procs = 4
agent1 = isolation.Agent(ProbePlayer, "ProbePlayer1")
agent2 = isolation.Agent(ProbePlayer, "ProbePlayer2")

tic = time()
wins, num_games = run_match.play_matches(
    agent1, agent2, num_rounds, num_procs, fair_matches=False
)
toc = time()

# Report the win ratio for agent1 and the wall-clock cost per game.
print('Wins: {}, Games: {}, Ratio: {}'.format(wins, num_games, wins / num_games))
print('Total time: {}, Time per game: {}'.format((toc - tic), (toc - tic) / num_games))

Running 2 games:
Player 0 has played like this: {}
Player 0 has played like this: {}
-+
Wins: 1, Games: 2, Ratio: 0.5
Total time: 0.1831378936767578, Time per game: 0.0915689468383789


In [10]:
root = isolation.isolation.Isolation()

In [11]:
len(root.actions())

99

In [12]:
def get_full_tree(state, depth, s_a_set=None):
    """Collect every (state, action) pair reachable within `depth` plies.

    Parameters
    ----------
    state : hashable object exposing ``actions()`` and ``result(action)``
        Root of the (sub)tree to enumerate.
    depth : int
        Number of plies to expand. The actions of `state` itself are always
        recorded, so any value below 1 behaves like ``depth == 1``.
    s_a_set : set, optional
        Accumulator shared across the recursion. When supplied by the caller
        it is updated in place and returned; when omitted a new set is made.

    Returns
    -------
    set
        The accumulator, containing one ``(state, action)`` tuple per move.
    """
    # Identity check: only a missing accumulator triggers a fresh set, so a
    # caller-supplied empty set is still filled in place.
    if s_a_set is None:
        s_a_set = set()
    for action in state.actions():
        s_a_set.add((state, action))
        if depth > 1:
            # The recursive call writes straight into the shared accumulator,
            # so its return value (the very same set) needs no merging.
            get_full_tree(state.result(action), depth - 1, s_a_set)
    return s_a_set

In [13]:
# Smoke test: enumerate only the first ply from a freshly constructed game state.
root = isolation.isolation.Isolation()
tree = get_full_tree(root, 1)

In [14]:
len(tree)

99

In [15]:
tree

{(Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  0),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  1),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  2),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  3),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  4),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  5),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  6),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  7),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  8),
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  9),
 (Isolation(board=41523161203939122082683632224299007, ply_c

In [16]:
def get_empty_book(state, depth):
    """Return a book with a zero entry for every (state, action) pair.

    The keys are exactly the pairs produced by ``get_full_tree(state, depth)``;
    every value starts at the integer 0.
    """
    return dict.fromkeys(get_full_tree(state, depth), 0)

In [17]:
# Build a depth-1 book from a fresh game state and display it; every
# (state, action) entry starts at 0.
root = isolation.isolation.Isolation()
book = get_empty_book(root, 1)
book

{(Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  0): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  1): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  2): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  3): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  4): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  5): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  6): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  7): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  8): 0,
 (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)),
  9): 0,
 (Isolation(board=415231612039

In [18]:
from time import time

# Wall-clock cost of enumerating the full tree for depths 1 through 4.
# Construction of the root state is included in each measurement.
times = []
for depth in (1, 2, 3, 4):
    tic = time()
    root = isolation.isolation.Isolation()
    tree = get_full_tree(root, depth)
    toc = time()
    times += [toc - tic]

In [19]:
times

[0.00013017654418945312,
 0.017016887664794922,
 0.292896032333374,
 1.8841850757598877]

In [20]:
tree

{(Isolation(board=41198642638191437159754184456660991, ply_count=3, locs=(108, 83)),
  <Action.ESE: -15>),
 (Isolation(board=40143957350286345584047904549496831, ply_count=3, locs=(106, 110)),
  <Action.WSW: -11>),
 (Isolation(board=41198632737088974515941243613079551, ply_count=3, locs=(108, 83)),
  <Action.ESE: -15>),
 (Isolation(board=41198484184284260270308255718172671, ply_count=3, locs=(108, 83)),
  <Action.ESE: -15>),
 (Isolation(board=41523161203939122082683630076618751, ply_count=3, locs=(31, 17)),
  <Action.ESE: -15>),
 (Isolation(board=41523161203937941491027730440906719, ply_count=3, locs=(45, 5)),
  <Action.WNW: 15>),
 (Isolation(board=41523161203939122082683626855589879, ply_count=3, locs=(30, 32)),
  <Action.WSW: -11>),
 (Isolation(board=41523161203939122073672032775563263, ply_count=3, locs=(31, 53)),
  <Action.SSW: -25>),
 (Isolation(board=41520625901529739768237212624218111, ply_count=3, locs=(55, 101)),
  <Action.ENE: 11>),
 (Isolation(board=4152316120393912207818003

In [21]:
# Play one CustomPlayer-vs-CustomPlayer game directly, keeping its move history.
agent_names = ('CustomPlayer1', 'CustomPlayer2')
agents = tuple(isolation.Agent(custom.CustomPlayer, name) for name in agent_names)
agent1, agent2 = agents

state = isolation.isolation.Isolation()
time_limit = 150
match_id = 0
# isolation.play takes a single tuple argument and hands back the match id
# alongside the winner and the list of actions played.
winner, game_history, match_id = isolation.play((agents, state, time_limit, match_id))

In [22]:
winner.name

'CustomPlayer1'

In [23]:
# Position of the winner's name in agent_names, i.e. 0 or 1; a later cell
# passes this value as `winner_id` to process_game_history.
agent_names.index(winner.name)

0

In [24]:
game_history

[4,
 1,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.WNW: 15>,
 <Action.SSW: -25>,
 <Action.WSW: -11>,
 <Action.SSW: -25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.NNW: 27>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.SSE: -27>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.WSW: -11>,
 <Action.SSW: -25>,
 <Action.NNE: 25>,
 <Action.SSE: -27>,
 <Action.ENE: 11>,
 <Action.ENE: 11>,
 <Action.SSE: -27>,
 <Action.ENE: 11>,
 <Action.SSW: -25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.SSE: -27>,
 <Action.NNE: 25>,
 <Action.SSW: -25>,
 <Action.WSW: -11>,
 <Action.SSW: -25>,
 <Action.SSW: -25>,
 <Action.WSW: -11>,
 <Action.SSW: -25>,
 <Action.NNE: 25>,
 <Action.ENE: 11>,
 <Action.WSW: -11>,
 <Action.SSW: -25>,
 <Action.SSW: -2

In [25]:
len(game_history)

74

In [26]:
# Sanity check: a bool acts as 0/1 in arithmetic, so 2*b-1 maps True to +1.
b = True
2*b-1

1

In [27]:
# Same check for the other case: 2*b-1 maps False to -1.
b = False
2*b-1

-1

In [46]:
def process_game_history(state, 
                         game_history, 
                         book, 
                         winner_id, 
                         active_player=0,
                         max_depth=None,
                         verbose=True):
    """Replay a finished game and credit each visited (state, action) in `book`.

    Starting from `state`, the actions in `game_history` are applied one at a
    time. For every (state, action) pair that is already a key of `book`, the
    entry is increased by 1 when the player who made that move is the winner
    and decreased by 1 otherwise. Pairs missing from the book are skipped; no
    keys are ever added.

    Parameters
    ----------
    state : hashable object exposing ``result(action)``
        Position from which `game_history` starts. It is never modified here;
        each step rebinds a local to the value returned by ``result``.
    game_history : iterable
        Actions in the order they were played.
    book : dict
        Maps ``(state, action)`` to a running score; updated in place.
    winner_id : int
        Id (0 or 1) of the player who won the game.
    active_player : int, optional
        Id of the player who makes the first action in `game_history`.
    max_depth : int or None, optional
        Stop after this many actions. Pass the depth the book was built with
        to avoid replaying the rest of the game; ``None`` replays everything.
    verbose : bool, optional
        When true (the default), print a line for every pair checked and
        every entry updated.

    Returns
    -------
    None
    """
    curr_state = state
    for ply, action in enumerate(game_history):
        if max_depth is not None and ply >= max_depth:
            # Deeper plies cannot be in a book of this depth; stop replaying.
            break
        key = (curr_state, action)
        # +1 when the mover ended up winning the game, -1 otherwise.
        game_value = 1 if active_player == winner_id else -1
        if verbose:
            print('Checking {}'.format(key))
        if key in book:
            if verbose:
                print('Updating {} with {}'.format(key, game_value))
            book[key] += game_value
        curr_state = curr_state.result(action)
        # Players alternate turns.
        active_player = 1 - active_player

In [53]:
# Rebuild an all-zero depth-4 book, print its total absolute credit as a
# baseline, then fold the recorded game into it.
# NOTE(review): `%pylab inline` is active in this notebook, so `sum` and `abs`
# may resolve to NumPy's versions rather than the builtins -- confirm.
root = isolation.isolation.Isolation()
book = get_empty_book(root, 4)
print(sum(abs(value) for value in book.values()))
process_game_history(root, game_history, book, agent_names.index(winner.name))

0
Checking (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)), 4)
Updating (Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)), 4) with 1
Checking (Isolation(board=41523161203939122082683632224298991, ply_count=1, locs=(4, None)), 1)
Updating (Isolation(board=41523161203939122082683632224298991, ply_count=1, locs=(4, None)), 1) with -1
Checking (Isolation(board=41523161203939122082683632224298989, ply_count=2, locs=(4, 1)), <Action.NNE: 25>)
Updating (Isolation(board=41523161203939122082683632224298989, ply_count=2, locs=(4, 1)), <Action.NNE: 25>) with 1
Checking (Isolation(board=41523161203939122082683631687428077, ply_count=3, locs=(29, 1)), <Action.NNE: 25>)
Updating (Isolation(board=41523161203939122082683631687428077, ply_count=3, locs=(29, 1)), <Action.NNE: 25>) with -1
Checking (Isolation(board=41523161203939122082683631620319213, ply_count=4, locs=(29, 26)), <Action.NNE: 25>)
Checking (Isolation(board=41523161

In [54]:
# Total absolute credit in the book after processing one game. Each entry the
# game touched moved by exactly 1, so this counts the entries that were updated.
sum(abs(value) for value in book.values())

4

## IDEA: Iterative Time Limit increase

## IDEA 2: Add the book info to the players and repeat the process

## IDEA 3: Play against Monte Carlo Player