In [79]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
from time import time

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('..')

import isolation
import sample_players
import run_match
import my_baseline_player as custom
import book as b

Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


## I estimate 10 s per game. With about 100 starting positions for the first player and about 100 for the second, there are about 10000 openings. Using 4 threads, and symmetries that multiply the data by 4, collecting 12 data points per opening would take (in hours):

In [80]:
# Back-of-the-envelope run time for filling the book:
#   10000 openings * 12 data points per opening * 10 s per game,
#   divided by 4 threads and by the 4x data gained from symmetries.
estimated_seconds = 10000 * 12 * 10/ (4 * 4)
estimated_hours = estimated_seconds / 3600  # 3600 seconds per hour
print(estimated_hours)

20.833333333333332


### The plan is as follows:
 - Create a book (or load previously saved)
 - For each starting action of player 1 (about 100) and each starting action of player 2 (about 100), run 3 experiments (DETERMINISTIC BOOK FILLING).
 - Run an epsilon-greedy algorithm to make a STOCHASTIC BOOK FILLING (following the opening book up to its depth 1 - epsilon of the time). Reduce epsilon exponentially to zero.

In [81]:
# Load the latest opening book through the project's `book` module
# (presumably a fresh one is created when nothing was saved — TODO confirm in book.py).
book = b.load_latest_book(depth=4)

In [82]:
type(book)

dict

In [83]:
# Sanity check: sum of the absolute values stored in the book (0 means every entry is still zero).
sum(abs(value) for value in book.values())

0

In [84]:
#book  # book -> {(state, action): counts}

In [85]:
# Time one complete self-play game from a fresh Isolation() state to get a per-game cost estimate.
agent_names = ('CustomPlayer1', 'CustomPlayer2')
agent1 = isolation.Agent(custom.CustomPlayer, agent_names[0])
agent2 = isolation.Agent(custom.CustomPlayer, agent_names[1])
agents = (agent1, agent2)

state = isolation.isolation.Isolation()
time_limit = 150  # presumably the per-move limit in milliseconds — TODO confirm against isolation.play
match_id = 0

tic = time.time()
# isolation.play takes one packed tuple and returns (winner, game_history, match_id).
winner, game_history, match_id = isolation.play((agents,
                                                state,
                                                time_limit,
                                                match_id))
toc = time.time()
print('Elapsed time: {}'.format((toc-tic)))

Elapsed time: 11.986933946609497


In [86]:
# Enumerate the opening states that b.get_full_states yields from a fresh state with depth=2.
# The cells below show the result holds ply-1 and ply-2 states but not the ply-0 root.
root = isolation.isolation.Isolation()
opening_states = list(b.get_full_states(root, depth=2))
print(type(opening_states))
print(len(opening_states))

<class 'list'>
9801


In [87]:
# How many of the openings have exactly one ply played (ply_count == 1)?
len([s for s in opening_states if s.ply_count==1])

99

In [88]:
# Check whether any ply-0 state (nothing played yet) is among the openings.
[s for s in opening_states if s.ply_count==0]

[]

In [89]:
99*99

9801

In [90]:
opening_states[0]

Isolation(board=41523161184596308896791971391072255, ply_count=2, locs=(84, 56))

### Let's generate the corresponding matches

In [179]:
# Constant parameters
time_limit = 150
depth = 4
full_search_depth = 2
matches_per_opening = 3

# Create the agents that will play
agent_names = ('CustomPlayer1', 'CustomPlayer2')
agent1 = isolation.Agent(custom.CustomPlayer, agent_names[0])
agent2 = isolation.Agent(custom.CustomPlayer, agent_names[1])
agents = (agent1, agent2)

# Get the initial states
root = isolation.isolation.Isolation()
opening_states = list(b.get_full_states(root, depth=full_search_depth))

# Generate the matches: one (agents, state, time_limit, match_id) tuple per
# opening, where match_id is the opening's index in `opening_states`.
matches = [(agents, state, time_limit, match_id)
           for match_id, state in enumerate(opening_states)]
# Repeat every opening `matches_per_opening` times (this used to be a
# hard-coded 3 that ignored the parameter above). The repeated tuples keep
# their match_id, so `matches[match_id]` still returns the matching opening.
matches = matches * matches_per_opening
print('Generated {} matches.'.format(len(matches)))

# Create or load the book
book = b.load_latest_book(depth=depth)

Generated 29403 matches.


In [92]:
matches[0]

((Agent(agent_class=<class 'my_baseline_player.CustomPlayer'>, name='CustomPlayer1'),
  Agent(agent_class=<class 'my_baseline_player.CustomPlayer'>, name='CustomPlayer2')),
 Isolation(board=41523161184596308896791971391072255, ply_count=2, locs=(84, 56)),
 150,
 0)

In [93]:
def active_player(state):
    """Return the parity (0 or 1) of ``state.ply_count``.

    The notebook uses this parity as the index of the player to move.
    """
    _, parity = divmod(state.ply_count, 2)
    return parity

In [94]:
active_player(matches[0][1])

0

In [198]:
# Demo: cut a list into consecutive chunks of at most `batch_size` items;
# the last chunk holds whatever is left over.
batch_size = 10
x = list(range(10,45))
batches = [x[start:start + batch_size]
           for start in range(0, len(x), batch_size)]
batches

[[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
 [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
 [40, 41, 42, 43, 44]]

In [181]:
l = [1,2,3,445]

In [184]:
isinstance(l[3], int)

True

In [177]:
# Scratch check of list.insert(0, ...). NOTE: not idempotent — every run of this
# cell prepends another 45 to `l` (the output below already shows two of them).
l.insert(0,45)
l

[45, 45, 1, 2, 3, 445]

In [192]:
from multiprocessing.pool import ThreadPool as Pool
num_processes = 1
batch_size = 10

# Small test for debugging
matches = matches[:10]

results = []
# The context manager shuts the worker threads down once the loop finishes
# (or raises) instead of leaving the pool open in the kernel.
with Pool(num_processes) as pool:
    tic = time.time()
    for result in pool.imap_unordered(isolation.play, matches):
        results.append(result)
        winner, game_history, match_id = result
        print('Results for match {}: {} wins.'.format(match_id, winner.name))
        # Results arrive in completion order; match_id is the opening's index
        # in `matches`, so it recovers the state this game started from.
        _, state, _, _ = matches[match_id]
        # Prepend the opening placements so the whole game can be replayed
        # from an empty board.
        if state.locs[1] is not None:
            game_history.insert(0,state.locs[1])
        if state.locs[0] is not None:
            game_history.insert(0,state.locs[0])
        root = isolation.isolation.Isolation()
        print(game_history)
        # The replay starts at `root`, so the player to move must be taken from
        # `root`, not from the opening `state`: for openings with a single ply
        # played, `state.ply_count % 2` is 1 and would invert every credit.
        b.process_game_history(root,
                               game_history,
                               book,
                               agent_names.index(winner.name),
                               active_player=root.ply_count % 2,
                               depth=depth)
    toc = time.time()
print('Elapsed time {} seconds.'.format((toc-tic)))

Results for match 0: CustomPlayer1 wins.
[84, 56, <Action.ENE: 11>, <Action.NNE: 25>, <Action.SSW: -25>, <Action.ENE: 11>, <Action.ESE: -15>, <Action.SSE: -27>, <Action.WSW: -11>, <Action.WSW: -11>, <Action.NNE: 25>, <Action.NNE: 25>, <Action.SSE: -27>, <Action.SSE: -27>, <Action.WSW: -11>, <Action.WSW: -11>, <Action.NNW: 27>, <Action.WSW: -11>, <Action.WSW: -11>, <Action.SSW: -25>, <Action.NNE: 25>, <Action.ENE: 11>, <Action.NNE: 25>, <Action.NNW: 27>, <Action.ESE: -15>, <Action.SSW: -25>, <Action.SSW: -25>, <Action.NNW: 27>, <Action.SSW: -25>, <Action.SSW: -25>, <Action.WSW: -11>, <Action.WNW: 15>, <Action.NNE: 25>, <Action.NNE: 25>, <Action.NNE: 25>, <Action.SSE: -27>, <Action.NNE: 25>, <Action.SSE: -27>, <Action.ENE: 11>, <Action.ENE: 11>, <Action.SSE: -27>, <Action.ESE: -15>, <Action.NNE: 25>, <Action.NNW: 27>, <Action.WSW: -11>, <Action.ENE: 11>, <Action.WNW: 15>, <Action.SSW: -25>, <Action.WSW: -11>, <Action.WSW: -11>, <Action.SSW: -25>, <Action.WNW: 15>, <Action.SSW: -25>, <Act

Results for match 5: CustomPlayer2 wins.
[93, 17, <Action.SSE: -27>, <Action.NNE: 25>, <Action.SSW: -25>, <Action.ENE: 11>, <Action.NNW: 27>, <Action.NNW: 27>, <Action.SSW: -25>, <Action.WSW: -11>, <Action.SSW: -25>, <Action.SSW: -25>, <Action.NNW: 27>, <Action.SSW: -25>, <Action.NNE: 25>, <Action.ENE: 11>, <Action.WNW: 15>, <Action.NNW: 27>, <Action.ENE: 11>, <Action.WSW: -11>, <Action.ENE: 11>, <Action.WNW: 15>, <Action.SSW: -25>, <Action.ENE: 11>, <Action.ESE: -15>, <Action.NNE: 25>, <Action.NNE: 25>, <Action.ENE: 11>, <Action.WSW: -11>, <Action.SSW: -25>, <Action.SSW: -25>, <Action.SSW: -25>, <Action.SSE: -27>, <Action.SSE: -27>, <Action.ENE: 11>, <Action.ESE: -15>, <Action.SSE: -27>, <Action.ENE: 11>, <Action.WSW: -11>]
State: Isolation(board=41523161203939122082683632224299007, ply_count=0, locs=(None, None)) 
 Action: 93


Got an int action: loc_sym = 99
Got an int action: loc_sym = 15
Got an int action: loc_sym = 21
State: Isolation(board=41523151300418807799641433031305215, pl

In [187]:
sum(abs(value) for value in book.values())

19

In [173]:
# Extrapolate the full run: 29403 generated matches at a rate of 37 s per 10 matches
# (presumably the elapsed time measured for the 10-match debug batch — TODO confirm).
seconds = 29403 * 37 / 10
print('{} seconds'.format(seconds))
print('{} hours'.format(seconds/3600))

108791.1 seconds
30.21975 hours


In [174]:
game_history

[<Action.WSW: -11>,
 <Action.WSW: -11>,
 <Action.WNW: 15>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.SSE: -27>,
 <Action.SSW: -25>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.WSW: -11>,
 <Action.SSE: -27>,
 <Action.NNE: 25>,
 <Action.WSW: -11>,
 <Action.NNE: 25>,
 <Action.ESE: -15>,
 <Action.ESE: -15>,
 <Action.NNE: 25>,
 <Action.ESE: -15>,
 <Action.WSW: -11>,
 <Action.SSE: -27>,
 <Action.NNE: 25>,
 <Action.SSW: -25>,
 <Action.SSE: -27>,
 <Action.SSW: -25>,
 <Action.WSW: -11>,
 <Action.ENE: 11>,
 <Action.WNW: 15>,
 <Action.NNW: 27>,
 <Action.NNE: 25>,
 <Action.SSW: -25>,
 <Action.WSW: -11>,
 <Action.ENE: 11>,
 <Action.NNE: 25>,
 <Action.SSW: -25>,
 <Action.NNE: 25>,
 <Action.WNW: 15>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Action.SSE: -27>,
 <Action.NNE: 25>,
 <Action.NNE: 25>,
 <Act

## Let's add the symmetry conditions to the game processing

In [108]:
s_a = list(book.keys())[0]
s_a

(Isolation(board=41198632737088974515941243613079551, ply_count=3, locs=(108, 83)),
 <Action.ESE: -15>)

In [142]:
W, H = 11, 9

def h_symmetry(loc):
    """Mirror a board location within its own row.

    Locations are integers with a row stride of ``W + 2``; the row is
    ``loc // (W + 2)`` and the location is reflected around the index
    ``row * (W + 2) + W // 2`` of that row.

    Returns None when ``loc`` is None.
    """
    if loc is None:
        return None
    row_stride = W + 2
    row_center = (loc // row_stride) * row_stride + W // 2
    return 2 * row_center - loc

In [143]:
h_symmetry(28)

2
31


34

In [144]:
h_symmetry(1)

0
5


9

In [118]:
# Index of the middle cell: middle row (H // 2) times the row stride (W + 2), plus the middle column (W // 2).
center = (H // 2) * (W + 2) + W // 2
center

57

In [119]:
def c_symmetry(loc):
    """Reflect a board location through the middle cell of the board.

    The middle cell sits at row ``H // 2`` and column ``W // 2`` with a row
    stride of ``W + 2``. Returns None when ``loc`` is None.
    """
    if loc is not None:
        mid_row, mid_col = H // 2, W // 2
        board_center = mid_row * (W + 2) + mid_col
        return 2 * board_center - loc
    return None

In [120]:
c_symmetry(81)

33

In [122]:
c_symmetry(67)

47

In [158]:
def v_symmetry(loc):
    """Mirror a board location within its own column.

    The column is ``loc % (W + 2)``; the location is reflected around the
    cell of that column lying in the middle row ``H // 2``.
    Returns None when ``loc`` is None.
    """
    if loc is None:
        return None
    row_stride = W + 2
    column = loc % row_stride
    mirror_axis = (H // 2) * row_stride + column
    return 2 * mirror_axis - loc

In [159]:
v_symmetry(2)

106

In [160]:
v_symmetry(28)

80

In [161]:
v_symmetry(48)

74

In [162]:
v_symmetry(86)

34

In [163]:
# Build the mirrored counterpart of the sample (state, action) pair: b.sym_sa is given
# h_symmetry for the locations and b.cardinal_sym_h for the action.
symmetric = b.sym_sa(s_a, loc_sym=h_symmetry, cardinal_sym=b.cardinal_sym_h)
symmetric

8
109
6
83


(Isolation(board=40225086979634008618633974744344575, ply_count=3, locs=(110, 83)),
 <Action.WSW: -11>)

In [146]:
print(isolation.DebugState.from_state(s_a[0]))


+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   | 1 |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   | X |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   | 2 |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +



In [147]:
print(isolation.DebugState.from_state(symmetric[0]))


+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   | 1 |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   | 2 |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +
|   |   |   |   |   |   |   |   |   |   |   |
+ - + - + - + - + - + - + - + - + - + - + - +



In [164]:
def process_game_history(state,
                         game_history,
                         book,
                         winner_id,
                         active_player=0,
                         depth=4):
    """Replay the first ``depth`` actions of a game and credit them to the book.

    Starting from ``state``, each action in ``game_history`` is applied in
    turn. For every (state, action) pair that is already a key of ``book``,
    the stored value is increased by +1 when the player making that action is
    the winner and by -1 otherwise. While ``ply_count`` is at most 2, the
    three mirrored (state, action) pairs built by ``b.sym_sa`` receive the
    same credit.

    Parameters
    ----------
    state : the state the history starts from; must provide ``ply_count``
        and ``result(action)`` and be usable inside a dict key.
    game_history : iterable of actions, in the order they were played.
    book : dict mapping (state, action) to a number; updated in place.
    winner_id : index (0 or 1) of the player that won the game.
    active_player : index (0 or 1) of the player to move in ``state``.
    depth : maximum number of actions to process; nothing is processed when
        it is zero or negative.

    Returns
    -------
    None. ``book`` is modified in place.

    Raises
    ------
    KeyError
        If a pair is in the book but one of its mirrored pairs is not.
    """
    OPENING_MOVES = 2
    curr_state = state  # States are replaced by result(), never mutated, so no copy is made.
    for num_action, action in enumerate(game_history):
        # Checking before the update makes depth <= 0 a no-op; the check used
        # to run after the update and always credited the first action.
        if num_action >= depth:
            break
        game_value = 1 if active_player == winner_id else -1
        state_action = (curr_state, action)
        if state_action in book:
            book[state_action] += game_value
            if curr_state.ply_count <= OPENING_MOVES:
                # A pair that is its own mirror image is credited again on
                # purpose, exactly as the unrolled updates did before.
                for loc_sym, cardinal_sym in ((h_symmetry, b.cardinal_sym_h),
                                              (v_symmetry, b.cardinal_sym_v),
                                              (c_symmetry, b.cardinal_sym_c)):
                    book[b.sym_sa(state_action,
                                  loc_sym=loc_sym,
                                  cardinal_sym=cardinal_sym)] += game_value
        curr_state = curr_state.result(action)
        active_player = 1 - active_player