In [2]:
import math
import os
import sys
from functools import partial
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('..')

import isolation
import sample_players
import run_match
import my_baseline_player as custom
import book as b

from multiprocessing.pool import ThreadPool as Pool
import pickle

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [1]:
def new_node(state, parent=None, from_action=None, N=0, Q=0, children=None):
    """Build a search-tree node as a plain dict and link it under its parent.

    Args:
        state: Value stored under the 'state' key.
        parent: Parent node dict, or None for a node without a parent.
        from_action: Value stored under 'from_action' (the action that led
            from the parent to this node).
        N: Initial value of the 'N' counter.
        Q: Initial value of the 'Q' accumulator.
        children: Existing list to use as the child list; a fresh empty list
            is created when omitted.

    Returns:
        The new node dict with keys 'state', 'parent', 'from_action', 'N',
        'Q' and 'children'.

    Side effects:
        When ``parent`` is given, the new node is appended to
        ``parent['children']``.
    """
    node = dict(state=state, parent=parent, from_action=from_action, N=N, Q=Q)
    # A caller-supplied list is stored as-is (not copied).
    node['children'] = list() if children is None else children
    if parent is not None:
        parent['children'].append(node)
    return node

def ucb(node, c=1):
    """Return the UCB1-style score of ``node``.

    The score is ``Q / N + c * sqrt(2 * ln(N_parent) / N)`` where ``Q`` and
    ``N`` come from ``node`` and ``N_parent`` is the visit count of
    ``node['parent']``. Using the parent's count in the logarithm is what
    makes rarely visited siblings gain an exploration bonus as the parent is
    visited more often; the node's own count is used only as a fallback when
    there is no parent (or the parent has no visits recorded).

    Args:
        node: Node dict with 'N' and 'Q' keys and, optionally, 'parent'.
        c: Exploration weight. ``c == 0`` yields the pure mean ``Q / N``.

    Returns:
        ``float('inf')`` for an unvisited node when ``c != 0`` (so it is
        tried first), ``0`` for an unvisited node when ``c == 0``, otherwise
        the score described above.
    """
    visits = node['N']
    if visits == 0:
        return float('inf') if c != 0 else 0

    parent = node.get('parent')
    if parent is not None and parent['N'] > 0:
        total_visits = parent['N']
    else:
        # Root (or detached) node: no parent statistics to compare against.
        total_visits = visits

    exploitation = node['Q'] / visits
    exploration = math.sqrt((2 * math.log(total_visits)) / visits)
    return exploitation + c * exploration


def best_node(node_list, c=1):
    """Pick the node with the highest ``CustomPlayer.ucb`` score.

    Args:
        node_list: Candidate nodes; ``max`` raises ValueError if it is empty.
        c: Exploration weight forwarded to ``CustomPlayer.ucb`` as ``c=``.

    Returns:
        The first node in ``node_list`` that attains the maximum score.
    """
    def score(candidate):
        return CustomPlayer.ucb(candidate, c=c)

    return max(node_list, key=score)


def best_child(node, c=1):
    """Return the best-scoring entry of ``node['children']``.

    Delegates to ``CustomPlayer.best_node`` with the child list and ``c``.

    Args:
        node: Node dict with a 'children' list.
        c: Exploration weight passed through to ``best_node``.
    """
    candidates = node['children']
    return CustomPlayer.best_node(candidates, c)


def expand(node):
    """Add one child to ``node`` for a randomly chosen unexplored action.

    The action is drawn with ``random.choice`` from
    ``CustomPlayer.unexplored_actions(node)``, and the child is created by
    ``CustomPlayer.new_node`` from ``node['state'].result(action)``.

    Args:
        node: Node dict with 'state' and 'children'. It must still have at
            least one unexplored action (``random.choice`` raises on an
            empty list).

    Returns:
        The newly created child node.
    """
    action = random.choice(CustomPlayer.unexplored_actions(node))
    child = CustomPlayer.new_node(state=node['state'].result(action),
                                  parent=node,
                                  from_action=action)
    # The node factory defined alongside this function already appends the
    # child to parent['children']; appending unconditionally here registered
    # every child twice. Only link it if the factory has not done so.
    if not any(existing is child for existing in node['children']):
        node['children'].append(child)
    return child


def unexplored_actions(node):
    """List the actions of ``node['state']`` that no child was created from.

    Args:
        node: Node dict with 'state' (providing ``actions()``) and
            'children' (each carrying 'from_action').

    Returns:
        A list of the remaining actions. It is built from a set difference,
        so duplicates are removed and the order is not meaningful.
    """
    available = set(node['state'].actions())
    already_tried = {kid['from_action'] for kid in node['children']}
    return list(available - already_tried)


def is_fully_expanded(node):
    """Tell whether ``node`` has no unexplored actions left.

    Returns:
        True when ``CustomPlayer.unexplored_actions(node)`` is empty,
        False otherwise.
    """
    pending = CustomPlayer.unexplored_actions(node)
    return len(pending) == 0


def tree_policy(node, nodes=None):
    """Walk down from ``node`` and return the node to run a rollout from.

    While the current state is not terminal: a node that still has
    unexplored actions is expanded and the new child is returned at once;
    a fully expanded node is replaced by its ``CustomPlayer.best_child``.
    If a terminal state is reached, that node is returned.

    Args:
        node: Node dict to start the descent from.
        nodes: Optional dict mapping a state to the list of nodes holding
            that state. A newly expanded child is appended to the list for
            its state. When omitted, a throwaway dict is used.

    Returns:
        The freshly expanded child, or the terminal node reached.
    """
    registry = dict() if nodes is None else nodes
    current = node
    while not current['state'].terminal_test():
        if not CustomPlayer.is_fully_expanded(current):
            fresh = CustomPlayer.expand(current)
            # Several nodes may share one state, hence a list per state.
            registry.setdefault(fresh['state'], []).append(fresh)
            return fresh
        current = CustomPlayer.best_child(current)
    return current


def backup_negamax(node, reward):
    """Propagate ``reward`` from ``node`` up through every ancestor.

    For ``node`` and each node reached by following 'parent' links until a
    node whose parent is None, 'N' is incremented by one and ``reward`` is
    added to 'Q'.

    NOTE(review): despite the name, the reward is added with the same sign
    at every level; it is not negated between plies. Confirm this is the
    intended behaviour for a two-player game.

    Args:
        node: Node dict the rollout started from.
        reward: Number added to 'Q' of each node on the path.
    """
    current = node
    while True:
        current['N'] += 1
        current['Q'] += reward
        current = current['parent']
        if current is None:
            break


def get_root_node(state, nodes=None):
    """Find or create the node to use as search root for ``state``.

    ``nodes`` maps each state to the list of nodes that hold it, e.g.

    nodes = {
        state1: [node1_1, node1_2],
        state2: [node2_1],
        state3: [node3_1, node3_2, node3_3]
    }

    Args:
        state: State to look up; used as a dict key.
        nodes: Registry as described above. A new dict is created when
            omitted.

    Returns:
        A ``(node, nodes)`` tuple. If ``state`` is already registered, the
        node is ``CustomPlayer.best_node`` of its list; otherwise a new node
        is created with ``CustomPlayer.new_node(state)`` and registered as
        the only entry for that state.
    """
    registry = dict() if nodes is None else nodes
    if state in registry:
        return CustomPlayer.best_node(registry[state]), registry

    root = CustomPlayer.new_node(state)
    registry[state] = [root]
    return root, registry

def uct_search(self, root_n, nodes=None):
    """Run one UCT iteration from ``root_n`` and return the greedy action.

    One call performs: selection/expansion via ``CustomPlayer.tree_policy``,
    a rollout via ``self.default_policy`` from the selected node's state,
    and a backup via ``CustomPlayer.backup_negamax``.

    Args:
        root_n: Root node dict of the search tree.
        nodes: Dictionary of state -> list of nodes that have already been
            visited. When omitted, a new dictionary is created here so the
            nodes expanded during this call are recorded and handed back to
            the caller (previously ``None`` was returned in that case and
            the registrations were lost).

    Returns:
        ``(action, nodes)`` where ``action`` is the 'from_action' of the
        root's best child with exploration weight 0, and ``nodes`` is the
        (possibly newly created) registry.
    """
    if nodes is None:
        nodes = dict()
    edge_node = CustomPlayer.tree_policy(root_n, nodes)
    reward = self.default_policy(edge_node['state'])
    CustomPlayer.backup_negamax(edge_node, reward)
    # Greedy final choice, so c = 0 (no exploration bonus).
    return CustomPlayer.best_child(root_n, 0)['from_action'], nodes

def default_policy(self, state):
    """Play uniformly random moves from ``state`` until the game ends.

    Args:
        state: Starting state; must provide ``terminal_test()``,
            ``actions()``, ``result(action)`` and ``utility(player_id)``.

    Returns:
        ``utility(self.player_id)`` of the terminal state reached (of
        ``state`` itself if it is already terminal).
    """
    current = state
    while True:
        if current.terminal_test():
            return current.utility(self.player_id)
        move = random.choice(current.actions())
        current = current.result(move)

In [11]:
# Load the object pickled in 'first_book.pkl' into `data`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open this file if it was produced by a trusted run of this project.
with open('first_book.pkl', 'rb') as file:
    data = pickle.load(file)

In [12]:
# Display the loaded object. The saved output shows a dict keyed by
# (Isolation state, Action) tuples with integer values.
data

{(Isolation(board=41198632737088974515941243613079551, ply_count=3, locs=(108, 83)),
  <Action.ESE: -15>): 0,
 (Isolation(board=40143957350286345584047904549496831, ply_count=3, locs=(106, 110)),
  <Action.WSW: -11>): 0,
 (Isolation(board=41523161126567869041879413484939263, ply_count=3, locs=(86, 53)),
  <Action.SSW: -25>): 0,
 (Isolation(board=41523161203939122082683630076618751, ply_count=3, locs=(31, 17)),
  <Action.ESE: -15>): 0,
 (Isolation(board=41523161203939122082683626855589879, ply_count=3, locs=(30, 32)),
  <Action.WSW: -11>): 0,
 (Isolation(board=41198484184284260270308255718172671, ply_count=3, locs=(108, 83)),
  <Action.ESE: -15>): 0,
 (Isolation(board=41520625901529739768237212624218111, ply_count=3, locs=(55, 101)),
  <Action.ENE: 11>): 0,
 (Isolation(board=41523161203939122082679234042521599, ply_count=3, locs=(42, 20)),
  <Action.NNW: 27>): 0,
 (Isolation(board=41523161203939122078180032462645247, ply_count=3, locs=(52, 16)),
  <Action.WNW: 15>): 0,
 (Isolation(board