In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
from time import time
import os

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('..')

import isolation
import sample_players
import run_match
import my_baseline_player as custom
import book as b

from multiprocessing.pool import ThreadPool as Pool

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [2]:
class Node:
    def __init__(self, state, parent=None, action=None):
        self.state = state  # Memory inefficient, but let's do it by now...
        if parent is not None:
            self.parents = {(parent, action)}
        else:
            self.parents = set()
        self.children = dict()
        self.Q = 0
        self.N = 0
    
    def __hash__(self):
        return hash(self.state)
        
    def __eq__(self, other):
        return (self.__class__ == other.__class__ and
               self.state == other.state)
    
    def is_fully_expanded(self):
        return len(self.unexplored_actions()) == 0
    
    def unexplored_actions(self):
        return list(set(self.state.actions()) - set(self.children.keys()))
    
    def add_child(self, child_node, action):
        self.children[action] = child_node
    
    def __str__(self):
        return '\n'.join(['{:25}: {}\n'.format(key, value) + '_'*100 + '\n' 
                          for key, value in self.__dict__.items()])

In [None]:
class State:
    """ Mock class to test the functions. """
    def __init__(self, name):
        self.name = name
        
    def actions(self):
        return [1,2,3,4]
    
    def result(self, action):
        return State('{}_{}'.format(self.name, str(action)))
    
    def terminal_test(self):
        return len(self.name.split('_')) >=3
    
    def utility(self, player_id):
        return player_id
        
    def __str__(self):
        return self.name

In [4]:
root = State('root')
print(root)

root


In [5]:
root_node = Node(root)

In [6]:
root_node.is_fully_expanded()

False

In [7]:
root_node.unexplored_actions()

[1, 2, 3, 4]

In [8]:
for a in root_node.unexplored_actions():
    new_node = Node(root_node.state.result(a), root_node, a)
    root_node.add_child(new_node, a)

In [9]:
root_node.children

{1: <__main__.Node at 0x1197f02e8>,
 2: <__main__.Node at 0x1197f0470>,
 3: <__main__.Node at 0x1197f0630>,
 4: <__main__.Node at 0x1197f0828>}

In [10]:
for n in list(root_node.children.values()):
    print(str(n) + '\n'+'.'*100+'\n')

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x10f9354e0>, 1)}
____________________________________________________________________________________________________

state                    : root_1
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________

....................................................................................................

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x10f9354

In [11]:
root_node.unexplored_actions()

[]

In [12]:
root_node.is_fully_expanded()

True

In [13]:
new_node.unexplored_actions()

[1, 2, 3, 4]

In [14]:
import random

random.choice(new_node.unexplored_actions())

1

In [15]:
import random

class Tree:
    def __init__(self, root):
        self.root = root
        self.nodes = {root}
    
    def expand(self, node):
        # Adds the new nodes to the tree, or modifies it adding a new parent,
        # if the node already existed from another parent
        a = random.choice(node.unexplored_actions())
        new_node = Node(node.state.result(a), node, a)
        self.nodes |= {new_node}
        new_node = next(item for item in self.nodes if item == new_node)
        new_node.parents.add((node, a))
        node.add_child(new_node, a)
        return new_node
    
    def __str__(self):
        return ('\n' + '.'*100 + '\n').join(str(node) for node in self.nodes)

In [16]:
class Name:
    def __init__(self, name, other='0'):
        self.name = name
        self.other = other
    
    def __hash__(self):
        return hash(self.name)
        
    def __eq__(self, other):
        return (self.__class__ == other.__class__ and
               self.name == other.name)

In [17]:
s = {Name('1'), Name('2'), Name('3')}

In [18]:
if Name('1','ksajnfaks') in s:
    print('sdjknfdsa')

sdjknfdsa


In [19]:
s |= {Name('1','changed')}
s |= {Name('5','kasdnfksal')}

In [20]:
for item in s:
    print(item.name)
    print(item.other)

1
0
5
kasdnfksal
3
0
2
0


In [21]:
next(item for item in s if item == Name('1','changed')).name

'1'

In [22]:
next(item for item in s if item == Name('1','changed')).other

'0'

In [23]:
next((item for item in s if item == Name('7','changed')), Name('7','changed')).other

'changed'

So, set union favors the item that is already in. It can be used correctly.

In [24]:
root_s = State('root')
root_n = Node(root_s)

In [25]:
t = Tree(root_n)

In [26]:
t.expand(root_n)

<__main__.Node at 0x1197fa860>

In [27]:
for n in t.nodes:
    print(n)
    print('.'*100)

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 2)}
____________________________________________________________________________________________________

state                    : root_2
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________

....................................................................................................
N                        : 0
____________________________________________________________________________________________________

parents                  : set()
_______________________________

In [28]:
import math

def ucb(node, c=1):
    if node.N == 0:
        return float('inf')
    return (node.Q/node.N) + c * math.sqrt((2 * math.log(node.N)) / node.N)

In [29]:
ucb(root_node, 1)

inf

In [30]:
from functools import partial

def best_child(node, c=1):
    return max(list(node.children.values()), key=partial(ucb, c=c))

In [31]:
list(root_n.children.values())

[<__main__.Node at 0x1197fa860>]

In [32]:
ucb(list(root_n.children.values())[0])

inf

In [33]:
child = list(root_n.children.values())[0]

In [34]:
child.Q = 1
child.N = 1

In [35]:
print(best_child(root_n))

N                        : 1
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 2)}
____________________________________________________________________________________________________

state                    : root_2
____________________________________________________________________________________________________

Q                        : 1
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [36]:
t.expand(root_n)

<__main__.Node at 0x1198002b0>

In [37]:
print(best_child(root_n))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 3)}
____________________________________________________________________________________________________

state                    : root_3
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [38]:
root_n.children

{2: <__main__.Node at 0x1197fa860>, 3: <__main__.Node at 0x1198002b0>}

In [39]:
root_n.is_fully_expanded()

False

In [40]:
root_n.unexplored_actions()

[1, 4]

In [41]:
def tree_policy(node, tree):
    temp_node = node
    while not temp_node.state.terminal_test():
        if temp_node.is_fully_expanded():
            temp_node = best_child(temp_node)
        else:
            return tree.expand(temp_node)
    return temp_node

In [42]:
root_n.is_fully_expanded()

False

In [43]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 1)}
____________________________________________________________________________________________________

state                    : root_1
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [85]:
import random

def default_random_policy(state, player_id):
    temp_state = state
    while not temp_state.terminal_test():
        a = random.choice(temp_state.actions())
        temp_state = temp_state.result(a)
        
    return temp_state.utility(player_id)

In [45]:
# TODO: def default_greedy_policy(state, player_id)

In [46]:
def backup_negamax(node, reward):
    """ This is a version of backup for multiple parents."""
    temp_node = node
    temp_node.N += 1
    temp_node.Q += reward
    for parent, _ in temp_node.parents:
        backup_negamax(parent, -reward)

In [47]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 4)}
____________________________________________________________________________________________________

state                    : root_4
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [48]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1198009e8>, 1)}
____________________________________________________________________________________________________

state                    : root_1_1
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [49]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1198009e8>, 3)}
____________________________________________________________________________________________________

state                    : root_1_3
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [50]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1198009e8>, 4)}
____________________________________________________________________________________________________

state                    : root_1_4
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [51]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1198009e8>, 2)}
____________________________________________________________________________________________________

state                    : root_1_2
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [52]:
print(tree_policy(root_n, t))

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1198009e8>, 1)}
____________________________________________________________________________________________________

state                    : root_1_1
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [53]:
terminal_node = tree_policy(root_n, t)

In [54]:
terminal_node.state.terminal_test()

True

In [55]:
terminal_node.state.utility(1)

1

In [56]:
default_random_policy(root_n.state, 1)

root
....................................................................................................
root_2
....................................................................................................


1

In [57]:
default_random_policy(terminal_node.state, 1)

1

In [58]:
print(terminal_node)

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1198009e8>, 1)}
____________________________________________________________________________________________________

state                    : root_1_1
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {}
____________________________________________________________________________________________________



In [59]:
print(t)

N                        : 0
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 1)}
____________________________________________________________________________________________________

state                    : root_1
____________________________________________________________________________________________________

Q                        : 0
____________________________________________________________________________________________________

children                 : {1: <__main__.Node object at 0x119800898>, 2: <__main__.Node object at 0x11980a2b0>, 3: <__main__.Node object at 0x11980a048>, 4: <__main__.Node object at 0x11980a208>}
____________________________________________________________________________________________________

....................................................................................................
N                        : 0


In [60]:
backup_negamax(terminal_node, 10)

In [61]:
print(t)

N                        : 1
____________________________________________________________________________________________________

parents                  : {(<__main__.Node object at 0x1197fa1d0>, 1)}
____________________________________________________________________________________________________

state                    : root_1
____________________________________________________________________________________________________

Q                        : -10
____________________________________________________________________________________________________

children                 : {1: <__main__.Node object at 0x119800898>, 2: <__main__.Node object at 0x11980a2b0>, 3: <__main__.Node object at 0x11980a048>, 4: <__main__.Node object at 0x11980a208>}
____________________________________________________________________________________________________

....................................................................................................
N                        : 

In [91]:
def uct_search(root_state, 
               tree=None, 
               default_policy=default_random_policy,
               player_id=1):
    """ 
    If an existing tree is passed with a root_state different from 
    root state, the search will start in root_state, but will affect the whole
    tree (upper nodes included).
    The root_state HAS to be unique parent of all its children.
    """
    root_n = Node(root_state)
    if tree is None:
        tree = Tree(root_n)
    edge_node = tree_policy(root_n, tree)
    reward = default_policy(edge_node.state, player_id)
    backup_negamax(edge_node, reward)
    best_root_child = best_child(root_n, 0)  # Greedy, so c = 0
    best_root_action_l = [a for p, a in best_root_child.parents if p == root_n]
    return best_root_action_l[0], tree

In [92]:
action, tree = uct_search(root_s)
print(action)
for i in range(100):
    action, tree = uct_search(root_s, tree)
    print('iter {}: action {}'.format(i, action))
    # Place to save the best move so far...

2
iter 0: action 4
iter 1: action 1
iter 2: action 1
iter 3: action 2
iter 4: action 3
iter 5: action 4
iter 6: action 4
iter 7: action 3
iter 8: action 4
iter 9: action 1
iter 10: action 3
iter 11: action 2
iter 12: action 4
iter 13: action 4
iter 14: action 4
iter 15: action 1
iter 16: action 2
iter 17: action 1
iter 18: action 2
iter 19: action 1
iter 20: action 2
iter 21: action 3
iter 22: action 4
iter 23: action 3
iter 24: action 3
iter 25: action 2
iter 26: action 4
iter 27: action 4
iter 28: action 1
iter 29: action 2
iter 30: action 2
iter 31: action 3
iter 32: action 3
iter 33: action 3
iter 34: action 3
iter 35: action 2
iter 36: action 3
iter 37: action 3
iter 38: action 1
iter 39: action 3
iter 40: action 3
iter 41: action 4
iter 42: action 3
iter 43: action 4
iter 44: action 1
iter 45: action 3
iter 46: action 1
iter 47: action 3
iter 48: action 3
iter 49: action 2
iter 50: action 2
iter 51: action 2
iter 52: action 2
iter 53: action 3
iter 54: action 4
iter 55: action 3
