In [337]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
from time import time
import os

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('..')

import isolation
import sample_players
import run_match
import my_baseline_player as custom

from multiprocessing.pool import ThreadPool as Pool
from functools import partial

Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


### Let's use a dictionary to represent a Node

In [338]:
class State:
    """ Mock class to test the functions. """
    def __init__(self, name):
        self.name = name
        
    def actions(self):
        return [1,2,3,4]
    
    def result(self, action):
        return State('{}_{}'.format(self.name, str(action)))
    
    def terminal_test(self):
        return len(self.name.split('_')) >=3
    
    def utility(self, player_id):
        return player_id
        
    def __str__(self):
        return self.name

In [339]:
def new_node(state, parent=None, from_action=None, N=0, Q=0, children=None):
    node = {'state': state,
            'parent': parent,
            'from_action': from_action,
            'N': N,
            'Q': Q
           }
    if children == None:
        node['children'] = list()
    else:
        node['children'] = children
    if parent is not None:
        parent['children'].append(node)
    return node

In [340]:
root = new_node(State('root'))
root

{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': None,
 'parent': None,
 'state': <__main__.State at 0x117efc710>}

In [341]:
node1 = new_node(State('state_1'), root, from_action=1)

In [342]:
root

{'N': 0,
 'Q': 0,
 'children': [{'N': 0,
   'Q': 0,
   'children': [],
   'from_action': 1,
   'parent': {...},
   'state': <__main__.State at 0x117efc7f0>}],
 'from_action': None,
 'parent': None,
 'state': <__main__.State at 0x117efc710>}

In [343]:
node1

{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 1,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117efc710>},
 'state': <__main__.State at 0x117efc7f0>}

In [344]:
node2 = new_node(State('state_2'), root, from_action=2, N=2, Q=1)
node2

{'N': 2,
 'Q': 1,
 'children': [],
 'from_action': 2,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117efc7f0>},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117efc710>},
 'state': <__main__.State at 0x117efeeb8>}

## Let's redefine the mcts functions, but this time without taking into account the multiple parents issue.

### Best child

In [345]:
def ucb(node, c=1):
    if node['N'] == 0:
        if c != 0:
            return float('inf')
        else:
            return 0
    return (node['Q'] / node['N']) + c * math.sqrt((2 * math.log(node['N'])) / node['N'])

def best_node(node_list, c=1):
    return max(node_list, key=partial(ucb, c=c))

def best_child(node, c=1):
    return best_node(node['children'], c)

In [346]:
ucb(node1)

inf

In [347]:
ucb(node2)

1.3325546111576978

In [348]:
best_child(root)

{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 1,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{...},
   {'N': 2,
    'Q': 1,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117efeeb8>}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117efc710>},
 'state': <__main__.State at 0x117efc7f0>}

In [349]:
best_child(root, c=0)

{'N': 2,
 'Q': 1,
 'children': [],
 'from_action': 2,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117efc7f0>},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117efc710>},
 'state': <__main__.State at 0x117efeeb8>}

### Expand

In [350]:
def expand(node):
    a = random.choice(unexplored_actions(node))
    child = new_node(state=node['state'].result(a), 
                     parent=node, 
                     from_action=a)
    node['children'].append(child)
    return child

In [351]:
expand(root)

{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 3,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117efc7f0>},
   {'N': 2,
    'Q': 1,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117efeeb8>},
   {...},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117efc710>},
 'state': <__main__.State at 0x117eab6a0>}

In [352]:
expand(root)

{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 4,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117efc7f0>},
   {'N': 2,
    'Q': 1,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117efeeb8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117eab6a0>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117eab6a0>},
   {...},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117efc710>},
 'state': <__main__.State at 0x117efecc0>}

### Tree policy

In [353]:
def unexplored_actions(node):
    return list(set(node['state'].actions()) - \
                set(child['from_action'] for child in node['children']))

In [354]:
unexplored_actions(root)

[]

In [355]:
unexplored_actions(node1)

[1, 2, 3, 4]

In [356]:
def is_fully_expanded(node):
    return len(unexplored_actions(node)) == 0

In [402]:
is_fully_expanded(root)

True

In [472]:
def tree_policy(node, nodes=None):
    print('tree_policy got this node:')
    print(node)
    if nodes is None:
        nodes = dict()
    temp_node = node
    while not temp_node['state'].terminal_test():
        if is_fully_expanded(temp_node):
            temp_node = best_child(temp_node)
        else:
            child = expand(temp_node)
            if child['state'] not in nodes.keys():
                nodes[child['state']] = [child]
            else:
                nodes[child['state']].append(child)
            return child
    return temp_node

In [417]:
root = new_node(State('root'))

In [418]:
nodes = dict()
child = tree_policy(root, nodes)
print(child['state'])
child

not fully expanded
tree_policy nodes: 1
root_4


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 4,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{...}, {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117f29550>},
 'state': <__main__.State at 0x117f290b8>}

In [419]:
print(len(nodes))
nodes

1


{<__main__.State at 0x117f290b8>: [{'N': 0,
   'Q': 0,
   'children': [],
   'from_action': 4,
   'parent': {'N': 0,
    'Q': 0,
    'children': [{...}, {...}],
    'from_action': None,
    'parent': None,
    'state': <__main__.State at 0x117f29550>},
   'state': <__main__.State at 0x117f290b8>}]}

In [420]:
child = tree_policy(root)
print(child['state'])
child

not fully expanded
tree_policy nodes: 1
root_2


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 2,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f290b8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f290b8>},
   {...},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117f29550>},
 'state': <__main__.State at 0x117f29e10>}

In [421]:
child = tree_policy(root)
print(child['state'])
child

not fully expanded
tree_policy nodes: 1
root_1


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 1,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f290b8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f290b8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f29e10>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f29e10>},
   {...},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117f29550>},
 'state': <__main__.State at 0x117f29278>}

In [422]:
child = tree_policy(root)
print(child['state'])
child

not fully expanded
tree_policy nodes: 1
root_3


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 3,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f290b8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f290b8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f29e10>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f29e10>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f29278>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f29278>},
   {...},
   {...}],
  'from_action': None,
  'parent': None,
  'state': <__main__.State at 0x117f29

In [423]:
child = tree_policy(root)
print(child['state'])
child

fully
not fully expanded
tree_policy nodes: 1
root_4_3


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 3,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{...}, {...}],
  'from_action': 4,
  'parent': {'N': 0,
   'Q': 0,
   'children': [{...},
    {...},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 2,
     'parent': {...},
     'state': <__main__.State at 0x117f29e10>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 2,
     'parent': {...},
     'state': <__main__.State at 0x117f29e10>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 1,
     'parent': {...},
     'state': <__main__.State at 0x117f29278>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 1,
     'parent': {...},
     'state': <__main__.State at 0x117f29278>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 3,
     'parent': {...},
     'state': <__main__.State at 0x117f297f0>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 3,
     'parent': {...},
     'st

In [424]:
child = tree_policy(root)
print(child['state'])
child

fully
not fully expanded
tree_policy nodes: 1
root_4_1


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 1,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117f29cc0>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117f29cc0>},
   {...},
   {...}],
  'from_action': 4,
  'parent': {'N': 0,
   'Q': 0,
   'children': [{...},
    {...},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 2,
     'parent': {...},
     'state': <__main__.State at 0x117f29e10>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 2,
     'parent': {...},
     'state': <__main__.State at 0x117f29e10>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 1,
     'parent': {...},
     'state': <__main__.State at 0x117f29278>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 1,
     'parent': {...},
     'state': <__

In [425]:
child = tree_policy(root)
print(child['state'])
child

fully
not fully expanded
tree_policy nodes: 1
root_4_4


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 4,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117f29cc0>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117f29cc0>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {...},
   {...}],
  'from_action': 4,
  'parent': {'N': 0,
   'Q': 0,
   'children': [{...},
    {...},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 2,
     'parent': {...},
     'state': <__main__.State at 0x117f29e10>},
    {'N': 0,
     'Q': 0,
     'children': [],
     'from_action': 2,
     'parent': {...},
     'state': <__main__.State

In [426]:
child = tree_policy(root)
print(child['state'])
child

fully
not fully expanded
tree_policy nodes: 1
root_4_2


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 2,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117f29cc0>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {...},
    'state': <__main__.State at 0x117f29cc0>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f29f98>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f29f98>},
   {...},
   {...}],
  'from_action': 4,
  'parent': {'N': 0,
   'Q': 0,
   'children': [{...},
    

In [427]:
child = tree_policy(root)
print(child['state'])
child

fully
fully
root_4_3


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 3,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{...},
   {...},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f29f98>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f29f98>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f2eeb8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f2eeb8>}],
  'from_action': 4,
  'parent': {'N': 0,
   'Q': 0,
   'children': [{...},
    

In [428]:
child = tree_policy(root)
print(child['state'])
child

fully
fully
root_4_3


{'N': 0,
 'Q': 0,
 'children': [],
 'from_action': 3,
 'parent': {'N': 0,
  'Q': 0,
  'children': [{...},
   {...},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 1,
    'parent': {...},
    'state': <__main__.State at 0x117f295f8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f29f98>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 4,
    'parent': {...},
    'state': <__main__.State at 0x117f29f98>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f2eeb8>},
   {'N': 0,
    'Q': 0,
    'children': [],
    'from_action': 2,
    'parent': {...},
    'state': <__main__.State at 0x117f2eeb8>}],
  'from_action': 4,
  'parent': {'N': 0,
   'Q': 0,
   'children': [{...},
    

### Backup

In [429]:
def backup_negamax(node, reward):
    temp_node = node
    temp_node['N'] += 1
    temp_node['Q'] += reward
    while temp_node['parent'] is not None:
        temp_node = temp_node['parent']
        temp_node['N'] += 1
        temp_node['Q'] += reward

### Default policy is kept unchanged, as it works with states

In [430]:
player_id = 0
def default_policy(state):
    temp_state = state
    while not temp_state.terminal_test():
        a = random.choice(temp_state.actions())
        temp_state = temp_state.result(a)

    return temp_state.utility(player_id)

### UCT Search

### First, I will define a function to update the tree with a new root. The problem is that nodes have to be identified by states, but states are reachable from many paths. To avoid joining the nodes with the same state (can be done in a posterior iteration of the algorithm), I will just choose the node with the best ucb value as the root. That way all nodes have a unique parent

In [450]:
def get_root_node(state, nodes=None):
    """
    nodes = {
        state1: [node1_1, node1_2], 
        state2: [node2_1],
        state3: [node3_1, node3_2, node3_3]
    }
    """
    if nodes is None:
        nodes = dict()
    if state not in nodes.keys():
        node = new_node(state)
        nodes[state] = [node]
        return node, nodes
    else:
        return best_node(nodes[state]), nodes

In [451]:
def uct_search(root_n, nodes=None):
    """ 
    nodes contains a dictionary of state:node with the nodes that have 
    already been visited.
    """
    edge_node = tree_policy(root_n, nodes)
    reward = default_policy(edge_node['state'])
    backup_negamax(edge_node, reward)
    return best_child(root_n, 0)['from_action'], nodes  # Greedy, so c = 0

In [452]:
nodes = dict()

In [453]:
root_n, nodes = get_root_node(State('root'))
uct_search(root_n, nodes)

not fully expanded
tree_policy nodes: 2


(3,
 {<__main__.State at 0x117f2eba8>: [{'N': 1,
    'Q': 0,
    'children': [{'N': 1,
      'Q': 0,
      'children': [],
      'from_action': 3,
      'parent': {...},
      'state': <__main__.State at 0x117f2ecf8>},
     {'N': 1,
      'Q': 0,
      'children': [],
      'from_action': 3,
      'parent': {...},
      'state': <__main__.State at 0x117f2ecf8>}],
    'from_action': None,
    'parent': None,
    'state': <__main__.State at 0x117f2eba8>}],
  <__main__.State at 0x117f2ecf8>: [{'N': 1,
    'Q': 0,
    'children': [],
    'from_action': 3,
    'parent': {'N': 1,
     'Q': 0,
     'children': [{...}, {...}],
     'from_action': None,
     'parent': None,
     'state': <__main__.State at 0x117f2eba8>},
    'state': <__main__.State at 0x117f2ecf8>}]})

In [473]:
root_s = State('root')
nodes = dict()
root_n, nodes = get_root_node(root_s, nodes)
root_n['parent'] = None  # Set this one as the new root
root_n['from_action'] = None  # Just for completeness, not used by now

for i in range(500):
    action, nodes = uct_search(root_n, nodes)
    print('\niter {}: action {}, nodes {}'.format(i, action, len(nodes)))
    print([str(state) for state in nodes.keys()])
    # Place to save the best move so far...

tree_policy got this node:
{'state': <__main__.State object at 0x117f35b38>, 'from_action': None, 'Q': 0, 'N': 0, 'children': [], 'parent': None}

iter 0: action 1, nodes 2
['root_1', 'root']
tree_policy got this node:
{'state': <__main__.State object at 0x117f35b38>, 'from_action': None, 'Q': 0, 'N': 1, 'children': [{'state': <__main__.State object at 0x117f35438>, 'from_action': 1, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35438>, 'from_action': 1, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}], 'parent': None}

iter 1: action 1, nodes 3
['root_4', 'root_1', 'root']
tree_policy got this node:
{'state': <__main__.State object at 0x117f35b38>, 'from_action': None, 'Q': 0, 'N': 2, 'children': [{'state': <__main__.State object at 0x117f35438>, 'from_action': 1, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35438>, 'from_action': 1, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}, {'state':

iter 341: action 1, nodes 9
['root_4', 'root_1', 'root_1_2', 'root_1_1', 'root_1_3', 'root_1_4', 'root_2', 'root', 'root_3']
tree_policy got this node:
{'state': <__main__.State object at 0x117f35b38>, 'from_action': None, 'Q': 0, 'N': 342, 'children': [{'state': <__main__.State object at 0x117f35438>, 'from_action': 1, 'Q': 0, 'N': 339, 'children': [{'state': <__main__.State object at 0x117f35cc0>, 'from_action': 4, 'Q': 0, 'N': 335, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35cc0>, 'from_action': 4, 'Q': 0, 'N': 335, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35e48>, 'from_action': 2, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35e48>, 'from_action': 2, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35780>, 'from_action': 3, 'Q': 0, 'N': 1, 'children': [], 'parent': {...}}, {'state': <__main__.State object at 0x117f35780>, 'fr

In [436]:
root_s

<__main__.State at 0x117f35e80>

In [441]:
len(nodes)

9

In [469]:
nodes

{<__main__.State at 0x117f48ba8>: [{'N': 1,
   'Q': 0,
   'children': [{'N': 1,
     'Q': 0,
     'children': [],
     'from_action': 1,
     'parent': {...},
     'state': <__main__.State at 0x117f48a20>},
    {'N': 1,
     'Q': 0,
     'children': [],
     'from_action': 1,
     'parent': {...},
     'state': <__main__.State at 0x117f48a20>}],
   'from_action': None,
   'parent': None,
   'state': <__main__.State at 0x117f48ba8>}],
 <__main__.State at 0x117f48a20>: [{'N': 1,
   'Q': 0,
   'children': [],
   'from_action': 1,
   'parent': {'N': 1,
    'Q': 0,
    'children': [{...}, {...}],
    'from_action': None,
    'parent': None,
    'state': <__main__.State at 0x117f48ba8>},
   'state': <__main__.State at 0x117f48a20>}]}

In [440]:
best_node(list(nodes.values()))

TypeError: list indices must be integers or slices, not str