In [1]:
import gym
import torch
import numpy as np

In [2]:
import env

The environment closely follows the OpenAI Gym API. It cannot currently be created with ```gym.make("env_id")```, though adding that should be easy.

In [3]:
# Single environment instance shared by the cells below; random_play() builds its tree from it.
checkers_env = env.Env()

A state is an ```ndarray``` of shape ```(5+NUMBER_OF_PIECES, BOARD_SIZE, BOARD_SIZE)```

Where

* ```state[0]``` — black pieces
* ```state[1]``` — white pieces
* ```state[2]``` — piece ids
* ```state[3]``` — current turn (blacks=1, whites=0)
* ```state[4]``` — whether the game is in a terminal state
* ```state[5+i]``` — allowed moves for piece number i

I should probably move the allowed moves elsewhere, but I have enough RAM to leave this unoptimized for now.


In [4]:
from matplotlib import pyplot as plt
from matplotlib import colors

%matplotlib widget 

def show_state(state):
    """Draw ``state`` with ``plt.imshow`` and display the resulting figure.

    NOTE(review): the notebook documents a full state as a 3-D array of shape
    (5+NUMBER_OF_PIECES, BOARD_SIZE, BOARD_SIZE); presumably callers pass a
    single 2-D plane here, since imshow expects image-shaped data — confirm.
    """
    plt.imshow(state)
    plt.show()

def show_board(board):
    """Draw one 2-D board plane as a white/red grid.

    Values in [0, 0.5) are drawn white and values in [0.5, 18) red, so a
    boolean mask or a plane of small positive numbers renders as
    "empty / marked" squares.

    Parameters
    ----------
    board : 2-D array-like
        The plane to draw (for example ``state[2] == piece_id``). Any
        rectangular size is supported; tick positions are derived from its
        shape rather than being fixed to an 8x8 board.
    """
    cmap = colors.ListedColormap(['white', 'red'])
    bounds = [0, 0.5, 18]
    norm = colors.BoundaryNorm(bounds, cmap.N)
    plt.matshow(board, cmap=cmap, norm=norm, interpolation='none')

    # Place unlabeled ticks on the cell boundaries (0.5, 1.5, ...) so that
    # plt.grid() draws the outline of every square.
    n_rows, n_cols = np.shape(board)
    plt.xticks(np.arange(0.5, n_cols), [])
    plt.yticks(np.arange(0.5, n_rows), [])

    plt.grid()

def show_piece_and_moves(state, piece_id):
    """Plot the location of piece ``piece_id`` and then its allowed moves.

    Two boards are drawn with ``show_board``: first the mask of cells in the
    piece-id plane (``state[2]``) equal to ``piece_id``, then the plane
    ``state[5 + piece_id]``, which holds the allowed moves for that piece.
    """
    location_mask = state[2] == piece_id
    show_board(location_mask)

    allowed_moves = state[5 + piece_id]
    show_board(allowed_moves)

Plots above: 

1. Piece number 2
2. Possible moves for piece number 2

In [5]:
import monte_carlo_tree


def random_play():
    """Play a game using random tree strategy.

    Builds a ``monte_carlo_tree.RandomPlayTree`` over the notebook-level
    ``checkers_env`` and runs one ``simulate`` call starting at the tree's
    root node.

    Returns
    -------
    tuple
        ``(depth, reward)``: the value of ``depth()`` on the node returned by
        ``simulate``, and the reward returned alongside that node.
    """
    # NOTE(review): the meaning of the literal 8 is not visible from this
    # notebook (possibly the board size) — confirm against RandomPlayTree.
    tree = monte_carlo_tree.RandomPlayTree(checkers_env, 8)

    terminal_node, reward = tree.simulate(tree.root_node)

    return terminal_node.depth(), reward
    
def build_random_play_stats(n_games=1000):
    """Play a number of random games and display the result.

    Each game is played with ``random_play()``. A positive reward is counted
    as a win for blacks, a negative reward as a win for whites, and a reward
    equal to zero as a draw. The first value returned by ``random_play()`` is
    collected per game and its mean is reported as "Moves mean".

    Parameters
    ----------
    n_games : int, default 1000
        Number of games to play. With 0 games the counters stay at zero and
        the mean is reported as ``nan``.

    Returns
    -------
    None
        The summary is printed, not returned.
    """
    black_wins = 0
    white_wins = 0
    draws = 0
    moves = []

    for _ in range(n_games):
        n_moves, reward = random_play()
        if reward > 0:
            black_wins += 1
        elif reward < 0:
            white_wins += 1
        elif reward == 0:
            # Explicit comparison: a reward that is neither positive,
            # negative nor zero (e.g. NaN) is left uncounted.
            draws += 1

        moves.append(n_moves)

    # np.mean([]) emits a RuntimeWarning; report nan directly when no game
    # was played so the printed line is the same without the warning.
    moves_mean = np.mean(moves) if moves else float("nan")

    print("Blacks: ", black_wins, "Whites: ", white_wins, "Draws: ", draws, "Moves mean:", moves_mean)

In [6]:
# Run the default 1000 random games and print the win/draw tally.
# NOTE(review): the recorded output shows all 1000 games counted as draws
# (reward == 0) — worth confirming that simulate() returns a signed reward.
build_random_play_stats()

Blacks:  0 Whites:  0 Draws:  1000 Moves mean: 1065.927
