In [27]:
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from matplotlib import colors
from math import sqrt
from math import log
from tqdm import tqdm
from prettytable import PrettyTable

# Move class for Breakthrough

In [28]:
class Move(object):
    def __init__(self, color, x1, y1, x2, y2):
        self.color = color
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

    def valid(self, board):
        if self.x2 >= Dx or self.y2 >= Dy or self.x2 < 0 or self.y2 < 0:
            return False
        if self.color == White:
            if self.x2 != self.x1 + 1:
                return False
            if board.board[self.x2][self.y2] == Black:
                if self.y2 == self.y1 + 1 or self.y2 == self.y1 - 1:
                    return True
                return False
            elif board.board[self.x2][self.y2] == Empty:
                if self.y2 == self.y1 + 1 or self.y2 == self.y1 - 1 or self.y2 == self.y1:
                    return True
                return False
        elif self.color == Black:
            if self.x2 != self.x1 - 1:
                return False
            if board.board[self.x2][self.y2] == White:
                if self.y2 == self.y1 + 1 or self.y2 == self.y1 - 1:
                    return True
                return False
            elif board.board[self.x2][self.y2] == Empty:
                if self.y2 == self.y1 + 1 or self.y2 == self.y1 - 1 or self.y2 == self.y1:
                    return True
                return False
        return False

    def code(self, board):
        direction = 0
        if self.y2 > self.y1:
            if board.board[self.x2][self.y2] == Empty:
                direction = 1
            else:
                direction = 2
        if self.y2 < self.y1:
            if board.board[self.x2][self.y2] == Empty:
                direction = 3
            else:
                direction = 4
        if self.color == White:
            return 5 * (Dy * self.x1 + self.y1) + direction
        else:
            return 5 * Dx * Dy + 5 * (Dy * self.x1 + self.y1) + direction

# Board class to play Breakthrough 5x5

In [29]:
import random

Dx = 5
Dy = 5
Empty = 0
White = 1
Black = 2


class Board(object):
    def __init__(self):
        self.h = 0
        self.turn = White
        self.board = np.zeros((Dx, Dy))
        for i in range(0, 2):
            for j in range(0, Dy):
                self.board[i][j] = White
        for i in range(Dx - 2, Dx):
            for j in range(0, Dy):
                self.board[i][j] = Black

    def legalMoves(self):
        moves = []
        for i in range(0, Dx):
            for j in range(0, Dy):
                if self.board[i][j] == self.turn:
                    for k in [-1, 0, 1]:
                        for l in [-1, 0, 1]:
                            m = Move(self.turn, i, j, i + k, j + l)
                            if m.valid(self):
                                moves.append(m)
        return moves

    def score(self):
        for i in range(0, Dy):
            if (self.board[Dx - 1][i] == White):
                return 1.0
            elif (self.board[0][i] == Black):
                return 0.0
        l = self.legalMoves()
        if len(l) == 0:
            if self.turn == Black:
                return 1.0
            else:
                return 0.0
        return 0.5

    def terminal(self):
        if self.score() == 0.5:
            return False
        return True

    def play(self, move):
        self.board[move.x1][move.y1] = Empty
        self.board[move.x2][move.y2] = move.color
        if (self.turn == White):
            self.turn = Black
        else:
            self.turn = White

    def playout(self):
        while (True):
            moves = self.legalMoves()
            if self.terminal():
                return self.score()
            n = random.randint(0, len(moves) - 1)
            self.play(moves[n])

    def print(self):
        print("   1 2 3 4 5")
        for i in range(Dy):
            print("{} |".format(i + 1), end="")
            for j in range(Dx):
                if self.board[i][j] == Black:
                    print("\u265F", end="")
                elif self.board[i][j] == White:
                    print("\u2659", end="")
                else:
                    print(" ", end="")
                if j < Dx:
                    print("|", end="")

            if i < Dy:
                print()


# Flat Monte Carlo

In [30]:
import copy


def flat(board, n):
    moves = board.legalMoves()
    bestScore = 0
    bestMove = 0
    for m in range(len(moves)):
        sum = 0
        for i in range(n // len(moves)):
            b = copy.deepcopy(board)
            b.play(moves[m])
            r = b.playout()
            if board.turn == Black:
                r = 1 - r
            sum = sum + r
        if sum > bestScore:
            bestScore = sum
            bestMove = m
    return moves[bestMove]

In [31]:
b = Board()
b.print()

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙|♙|♙|♙|
3 | | | | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|


In [32]:
while not b.terminal():
    m = flat(b, 100)
    b.play(m)
    b.print()
    print(f'Best move: {m.x1 + 1},{m.y1 + 1} -> {m.x2 + 1},{m.y2 + 1}')
    print(f"Score: {b.score()}")
    if b.score() == 0:
        print("Black wins")
    elif b.score() == 1:
        print("White wins")
    else:
        print("Draw")

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙|♙|♙|
3 |♙| | | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,2 -> 3,1
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙|♙|♙|
3 |♙| |♟| | |
4 |♟| |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,2 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | | |♙|♙|♙|
3 |♙|♙|♟| | |
4 |♟| |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,1 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | | |♙|♙|♙|
3 |♙|♟|♟| | |
4 | | |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,1 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | | |♙|♙| |
3 |♙|♟|♟| |♙|
4 | | |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,5 -> 3,5
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♟| |♙|♙| |
3 |♙| |♟| |♙|
4 | | |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 3,2 -> 2,1
Score: 0.5
Draw
   1 2 3 4 5
1 |♙| |♙|♙|♙|
2 |♟|♙|♙|♙| |
3 |♙| |♟| |♙|
4 | | |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 1,2 -> 2,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♟|♙|♙|♙|
2 | |♙|♙|♙| |
3 |♙| |♟| |♙|
4 | | |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,1 -> 1,2
Score: 0.0
Black wins


# UCB

In [33]:
def ucb(board, n):
    moves = board.legalMoves()
    sumScores = [0.0 for x in range(len(moves))]
    nbVisits = [0 for x in range(len(moves))]
    for i in range(n):
        bestMove = 0
        bestUCB = 0
        for m in range(len(moves)):
            score = 1000000
            if nbVisits[m] > 0:
                score = sumScores[m] / nbVisits[m] + 0.4 * sqrt(log(i) / nbVisits[m])
            if score > bestUCB:
                bestUCB = score
                bestMove = m
        b = copy.deepcopy(board)
        b.play(moves[bestMove])
        r = b.playout()
        if board.turn == Black:
            r = 1 - r
        sumScores[bestMove] = sumScores[bestMove] + r
        nbVisits[bestMove] = nbVisits[bestMove] + 1
    bestScore = 0
    bestMove = 0
    for m in range(len(moves)):
        if nbVisits[m] > bestScore:
            bestScore = nbVisits[m]
            bestMove = m
    return moves[bestMove]

In [34]:
b = Board()
b.print()

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙|♙|♙|♙|
3 | | | | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|


In [35]:
while not b.terminal():
    m = ucb(b, 100)
    b.play(m)
    b.print()
    print(f'Best move: {m.x1 + 1},{m.y1 + 1} -> {m.x2 + 1},{m.y2 + 1}')
    print(f"Score: {b.score()}")
    if b.score() == 0:
        print("Black wins")
    elif b.score() == 1:
        print("White wins")
    else:
        print("Draw")

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♙|♙|♙|
3 | |♙| | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,1 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♙|♙|♙|
3 | |♟| | | |
4 |♟|♟| |♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,3 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♙| |♙|
3 | |♟| |♙| |
4 |♟|♟| |♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,4 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♟| |♙|
3 | | | |♙| |
4 |♟|♟| |♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 3,2 -> 2,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♟| |♙|
3 | | | | | |
4 |♟|♟|♙|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 3,4 -> 4,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♟|♙|♙|♙|
2 | |♙| | |♙|
3 | | | | | |
4 |♟|♟|♙|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,3 -> 1,2
Score: 0.0
Black wins


In [36]:
board = Board()
while not board.terminal():
    if board.turn == White:
        m = flat(board, 100)
    else:
        m = ucb(board, 100)
    board.play(m)
    board.print()
    print(f'Best move: {m.x1 + 1},{m.y1 + 1} -> {m.x2 + 1},{m.y2 + 1}')
    print(f"Score: {board.score()}")
    if board.score() == 0:
        print("UCB wins")
    elif board.score() == 1:
        print("Flat wins")
    else:
        print("Draw")

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙|♙|♙|
3 | |♙| | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,2 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙|♙|♙|
3 | |♙|♟| | |
4 |♟|♟|♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,4 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| | |♙|♙|
3 | |♙|♟|♙| |
4 |♟|♟|♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,3 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| | |♟|♙|
3 | |♙| |♙| |
4 |♟|♟|♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 3,3 -> 2,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙| |
2 |♙| | |♙|♙|
3 | |♙| |♙| |
4 |♟|♟|♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 1,5 -> 2,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙| |
2 |♙| | |♙|♙|
3 | |♟| |♙| |
4 |♟|♟| | |♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,3 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙| |
2 |♙| | |♙|♙|
3 | |♟| | | |
4 |♟|♟| | |♙|
5 |♟|♟|♟|♟|♟|
Best move: 3,4 -> 4,5
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙| |
2 |♙| | |♙|♙|
3 | |♟| | | |
4 |♟|♟| | |♟|
5 |♟|♟|♟| |♟|
Best move: 5,4 -> 4,5
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| | |
2 |♙|

# Board class with hashcode

In [37]:
import random

Dx = 5
Dy = 5
Empty = 0
White = 1
Black = 2

hashTable = [[[random.randint(0, 2 ** 64 - 1) for _ in range(Dy)] for _ in range(Dx)] for _ in range(3)]
hashTurn = random.randint(0, 2 ** 64)
"""hashTable = []
for k in range(3):
    l = []
for i in range(Dx):
    l1 = []
for j in range(Dy):
    l1.append(random.randint(0, 2 ** 64))
l.append(l1)
hashTable.append(l)
hashTurn = random.randint(0, 2 ** 64)"""


class Board(object):
    def __init__(self):
        self.h = 0
        self.turn = White
        self.board = np.zeros((Dx, Dy))
        for i in range(0, 2):
            for j in range(0, Dy):
                self.board[i][j] = White
        for i in range(Dx - 2, Dx):
            for j in range(0, Dy):
                self.board[i][j] = Black

    def legalMoves(self):
        moves = []
        for i in range(0, Dx):
            for j in range(0, Dy):
                if self.board[i][j] == self.turn:
                    for k in [-1, 0, 1]:
                        for l in [-1, 0, 1]:
                            m = Move(self.turn, i, j, i + k, j + l)
                            if m.valid(self):
                                moves.append(m)
        return moves

    def score(self):
        for i in range(0, Dy):
            if (self.board[Dx - 1][i] == White):
                return 1.0
            elif (self.board[0][i] == Black):
                return 0.0
        l = self.legalMoves()
        if len(l) == 0:
            if self.turn == Black:
                return 1.0
            else:
                return 0.0
        return 0.5

    def terminal(self):
        if self.score() == 0.5:
            return False
        return True

    def playout(self):
        while (True):
            moves = self.legalMoves()
            if self.terminal():
                return self.score()
            n = random.randint(0, len(moves) - 1)
            self.play(moves[n])

    def play(self, move):
        col = int(self.board[move.x2][move.y2])
        if col != Empty:
            self.h = self.h ^ hashTable[col][move.x2][move.y2]
        self.h = self.h ^ hashTable[move.color][move.x2][move.y2]
        self.h = self.h ^ hashTable[move.color][move.x1][move.y1]
        self.h = self.h ^ hashTurn
        self.board[move.x2][move.y2] = move.color
        self.board[move.x1][move.y1] = Empty
        if (move.color == White):
            self.turn = Black
        else:
            self.turn = White

    def print(self):
        print("   1 2 3 4 5")
        for i in range(Dy):
            print("{} |".format(i + 1), end="")
            for j in range(Dx):
                if self.board[i][j] == Black:
                    print("\u265F", end="")
                elif self.board[i][j] == White:
                    print("\u2659", end="")
                else:
                    print(" ", end="")
                if j < Dx:
                    print("|", end="")

            if i < Dy:
                print()


# Transposition Table

In [38]:
MaxLegalMoves = 6 * Dx
Table = {}


def add(board):
    nplayouts = [0.0 for x in range(MaxLegalMoves)]
    nwins = [0.0 for x in range(MaxLegalMoves)]
    Table[board.h] = [0, nplayouts, nwins]


def look(board):
    return Table.get(board.h, None)


# UCT

In [39]:
def UCT(board):
    if board.terminal():
        return board.score()
    t = look(board)
    if t is not None:
        bestValue = 0
        best = 0
        moves = board.legalMoves()
        for i in range(len(moves)):
            val = 1000000.0
            n = t[0]
            ni = t[1][i]
            wi = t[2][i]
            if ni > 0:
                Qi = wi / ni
                if board.turn == Black:
                    Qi = 1 - Qi
                val = Qi + 0.4 * sqrt(log(n) / ni)
            if val > bestValue:
                bestValue = val
                best = i
        board.play(moves[best])
        result = UCT(board)
        t[0] += 1
        t[1][best] += 1
        t[2][best] += result
        return result
    else:
        add(board)
        return board.playout()


def BestMoveUCT(board, n):
    global Table
    Table = {}
    for _ in range(n):
        b1 = copy.deepcopy(board)
        result = UCT(b1)
    t = look(board)
    moves = board.legalMoves()
    best = moves[0]
    bestValue = t[1][0]
    for i in range(1, len(moves)):
        if t[1][i] > bestValue:
            best = moves[i]
            bestValue = t[1][i]
    return best

In [40]:
board = Board()
while not board.terminal():
    move = BestMoveUCT(board, 100)
    board.play(move)
    board.print()
    print(f'Best move: {move.x1 + 1},{move.y1 + 1} -> {move.x2 + 1},{move.y2 + 1}')

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙|♙|
3 | |♙| | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,3 -> 3,2
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙|♙|
3 |♟|♙| | | |
4 | |♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,1 -> 3,1
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙|♙|
3 |♟| | | | |
4 | |♟|♙|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 3,2 -> 4,3
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙|♙|
3 |♟| | | | |
4 | |♟|♟|♟|♟|
5 |♟|♟|♟| |♟|
Best move: 5,4 -> 4,3
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙| |
3 |♟| | |♙| |
4 | |♟|♟|♟|♟|
5 |♟|♟|♟| |♟|
Best move: 2,5 -> 3,4
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙| |
3 |♟|♟| |♙| |
4 | | |♟|♟|♟|
5 |♟|♟|♟| |♟|
Best move: 4,2 -> 3,2
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙| |
3 |♟|♟| | | |
4 | | |♙|♟|♟|
5 |♟|♟|♟| |♟|
Best move: 3,4 -> 4,3
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙| |
3 |♟|♟| | | |
4 | | |♟|♟|♟|
5 |♟| |♟| |♟|
Best move: 5,2 -> 4,3
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 |♙|♙|♙|♙| |
3 |♟|♟| | | |
4 | | |♟|♟|♟|
5 |♟| |♟| |♟|
Best move: 1,4 -> 2,3
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 |♟|♙|♙|♙| |
3 |♟| | | | |


In [41]:
def playoutAMAF(board, played):
    while True:
        moves = board.legalMoves()
        if len(moves) == 0 or board.terminal():
            return board.score()
        n = random.randint(0, len(moves) - 1)
        played.append(moves[n])
        board.play(moves[n])


MaxCodeLegalMoves = 2 * Dx * Dy * 5


def addAMAF(board, played):
    nplayouts = [0.0 for x in range(MaxCodeLegalMoves)]
    nwins = [0.0 for x in range(MaxCodeLegalMoves)]
    nplayoutsAMAF = [0.0 for x in range(MaxLegalMoves)]
    nwinsAMAF = [0.0 for x in range(MaxLegalMoves)]
    Table[board.h] = [0, nplayouts, nwins, nplayoutsAMAF, nwinsAMAF, played]


def updateAMAF(t, played, result):
    for i in range(len(played)):
        m = played[i].code(board)
        t[3][m] += 1
        t[4][m] += result

# Tournament between Flat, UCB and UCT

In [42]:
def play_match(player1, player2, board, iterations=100):
    while not board.terminal():
        if board.turn == White:
            move = player1(board, iterations)
        else:
            move = player2(board, iterations)
        board.play(move)
    print(f"Winner: {'White' if board.score() == 1 else 'Black'}")
    return board.score()

In [43]:
def update_results(results, name1, name2, result):
    match_outcomes = {
        1.0: ((name1, 'wins', 3), (name2, 'losses', 0)),
        0.5: ((name1, 'draws', 1), (name2, 'draws', 1)),
        0.0: ((name2, 'wins', 3), (name1, 'losses', 0)),
    }

    for winner, outcome, points in match_outcomes[result]:
        results[winner]['points'] += points
        results[winner][outcome] += 1


def display_ranking(results):
    table = PrettyTable()
    table.field_names = ["Strategy", "Points", "Wins", "Draws", "Losses"]
    for name, stats in sorted(results.items(), key=lambda x: (-x[1]["points"], -x[1]["wins"], x[1]["losses"])):
        table.add_row([name, stats["points"], stats["wins"], stats["draws"], stats["losses"]])
    print(table)


def tournament_round(strategies, iterations=100):
    import time
    results = {name: {"points": 0, "wins": 0, "draws": 0, "losses": 0} for name in strategies}

    for name1 in strategies:
        for name2 in strategies:
            if name1 != name2:
                for matchup in [(name1, name2), (name2, name1)]:
                    print(f"{matchup[0]} vs {matchup[1]} (White vs Black)")
                    board = Board()
                    start = time.time()
                    result = play_match(strategies[matchup[0]], strategies[matchup[1]], deepcopy(board), iterations)
                    end = time.time()
                    print(f"Time taken: {end - start} seconds")
                    update_results(results, matchup[0], matchup[1], result)
                    display_ranking(results)
    return results

In [44]:
iterations = 50
strategies = {
    "UCT": BestMoveUCT,
    "Flat": flat,
    "UCB": ucb
}

tournament_round(strategies, iterations)

UCT vs Flat (White vs Black)
Winner: White
Time taken: 1.2412819862365723 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   UCT    |   3    |  1   |   0   |   0    |
|   UCB    |   0    |  0   |   0   |   0    |
|   Flat   |   0    |  0   |   0   |   1    |
+----------+--------+------+-------+--------+
Flat vs UCT (White vs Black)
Winner: Black
Time taken: 0.4307270050048828 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   UCT    |   6    |  2   |   0   |   0    |
|   UCB    |   0    |  0   |   0   |   0    |
|   Flat   |   0    |  0   |   0   |   2    |
+----------+--------+------+-------+--------+
UCT vs UCB (White vs Black)
Winner: Black
Time taken: 0.697890043258667 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------

{'UCT': {'points': 9, 'wins': 3, 'draws': 0, 'losses': 5},
 'Flat': {'points': 6, 'wins': 2, 'draws': 0, 'losses': 6},
 'UCB': {'points': 21, 'wins': 7, 'draws': 0, 'losses': 1}}

In [56]:
# RAVE 

def selectMoveRAVE(board, n):
    t = look(board)
    if t is not None:
        bestValue = 0
        best = 0
        moves = board.legalMoves()
        for i in range(len(moves)):
            val = 1000000.0
            n = t[0]
            ni = t[1][i]
            wi = t[2][i]
            if ni > 0:
                Qi = wi / ni
                if board.turn == Black:
                    Qi = 1 - Qi
                beta = ni / (ni + 4 * n + 4 * n * ni)
                val = (1 - beta) * Qi + beta * t[4][i]
            if val > bestValue:
                bestValue = val
                best = i
        board.play(moves[best])
        result = selectMoveRAVE(board, n)
        t[0] += 1
        t[1][best] += 1
        t[2][best] += result
        updateAMAF(t, t[5], result)
        return result
    else:
        addAMAF(board, [])
        return playoutAMAF(board, [])


def BestMoveRAVE(board, n):
    global Table
    Table = {}
    for _ in range(n):
        b1 = copy.deepcopy(board)
        result = selectMoveRAVE(b1, n)
    t = look(board)
    moves = board.legalMoves()
    best = moves[0]
    bestValue = t[1][0]
    for i in range(1, len(moves)):
        if t[1][i] > bestValue:
            best = moves[i]
            bestValue = t[1][i]
    return best

In [57]:
iterations = 50
strategies = {
    "UCT": BestMoveUCT,
    "RAVE": BestMoveRAVE
}

tournament_round(strategies, iterations)

UCT vs RAVE (White vs Black)
Winner: Black
Time taken: 0.9484131336212158 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   RAVE   |   3    |  1   |   0   |   0    |
|   UCT    |   0    |  0   |   0   |   1    |
+----------+--------+------+-------+--------+
RAVE vs UCT (White vs Black)
Winner: White
Time taken: 0.4557201862335205 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   RAVE   |   6    |  2   |   0   |   0    |
|   UCT    |   0    |  0   |   0   |   2    |
+----------+--------+------+-------+--------+
RAVE vs UCT (White vs Black)
Winner: White
Time taken: 0.9889001846313477 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   RAVE   |   9    |  3   |   0   |   0    |
|   UCT    |   0  

{'UCT': {'points': 3, 'wins': 1, 'draws': 0, 'losses': 3},
 'RAVE': {'points': 9, 'wins': 3, 'draws': 0, 'losses': 1}}

In [58]:
def RAVE(board, played):
    if board.terminal():
        return board.score()

    t = look(board)
    if t is not None:
        bestValue = 0
        best = 0
        moves = board.legalMoves()
        bestcode = moves[0].code(board)

        for i in range(len(moves)):
            val = 10000000.0
            code = moves[i].code(board)

            if t[3][code] > 0:
                beta = t[3][code] / (t[1][i] + t[3][code] + 1e-5 * t[1][i] * t[3][code])
                Q = 1
                if t[1][i] > 0:
                    Q = t[2][i] / t[1][i]
                if board.turn == Black:
                    Q = 1 - Q

                val = (1.0 - beta) * Q + beta * (t[4][code] / t[3][code])

            if val > bestValue:
                bestValue = val
                best = i

        bestcode = moves[best].code(board)
        board.play(moves[best])
        played.append(bestcode)
        res = RAVE(board, played)
        t[0] += 1
        t[1][best] += 1
        t[2][best] += res
        updateAMAF(t, played, res)
        return res
    else:
        addAMAF(played, board)
        return playoutAMAF(board, played)


iterations = 50
strategies = {
    "UCT": BestMoveUCT,
    "RAVE": BestMoveRAVE
}
board = Board()

tournament_round(strategies, iterations)

UCT vs RAVE (White vs Black)
Winner: Black
Time taken: 0.4321260452270508 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   RAVE   |   3    |  1   |   0   |   0    |
|   UCT    |   0    |  0   |   0   |   1    |
+----------+--------+------+-------+--------+
RAVE vs UCT (White vs Black)
Winner: White
Time taken: 0.786940336227417 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   RAVE   |   6    |  2   |   0   |   0    |
|   UCT    |   0    |  0   |   0   |   2    |
+----------+--------+------+-------+--------+
RAVE vs UCT (White vs Black)
Winner: White
Time taken: 0.9876878261566162 seconds
+----------+--------+------+-------+--------+
| Strategy | Points | Wins | Draws | Losses |
+----------+--------+------+-------+--------+
|   RAVE   |   9    |  3   |   0   |   0    |
|   UCT    |   0   

{'UCT': {'points': 3, 'wins': 1, 'draws': 0, 'losses': 3},
 'RAVE': {'points': 9, 'wins': 3, 'draws': 0, 'losses': 1}}

In [60]:
def GRAVE(board, played, threshold=100):
    if board.terminal():
        return board.score()

    t = look(board)
    if t is not None:
        bestValue = 0
        best = 0
        moves = board.legalMoves()
        bestcode = moves[0].code(board)

        for i in range(len(moves)):
            val = 10000000.0
            code = moves[i].code(board)

            beta = 0
            Q = 1

            if t[3][code] > threshold:
                if t[1][i] > 0:
                    Q = t[2][i] / t[1][i]
            else:
                if t[3][code] > 0:
                    beta = t[3][code] / (t[1][i] + t[3][code] + 1e-5 * t[1][i] * t[3][code])
                    if t[1][i] > 0:
                        Q = t[2][i] / t[1][i]

            if board.turn == Black:
                Q = 1 - Q

            val = (1.0 - beta) * Q + beta * (t[4][code] / t[3][code])

            if val > bestValue:
                bestValue = val
                best = i

        bestcode = moves[best].code(board)
        board.play(moves[best])
        played.append(bestcode)
        res = GRAVE(board, played, threshold)
        t[0] += 1
        t[1][best] += 1
        t[2][best] += res
        updateAMAF(t, played, res)
        return res
    else:
        addAMAF(played, board)
        return playoutAMAF(board, played)


def BestMoveGRAVE(board, n):
    global Table
    Table = {}
    for _ in range(n):
        b1 = copy.deepcopy(board)
        result = GRAVE(b1, [], 100)
    t = look(board)
    moves = board.legalMoves()
    best = moves[0]
    bestValue = t[1][0]
    for i in range(1, len(moves)):
        if t[1][i] > bestValue:
            best = moves[i]
            bestValue = t[1][i]
    return best


iterations = 50
strategies = {
    "UCT": BestMoveUCT,
    "GRAVE": BestMoveGRAVE
}

board = Board()

In [61]:
tournament_round(strategies, iterations)

UCT vs GRAVE (White vs Black)


AttributeError: 'list' object has no attribute 'h'