In [171]:
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from matplotlib import colors
from math import sqrt
from math import log

# Move class for Breakthrough

In [172]:
class Move(object):
    """One pawn move in Breakthrough, from cell (x1, y1) to cell (x2, y2).

    ``x`` is the first index and ``y`` the second index into ``board.board``.
    ``color`` is the side making the move (``White`` or ``Black``).
    """

    def __init__(self, color, x1, y1, x2, y2):
        self.color = color
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2

    def valid(self, board):
        """Tell whether this move is legal on ``board``.

        White advances by +1 along x, Black by -1. A move onto an empty cell
        may go straight or one column sideways; a move onto an enemy piece
        must change column by exactly one. Everything else (off the board,
        wrong row, own piece on the target, unknown colour) is rejected.
        Only the destination is range-checked; the origin is trusted.
        """
        inside = 0 <= self.x2 < Dx and 0 <= self.y2 < Dy
        if not inside:
            return False

        # The two colours follow mirrored rules: pick the step and the enemy.
        if self.color == White:
            step, enemy = 1, Black
        elif self.color == Black:
            step, enemy = -1, White
        else:
            return False

        if self.x2 != self.x1 + step:
            return False

        target = board.board[self.x2][self.y2]
        diagonal = self.y2 == self.y1 + 1 or self.y2 == self.y1 - 1
        if target == enemy:
            return diagonal
        if target == Empty:
            return diagonal or self.y2 == self.y1
        return False

    def code(self, board):
        """Return an integer code for this move, read against ``board`` before it is played.

        The code is ``5 * (Dy * x1 + y1) + direction``, shifted by
        ``5 * Dx * Dy`` for any colour other than White. ``direction`` is
        0 when the column is unchanged, 1/2 for a higher column onto an
        empty/occupied cell and 3/4 for a lower column onto an
        empty/occupied cell.
        """
        direction = 0
        if self.y2 != self.y1:
            # The destination cell is only inspected for column-changing moves.
            onto_empty = board.board[self.x2][self.y2] == Empty
            if self.y2 > self.y1:
                direction = 1 if onto_empty else 2
            else:
                direction = 3 if onto_empty else 4

        origin = 5 * (Dy * self.x1 + self.y1) + direction
        if self.color == White:
            return origin
        return 5 * Dx * Dy + origin

# Board class to play Breakthrough 5x5

In [173]:
import random

# Board dimensions: Board.board is created with shape (Dx, Dy).
Dx = 5
Dy = 5
# Cell / player codes stored in Board.board and used for Board.turn and Move.color.
Empty = 0
White = 1
Black = 2


class Board(object):
    """Breakthrough position on a Dx x Dy grid plus the side to move.

    Attributes:
        h: integer set to 0 at construction; this version of the class
            never updates it.
        turn: colour that moves next (``White`` starts).
        board: NumPy array of shape ``(Dx, Dy)`` holding ``Empty``,
            ``White`` or ``Black`` in every cell.
    """

    def __init__(self):
        self.h = 0
        self.turn = White
        self.board = np.zeros((Dx, Dy))
        # White fills the first two rows, Black the last two.
        self.board[0:2, :] = White
        self.board[Dx - 2:Dx, :] = Black

    def legalMoves(self):
        """Return the list of valid ``Move`` objects for the side to move.

        Pieces are scanned in row-major order and, per piece, target
        columns in the order left, straight, right.
        """
        # Move.valid only accepts a one-row step towards the opponent, so
        # the two other row offsets never need to be tried.
        forward = 1 if self.turn == White else -1
        moves = []
        for i in range(Dx):
            for j in range(Dy):
                if self.board[i][j] == self.turn:
                    for dy in (-1, 0, 1):
                        m = Move(self.turn, i, j, i + forward, j + dy)
                        if m.valid(self):
                            moves.append(m)
        return moves

    def score(self):
        """Return 1.0 if White has won, 0.0 if Black has won, 0.5 otherwise.

        White wins with a piece on row ``Dx - 1``, Black with a piece on
        row 0. A side that is to move without any legal move loses.
        """
        for i in range(Dy):
            if self.board[Dx - 1][i] == White:
                return 1.0
            elif self.board[0][i] == Black:
                return 0.0
        if len(self.legalMoves()) == 0:
            return 1.0 if self.turn == Black else 0.0
        return 0.5

    def terminal(self):
        """Return True once ``score()`` reports a win for either side."""
        return self.score() != 0.5

    def play(self, move):
        """Apply ``move`` in place (no legality check) and pass the turn."""
        self.board[move.x1][move.y1] = Empty
        self.board[move.x2][move.y2] = move.color
        self.turn = Black if self.turn == White else White

    def playout(self):
        """Play uniformly random legal moves in place until the game ends.

        Returns the final ``score()`` (1.0 or 0.0).
        """
        while True:
            # Evaluate the position once per ply; score() already has to
            # generate the legal moves to detect a blocked side.
            result = self.score()
            if result != 0.5:
                return result
            self.play(random.choice(self.legalMoves()))

    def print(self):
        """Write the board to stdout, one text line per row, with 1-based labels."""
        # Column header follows the actual number of columns.
        print("   " + " ".join(str(j + 1) for j in range(Dy)))
        # Rows are the first array axis (Dx), columns the second (Dy).
        for i in range(Dx):
            line = "{} |".format(i + 1)
            for j in range(Dy):
                if self.board[i][j] == Black:
                    line += "\u265F"
                elif self.board[i][j] == White:
                    line += "\u2659"
                else:
                    line += " "
                line += "|"
            print(line)


# Flat Monte Carlo

In [174]:
import copy


def flat(board, n):
    """Pick a move by flat Monte Carlo search.

    The budget of ``n`` playouts is split evenly (integer division) between
    the legal moves of ``board``. Each playout runs on a deep copy, so
    ``board`` itself is left untouched. Results are counted from the point
    of view of the side to move, and the first move with the highest total
    is returned.
    """
    candidates = board.legalMoves()
    top_total = 0
    top_index = 0
    for index, candidate in enumerate(candidates):
        total = 0
        for _ in range(n // len(candidates)):
            sim = copy.deepcopy(board)
            sim.play(candidate)
            outcome = sim.playout()
            # playout() scores from White's side; flip it when Black is choosing.
            total += (1 - outcome) if board.turn == Black else outcome
        if total > top_total:
            top_total, top_index = total, index
    return candidates[top_index]

In [175]:
# Fresh starting position for the flat Monte Carlo self-play game below.
b = Board()
b.print()

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙|♙|♙|♙|
3 | | | | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|


In [176]:
# Flat Monte Carlo plays both sides (100 playouts per move) until the game ends.
while not b.terminal():
    m = flat(b, 100)
    b.play(m)
    b.print()
    print(f'Best move: {m.x1 + 1},{m.y1 + 1} -> {m.x2 + 1},{m.y2 + 1}')
    # Evaluate once per move: every score() call regenerates the legal moves.
    result = b.score()
    print(f"Score: {result}")
    if result == 0:
        print("Black wins")
    elif result == 1:
        print("White wins")
    else:
        # score() returns 0.5 only while neither side has won yet.
        print("Game in progress")

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙|♙| |♙|
3 | | |♙| | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,4 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙|♙| |♙|
3 | | |♙|♟| |
4 |♟|♟|♟|♟| |
5 |♟|♟|♟|♟|♟|
Best move: 4,5 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♙| |♙|
3 | |♙|♙|♟| |
4 |♟|♟|♟|♟| |
5 |♟|♟|♟|♟|♟|
Best move: 2,1 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |♙|♟| |♙|
3 | |♙|♙| | |
4 |♟|♟|♟|♟| |
5 |♟|♟|♟|♟|♟|
Best move: 3,4 -> 2,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 | |♙|♙| |♙|
3 | |♙|♙| | |
4 |♟|♟|♟|♟| |
5 |♟|♟|♟|♟|♟|
Best move: 1,4 -> 2,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 | |♙|♙| |♙|
3 | |♙|♙| | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟| |
Best move: 5,5 -> 4,5
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 | |♙|♙| | |
3 | |♙|♙| |♙|
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟| |
Best move: 2,5 -> 3,5
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 | |♙|♙| | |
3 | |♙|♟| |♙|
4 |♟| |♟|♟|♟|
5 |♟|♟|♟|♟| |
Best move: 4,2 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 | |

# UCB

In [177]:
def ucb(board, n, c=0.4):
    """Pick a move with the UCB bandit rule over the root moves.

    Args:
        board: position to search; it is deep-copied for every playout and
            never modified.
        n: total number of playouts to run.
        c: exploration constant multiplying ``sqrt(log(i) / visits)``.
            Defaults to 0.4, the value that used to be hard-coded.

    Returns:
        The legal move with the most visits (first one on ties).

    Raises:
        IndexError: if ``board`` has no legal move.
    """
    moves = board.legalMoves()
    sumScores = [0.0] * len(moves)
    nbVisits = [0] * len(moves)
    for i in range(n):
        bestMove = 0
        bestUCB = 0
        for m in range(len(moves)):
            if nbVisits[m] > 0:
                # A visited move implies i >= 1 finished playouts, so log(i) is defined.
                score = sumScores[m] / nbVisits[m] + c * sqrt(log(i) / nbVisits[m])
            else:
                # Unvisited moves are always tried first, whatever the value of c.
                score = float("inf")
            if score > bestUCB:
                bestUCB = score
                bestMove = m
        b = copy.deepcopy(board)
        b.play(moves[bestMove])
        r = b.playout()
        # playout() scores from White's side; flip it when Black is choosing.
        if board.turn == Black:
            r = 1 - r
        sumScores[bestMove] += r
        nbVisits[bestMove] += 1
    # Final choice: the most visited move.
    bestVisits = 0
    bestMove = 0
    for m in range(len(moves)):
        if nbVisits[m] > bestVisits:
            bestVisits = nbVisits[m]
            bestMove = m
    return moves[bestMove]

In [178]:
# Fresh starting position for the UCB self-play game below.
b = Board()
b.print()

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙|♙|♙|♙|
3 | | | | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|


In [179]:
# UCB plays both sides (100 playouts per move) until the game ends.
while not b.terminal():
    m = ucb(b, 100)
    b.play(m)
    b.print()
    print(f'Best move: {m.x1 + 1},{m.y1 + 1} -> {m.x2 + 1},{m.y2 + 1}')
    # Evaluate once per move: every score() call regenerates the legal moves.
    result = b.score()
    print(f"Score: {result}")
    if result == 0:
        print("Black wins")
    elif result == 1:
        print("White wins")
    else:
        # score() returns 0.5 only while neither side has won yet.
        print("Game in progress")

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙|♙|
3 | |♙| | | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,3 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙|♙|
3 | |♙|♟| | |
4 |♟| |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,2 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙| |
3 | |♙|♟| |♙|
4 |♟| |♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,5 -> 3,5
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| |♙| |
3 | |♟|♟| |♙|
4 |♟| | |♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,3 -> 3,2
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| | | |
3 | |♟|♙| |♙|
4 |♟| | |♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,4 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙|♙| | | |
3 | |♟|♙|♟|♙|
4 |♟| | |♟| |
5 |♟|♟|♟|♟|♟|
Best move: 4,5 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| | | | |
3 |♙|♟|♙|♟|♙|
4 |♟| | |♟| |
5 |♟|♟|♟|♟|♟|
Best move: 2,2 -> 3,1
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♟| | |
3 |♙| |♙|♟|♙|
4 |♟| | |♟| |
5 |♟|♟|♟|♟|♟|
Best move: 3,2 -> 2,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙| |♙|
2 |♙|

In [180]:
# Flat Monte Carlo (White) against UCB (Black), 5000 playouts per move.
board = Board()
while not board.terminal():
    if board.turn == White:
        m = flat(board, 5000)
    else:
        m = ucb(board, 5000)
    board.play(m)
    board.print()
    print(f'Best move: {m.x1 + 1},{m.y1 + 1} -> {m.x2 + 1},{m.y2 + 1}')
    # Evaluate once per move: every score() call regenerates the legal moves.
    result = board.score()
    print(f"Score: {result}")
    if result == 0:
        print("UCB wins")
    elif result == 1:
        print("Flat wins")
    else:
        # score() returns 0.5 only while neither side has won yet.
        print("Game in progress")

   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙|♙|♙|
3 | | |♙| | |
4 |♟|♟|♟|♟|♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,2 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙|♙|♙|
3 | | |♟| | |
4 |♟|♟|♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,4 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙| |♙|
3 | | |♙| | |
4 |♟|♟|♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,4 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙| |♙|
3 | | |♟| | |
4 |♟| |♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 4,2 -> 3,3
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙| | |
3 | | |♟|♙| |
4 |♟| |♟| |♟|
5 |♟|♟|♟|♟|♟|
Best move: 2,5 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| |♙| | |
3 | | |♟|♟| |
4 |♟| |♟| | |
5 |♟|♟|♟|♟|♟|
Best move: 4,5 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| | | | |
3 | | |♟|♙| |
4 |♟| |♟| | |
5 |♟|♟|♟|♟|♟|
Best move: 2,3 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 |♙| | | | |
3 | | |♟|♟| |
4 |♟| | | | |
5 |♟|♟|♟|♟|♟|
Best move: 4,3 -> 3,4
Score: 0.5
Draw
   1 2 3 4 5
1 |♙|♙|♙|♙|♙|
2 | |

# Board class with hashcode

In [181]:
import random

# Board dimensions: Board.board is created with shape (Dx, Dy).
Dx = 5
Dy = 5
# Cell / player codes stored in Board.board; also the first index of hashTable.
Empty = 0
White = 1
Black = 2

# Zobrist keys: one random 64-bit integer per (cell code, x, y), indexed as
# hashTable[code][x][y], plus one key that is XORed in on every change of turn.
# The three loops must be nested: Board.play indexes hashTable[White] and
# hashTable[Black], so all three planes of Dx x Dy keys have to exist.
hashTable = [
    [[random.getrandbits(64) for j in range(Dy)] for i in range(Dx)]
    for k in range(3)
]
hashTurn = random.getrandbits(64)


class Board(object):
    """Breakthrough position with an incrementally maintained Zobrist hash.

    Attributes:
        h: hash of the position. It starts at 0 for the initial position
            and is updated by ``play`` with keys from ``hashTable`` and
            ``hashTurn``.
        turn: colour that moves next (``White`` starts).
        board: NumPy array of shape ``(Dx, Dy)`` holding ``Empty``,
            ``White`` or ``Black`` in every cell.
    """

    def __init__(self):
        self.h = 0
        self.turn = White
        self.board = np.zeros((Dx, Dy))
        # White fills the first two rows, Black the last two.
        self.board[0:2, :] = White
        self.board[Dx - 2:Dx, :] = Black

    def legalMoves(self):
        """Return the list of valid ``Move`` objects for the side to move.

        Pieces are scanned in row-major order and, per piece, target
        columns in the order left, straight, right.
        """
        # Move.valid only accepts a one-row step towards the opponent, so
        # the two other row offsets never need to be tried.
        forward = 1 if self.turn == White else -1
        moves = []
        for i in range(Dx):
            for j in range(Dy):
                if self.board[i][j] == self.turn:
                    for dy in (-1, 0, 1):
                        m = Move(self.turn, i, j, i + forward, j + dy)
                        if m.valid(self):
                            moves.append(m)
        return moves

    def score(self):
        """Return 1.0 if White has won, 0.0 if Black has won, 0.5 otherwise.

        White wins with a piece on row ``Dx - 1``, Black with a piece on
        row 0. A side that is to move without any legal move loses.
        """
        for i in range(Dy):
            if self.board[Dx - 1][i] == White:
                return 1.0
            elif self.board[0][i] == Black:
                return 0.0
        if len(self.legalMoves()) == 0:
            return 1.0 if self.turn == Black else 0.0
        return 0.5

    def terminal(self):
        """Return True once ``score()`` reports a win for either side."""
        return self.score() != 0.5

    def playout(self):
        """Play uniformly random legal moves in place until the game ends.

        Returns the final ``score()`` (1.0 or 0.0).
        """
        while True:
            # Evaluate the position once per ply; score() already has to
            # generate the legal moves to detect a blocked side.
            result = self.score()
            if result != 0.5:
                return result
            self.play(random.choice(self.legalMoves()))

    def play(self, move):
        """Apply ``move`` in place (no legality check), update ``h``, pass the turn."""
        # The array stores floats; the table needs an integer index.
        captured = int(self.board[move.x2][move.y2])
        if captured != Empty:
            # Remove the captured piece from the hash.
            self.h ^= hashTable[captured][move.x2][move.y2]
        # Put the mover on its destination and take it off its origin.
        self.h ^= hashTable[move.color][move.x2][move.y2]
        self.h ^= hashTable[move.color][move.x1][move.y1]
        self.h ^= hashTurn
        self.board[move.x2][move.y2] = move.color
        self.board[move.x1][move.y1] = Empty
        self.turn = Black if move.color == White else White

    def print(self):
        """Write the board to stdout, one text line per row, with 1-based labels."""
        # Column header follows the actual number of columns.
        print("   " + " ".join(str(j + 1) for j in range(Dy)))
        # Rows are the first array axis (Dx), columns the second (Dy).
        for i in range(Dx):
            line = "{} |".format(i + 1)
            for j in range(Dy):
                if self.board[i][j] == Black:
                    line += "\u265F"
                elif self.board[i][j] == White:
                    line += "\u2659"
                else:
                    line += " "
                line += "|"
            print(line)


# Transposition Table

In [182]:
# Upper bound on the number of legal moves in a position: a side starts with
# two rows of Dy pawns and each pawn has at most 3 moves, hence 3 * 2 * Dy.
# (This must scale with the number of columns Dy, not the number of rows Dx;
# the two only coincide on a square board.)
MaxLegalMoves = 6 * Dy
# Transposition table: maps a board hash (Board.h) to the entry stored by add().
Table = {}


def add(board):
    """Create (or reset) the transposition-table entry for ``board``.

    The entry is stored under ``board.h`` as ``[0, nplayouts, nwins]``,
    where both lists hold ``MaxLegalMoves`` zeros (one slot per move index).
    """
    # NOTE(review): the leading 0 is presumably the total playout count of
    # the position -- confirm against the UCT code that consumes the entry.
    playout_counts = [0.0] * MaxLegalMoves
    win_counts = [0.0] * MaxLegalMoves
    Table[board.h] = [0, playout_counts, win_counts]


def look(board):
    """Return the transposition-table entry stored under ``board.h``, or None if there is none."""
    key = board.h
    if key in Table:
        return Table[key]
    return None


# UCT

# Tournament between Flat, UCB and UCT