## Game interface

In [1]:
class Game:
    """Tic-tac-toe board plus the loop that alternates between two players.

    The board is a flat list of nine cells in row-major order. An empty cell
    holds ' '; an occupied cell holds the index (0 or 1) of the player that
    took it. Player objects must provide a ``move(game)`` method that ends up
    calling ``game.make_move``.
    """

    # Index triples that form a winning line: rows, columns, then diagonals.
    LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (6, 4, 2),
    )

    def __init__(self, player1, player2):
        self.gamestate = [' '] * 9
        self.turn = 0  # index into self.players of the player to move
        self.players = (player1, player2)
        self.history = []  # moves in the order they were played

    def get_legal_moves(self, gamestate=None):
        """Return the indices of the empty cells.

        Uses the live board when ``gamestate`` is None.
        """
        if gamestate is None:
            gamestate = self.gamestate
        return [pos for pos, cell in enumerate(gamestate) if cell == ' ']

    def project_move(self, gamestate, pos, turn):
        """Return a copy of ``gamestate`` with cell ``pos`` set to ``turn``.

        The list passed in is left untouched.
        """
        projected_gamestate = list(gamestate)
        projected_gamestate[pos] = turn
        return projected_gamestate

    def make_move(self, move):
        """Play ``move`` for the current player and pass the turn on."""
        self.gamestate[move] = self.turn
        self.history.append(move)
        self.turn = int(not self.turn)

    def get_boardstate(self, gamestate=None):
        """Print the board as three rows of three cells.

        Uses the live board when ``gamestate`` is None.
        """
        if gamestate is None:
            gamestate = self.gamestate
        for row_start in (0, 3, 6):
            print(gamestate[row_start], gamestate[row_start + 1], gamestate[row_start + 2])

    def result(self, gamestate=None):
        """Return 'player N won', 'draw' or 'pending' for a board.

        Uses the live board when ``gamestate`` is None. Lines are checked in
        the order of ``LINES`` and the first completed one decides the winner.
        """
        if gamestate is None:
            gamestate = self.gamestate

        for first, second, third in self.LINES:
            owner = gamestate[first]
            if owner != ' ' and owner == gamestate[second] and owner == gamestate[third]:
                return 'player ' + str(owner) + ' won'
        if ' ' not in gamestate:
            return 'draw'
        return 'pending'

    def game_start(self, hide=False):
        """Let the players move in turn until the game ends.

        Prints the final board unless ``hide`` is true and returns the final
        value of ``result()``.
        """
        while self.result() == 'pending':
            self.players[self.turn].move(self)
        if not hide:
            self.get_boardstate()
        # Report on this instance, not on whatever a global named `game` holds.
        return self.result()

In [2]:
from random import choice

class RandomAlgorithm:
    """Player that plays a randomly chosen legal move."""

    def move(self, game):
        """Pick one of the game's legal moves with ``choice`` and play it."""
        candidates = game.get_legal_moves()
        picked = choice(candidates)
        game.make_move(picked)

class PlayerInterface:
    """Human player that enters moves on standard input."""

    def move(self, game):
        """Show the board and the legal moves, then play the move typed in.

        Keeps asking until the input is an integer that is one of the legal
        moves, so a typo cannot crash the game or overwrite an occupied cell.
        """
        game.get_boardstate()
        legal_moves = game.get_legal_moves()
        print('all legal moves', legal_moves)
        while True:
            raw = input('enter move here: ')
            try:
                move = int(raw)
            except ValueError:
                print('invalid input, please enter a number')
                continue
            if move in legal_moves:
                break
            print('illegal move, choose one of', legal_moves)
        game.make_move(move)

In [3]:
# Start an interactive game between two human players: on every turn the board
# and the legal moves are printed and a move is read from standard input.
game = Game(PlayerInterface(),PlayerInterface())
game.game_start()

     
     
     
all legal moves [0, 1, 2, 3, 4, 5, 6, 7, 8]


enter move here:  0


0    
     
     
all legal moves [1, 2, 3, 4, 5, 6, 7, 8]


enter move here:  1


0 1  
     
     
all legal moves [2, 3, 4, 5, 6, 7, 8]


enter move here:  4


0 1  
  0  
     
all legal moves [2, 3, 5, 6, 7, 8]


enter move here:  2


0 1 1
  0  
     
all legal moves [3, 5, 6, 7, 8]


enter move here:  8


0 1 1
  0  
    0


'player 0 won'

In [4]:
# Start a game with a human as player 0 (moves first) against a player that
# picks random legal moves as player 1.
game = Game(PlayerInterface(),RandomAlgorithm())
game.game_start()

     
     
     
all legal moves [0, 1, 2, 3, 4, 5, 6, 7, 8]


enter move here:  0


0    
     
    1
all legal moves [1, 2, 3, 4, 5, 6, 7]


enter move here:  4


0    
  0  
  1 1
all legal moves [1, 2, 3, 5, 6]


enter move here:  2


0   0
  0 1
  1 1
all legal moves [1, 3, 6]


enter move here:  6


0   0
  0 1
0 1 1


'player 0 won'

### Minimax algorithm
No alpha-beta pruning implemented

In [5]:
class MinimaxAlgorithm:
    """Depth-limited minimax player in negamax form, without alpha-beta pruning.

    Every candidate move is scored with a heuristic for the player making it,
    minus the best score the opponent can reach in reply, looking ``depth``
    plies ahead.
    """

    # Index triples that form a winning line: rows, columns, then diagonals.
    LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (6, 4, 2),
    )
    WIN_SCORE = 500      # score of a move that completes a line
    OWN_PAIR_BONUS = 20  # two own marks on a line the opponent has not touched
    ENEMY_PAIR_PENALTY = 50  # two opponent marks on a line we have not touched

    def __init__(self, depth):
        self.depth = depth

    def _has_won(self, turn, gamestate):
        """Return True if player ``turn`` owns a complete line."""
        return any(
            all(gamestate[pos] == turn for pos in line)
            for line in self.LINES
        )

    def _heuristic(self, turn, gamestate):
        """Score a board without a completed line from ``turn``'s point of view."""
        enemy = int(not turn)
        value = 0
        for line in self.LINES:
            cells = [gamestate[pos] for pos in line]
            own = cells.count(turn)
            theirs = cells.count(enemy)
            if own == 2 and theirs == 0:
                value += self.OWN_PAIR_BONUS
            if theirs == 2 and own == 0:
                value -= self.ENEMY_PAIR_PENALTY
        return value

    def _search(self, game, turn, gamestate, depth):
        """Return ``(value, move)`` for the best move of ``turn``.

        ``move`` is None when the depth budget is used up or the board has no
        empty cell. Ties go to the first move in ``get_legal_moves`` order.
        """
        if depth == 0:
            return 0, None

        all_moves = game.get_legal_moves(gamestate)
        if not all_moves:
            return 0, None

        enemy = int(not turn)
        best_value = None
        best_move = None
        for candidate in all_moves:
            projected = game.project_move(gamestate, candidate, turn)
            if self._has_won(turn, projected):
                # The game is over: the opponent gets no reply, so nothing
                # may be subtracted from the winning score.
                value = self.WIN_SCORE
            else:
                value = self._heuristic(turn, projected)
                value -= self._search(game, enemy, projected, depth - 1)[0]
            if best_value is None or value > best_value:
                best_value = value
                best_move = candidate
        return best_value, best_move

    def move(self, game):
        """Search from the current position and play the best move found.

        Raises:
            ValueError: if the board has no empty cell left.
        """
        legal_moves = game.get_legal_moves()
        if not legal_moves:
            raise ValueError('no legal moves available')
        best_move = self._search(game, game.turn, game.gamestate, self.depth)[1]
        if best_move is None:
            # A depth of 0 yields no candidate; still play a legal cell.
            best_move = legal_moves[0]
        game.make_move(best_move)

In [6]:
# Start a game with a human as player 0 against the minimax algorithm
# (search depth 5) as player 1. The minimax algorithm is known to be buggy.
game = Game(PlayerInterface(),MinimaxAlgorithm(5))
game.game_start()

     
     
     
all legal moves [0, 1, 2, 3, 4, 5, 6, 7, 8]


enter move here:  0


0    
  1  
     
all legal moves [1, 2, 3, 5, 6, 7, 8]


enter move here:  8


0 1  
  1  
    0
all legal moves [2, 3, 5, 6, 7]


enter move here:  7


0 1  
  1  
1 0 0
all legal moves [2, 3, 5]


enter move here:  3


0 1 1
0 1  
1 0 0


'player 1 won'

### Monte Carlo tree search

In [7]:
from math import sqrt, log

class Node:
    """One position in the Monte Carlo search tree.

    A node stores the move that led to it and the player who made that move
    (``turn``). The tree root has ``parent`` None and stands for the live
    board of the game; its ``turn`` is the player who moved last, so the
    player the search works for is ``int(not root.turn)``.

    ``n`` counts visits and ``t`` sums playout scores. Scores are always from
    the searching player's point of view: 1 for a win, 0.5 for a draw, 0 for
    a loss.
    """

    EXPLORATION = 1.41           # weight of the exploration term
    UNVISITED_PRIORITY = 999999  # makes unvisited children get tried first

    def __init__(self, parent, move, turn):
        self.children = {}
        self.t = 0
        self.n = 0

        # Provisional value; get_root() and get_gamestate() refresh it.
        self.root = self if parent is None else parent
        self.parent = parent
        self.move = move
        self.turn = turn

    def get_gamestate(self, game):
        """Return the board this node stands for.

        Replays the moves on the path from the tree root to this node on top
        of ``game.gamestate``. For the root itself the live list is returned,
        not a copy. Also refreshes ``self.root``.
        """
        path = []
        node = self
        while node.parent is not None:
            path.append(node)
            node = node.parent
        self.root = node

        gamestate = game.gamestate
        for step in reversed(path):
            gamestate = game.project_move(gamestate, step.move, step.turn)
        return gamestate

    def get_root(self):
        """Store the current tree root in ``self.root`` and return it.

        The root is found by walking the parent links every time, so the
        result stays correct after an ancestor was detached from its parent.
        """
        node = self
        while node.parent is not None:
            node = node.parent
        self.root = node
        return node

    def expand(self, game):
        """Create one child per legal move, unless the node is already
        expanded or the game is over in this position."""
        if self.children:
            return
        gamestate = self.get_gamestate(game)
        if game.result(gamestate) != 'pending':
            return
        enemy = int(not self.turn)
        self.children = {
            move: Node(self, move, enemy)
            for move in game.get_legal_moves(gamestate)
        }

    def select(self, game):
        """Run one search iteration below this node and return its score.

        A leaf is scored with a playout on its first visit and expanded on a
        later one; a finished position is scored again on every visit. Inner
        nodes descend into the child with the highest UCB1 value. Every node
        on the path adds the score to ``t`` and 1 to ``n``.
        """
        root = self.get_root()

        if not self.children:
            if self.n > 0:
                self.expand(game)
            if not self.children:
                # First visit of a leaf, or a position where the game is over.
                score = self.random_path(game)
                self.n += 1
                self.t += score
                return score

        def get_value(child):
            if child.n == 0:
                return self.UNVISITED_PRIORITY
            mean = child.t / child.n
            if child.turn == root.turn:
                # This child is a move of the opponent, who prefers what is
                # bad for the searching player.
                mean = 1 - mean
            return mean + self.EXPLORATION * sqrt(log(self.n) / child.n)

        # max() keeps the first of several equally valued children.
        selected_node = max(self.children.values(), key=get_value)
        score = selected_node.select(game)
        self.n += 1
        self.t += score
        return score

    def random_path(self, game):
        """Play random moves from this position to the end of the game.

        Returns 1 if the searching player wins, 0.5 for a draw and 0
        otherwise.
        """
        gamestate = self.get_gamestate(game)  # also refreshes self.root
        turn = self.turn

        outcome = game.result(gamestate)
        while outcome == 'pending':
            turn = int(not turn)
            gamestate = game.project_move(gamestate, choice(game.get_legal_moves(gamestate)), turn)
            outcome = game.result(gamestate)

        if outcome == 'draw':
            return 0.5
        searcher = int(not self.root.turn)
        if outcome == 'player ' + str(searcher) + ' won':
            return 1
        return 0

class MonteCarloAlgorithm:
    """Player that picks its move with Monte Carlo tree search.

    The search tree is kept between moves: the subtree reached by the
    player's own last move and the opponent's reply becomes the next root.
    """

    ITERATIONS = 1000  # search iterations run for every move

    def __init__(self):
        self.root = None
        # Length of game.history right after this player's last move; used to
        # recognise whether the stored tree still belongs to the position.
        self._history_len = None

    def _reuse_subtree(self, game):
        """Return the stored subtree for the current position, or None.

        None is returned when there is no tree yet, when the history did not
        grow by exactly one move since this player's last move (for example
        because a new game was started), or when one of the last two moves is
        not present in the tree.
        """
        if self.root is None or self._history_len is None:
            return None
        if len(game.history) != self._history_len + 1:
            return None

        node = self.root
        for played in game.history[-2:]:
            node = node.children.get(played)
            if node is None:
                return None

        # Detach the subtree so that it becomes the root of the tree.
        node.parent = None
        node.move = None
        node.root = node
        return node

    def move(self, game):
        """Search from the current position and play the best move found.

        The child of the root with the highest total score ``t`` is played;
        ties go to the higher move index.

        Raises:
            ValueError: if the root has no children, i.e. there is no move
                to choose from.
        """
        enemy = int(not game.turn)
        self.root = self._reuse_subtree(game)
        if self.root is None:
            self.root = Node(None, None, enemy)

        self.root.expand(game)
        for _ in range(self.ITERATIONS):
            self.root.select(game)

        candidates = list(self.root.children.values())
        if not candidates:
            raise ValueError('no legal moves available')
        best_child = max(candidates, key=lambda child: (child.t, child.move))
        game.make_move(best_child.move)
        self._history_len = len(game.history)

In [8]:
# Start a game with a human as player 0 against the Monte Carlo tree search
# algorithm as player 1.
game = Game(PlayerInterface(),MonteCarloAlgorithm())
game.game_start()

     
     
     
all legal moves [0, 1, 2, 3, 4, 5, 6, 7, 8]


enter move here:  0


0   1
     
     
all legal moves [1, 3, 4, 5, 6, 7, 8]


enter move here:  8


0   1
  1  
    0
all legal moves [1, 3, 5, 6, 7]


enter move here:  1


0 0 1
  1  
1   0


'player 1 won'