## Tic Tac Toe - Minimax

The algorithm tries to `maximise` my score, while taking into account that you will try to `minimise` it.

## Legal moves

Get all `available` legal moves.

In [41]:
import numpy as np

def get_legal_moves(board):
    """Return the coordinates of every empty cell on a 3x3 board.

    Cells are scanned row by row, left to right.  A cell counts as empty
    when it holds a single space ``" "``; each one is reported as a
    ``(row, column)`` tuple.
    """
    return [
        (row, col)
        for row in range(3)
        for col in range(3)
        if board[row][col] == " "
    ]

# Example position: one row per string, one cell per character.
board = np.array([
    list(" OX"),
    list("OX "),
    list("OX "),
])

print(get_legal_moves(board))

[(0, 0), (1, 2), (2, 2)]


## Show board

Display the `current board` and color the last move.

In [42]:
def show(board, move=None):
    """Print a 3x3 board, optionally highlighting one cell.

    Args:
        board: 3x3 grid of single-character marks ("X", "O" or " "),
            given as a NumPy array or as nested lists.  It is not modified;
            the function works on a copy.
        move: optional ``(row, column)`` of the cell to print in cyan using
            ANSI escape codes (typically the last move played).
    """
    cells = np.copy(board).tolist()  # plain nested-list copy of the board

    # Color last move
    if move is not None:
        i, j = move
        CYAN, ENDC = '\033[96m', '\033[0m'
        # Read the mark from the copied nested list instead of
        # ``board[move]``: tuple indexing only works on NumPy arrays and
        # would raise TypeError for a nested-list board.
        cells[i][j] = CYAN + cells[i][j] + ENDC

    # Show board
    for i, row in enumerate(cells):
        print(" ", row[0], "|", row[1], "|", row[2])
        if i < 2:  # separator between rows, not after the last one
            print(" ---+---+---")

# Example position; the top-left X is highlighted as the last move.
board = np.array([
    list("XOX"),
    list("OX "),
    list("OX "),
])

show(board, move=(0, 0))

  [96mX[0m | O | X
 ---+---+---
  O | X |  
 ---+---+---
  O | X |  


## Evaluate score

Evaluate the `current score` and check whether the current board is in a `terminal state`.

In [43]:
def evaluate_score(board):
    """Score a 3x3 board from X's point of view.

    Returns 1 when a completed line belongs to "X", -1 when a completed
    line holds any other non-blank mark, and 0 when no line is complete.
    Lines are checked rows first, then columns, then the two diagonals;
    the first completed line found decides the result.
    """
    rows = [[(r, c) for c in range(3)] for r in range(3)]
    columns = [[(r, c) for r in range(3)] for c in range(3)]
    diagonals = [
        [(k, k) for k in range(3)],      # top-left to bottom-right
        [(k, 2 - k) for k in range(3)],  # top-right to bottom-left
    ]

    for line in rows + columns + diagonals:
        first, second, third = (board[r][c] for r, c in line)
        if first != " " and first == second == third:
            return 1 if first == "X" else -1
    return 0

def is_terminal_state(board):
    """Tell whether the game on `board` is over.

    The game is over when the board's score is 1 (X win) or -1 (O win),
    or when no legal move is left (draw).
    """
    if evaluate_score(board) in (1, -1):  # somebody completed a line
        return True
    return not get_legal_moves(board)     # full board without a winner

# Example position: the middle row is a completed line of X's.
board = np.array([
    list("XOX"),
    list("XXX"),
    list("O O"),
])

assert is_terminal_state(board)  # horizontal win

## Minimax

We go down the game tree recursively in order to `find the best` choices.

In [44]:
def minimax(board, player=True):
    """Pick the best move for the side to play using recursive minimax.

    Args:
        board: 3x3 grid of "X", "O" and " " marks.  It is not modified:
            every candidate move is tried on a NumPy copy.
        player: True when "X" (the maximiser) is to move, False when "O"
            (the minimiser) is to move.

    Returns:
        A ``(move, score)`` tuple.  ``move`` is the chosen
        ``(row, column)`` and ``score`` is the result it leads to when both
        sides play their best (1 = X wins, -1 = O wins, 0 = draw).  When
        the game on ``board`` is already over, ``move`` is None and
        ``score`` is the score of ``board`` itself.
    """
    # A finished game has nothing to search.  Without this guard a full
    # board would yield (None, +/-inf) and a decided board would keep
    # being played on.
    if is_terminal_state(board):
        return None, evaluate_score(board)

    best_move = None
    best_score = float("-inf") if player else float("+inf")  # initialize score
    mark = 'X' if player else 'O'

    for move in get_legal_moves(board):  # possible moves
        new_board = np.copy(board)
        new_board[move] = mark

        # Base case: this move ends the game.  `board` itself was not
        # terminal, so the move either wins for the side that played it or
        # fills the last free cell; no other move can do better.
        if is_terminal_state(new_board):
            return move, evaluate_score(new_board)

        # Score of the position after the opponent's best reply
        _, score = minimax(new_board, not player)  # Recursive

        if player:
            if score > best_score:   # maximiser keeps the highest score
                best_score = score
                best_move = move
        else:
            if score < best_score:   # minimiser keeps the lowest score
                best_score = score
                best_move = move

    return best_move, best_score

# Example position: one row per string, one cell per character.
board = np.array([
    list(" OX"),
    list("OX "),
    list(" X "),
])

# X to move
best_move, best_score = minimax(board, player=True)
print(best_move, best_score)

(2, 0) 1


## Pruning

With `alpha-beta pruning` we avoid traversing irrelevant parts of the tree.

In [45]:
def minimax(board, player=True, alpha=float('-inf'), beta=float('inf')):
    """Pick the best move for the side to play using minimax with
    alpha-beta pruning.

    Args:
        board: 3x3 grid of "X", "O" and " " marks.  It is not modified:
            every candidate move is tried on a NumPy copy.
        player: True when "X" (the maximiser) is to move, False when "O"
            (the minimiser) is to move.
        alpha: highest score found so far for the maximiser; raised at
            maximiser nodes and passed down to the children.
        beta: lowest score found so far for the minimiser; lowered at
            minimiser nodes and passed down to the children.

    Returns:
        A ``(move, score)`` tuple.  ``move`` is the chosen
        ``(row, column)`` and ``score`` the result it leads to
        (1 = X wins, -1 = O wins, 0 = draw).  When the game on ``board``
        is already over, ``move`` is None and ``score`` is the score of
        ``board`` itself.
    """
    # A finished game has nothing to search.  Without this guard a full
    # board would yield (None, +/-inf) and a decided board would keep
    # being played on.
    if is_terminal_state(board):
        return None, evaluate_score(board)

    best_move = None
    best_score = float("-inf") if player else float("+inf")  # initialize score
    mark = 'X' if player else 'O'

    for move in get_legal_moves(board):  # possible moves
        new_board = np.copy(board)
        new_board[move] = mark

        # Base case: this move ends the game.  `board` itself was not
        # terminal, so the move either wins for the side that played it or
        # fills the last free cell; no other move can do better.
        if is_terminal_state(new_board):
            return move, evaluate_score(new_board)

        # Children scores
        _, score = minimax(new_board, not player, alpha, beta)  # Recursive

        if player:
            if score > best_score:
                best_score = score  # child best score
                best_move = move    # parent move
            alpha = max(alpha, score)
        else:
            if score < best_score:
                best_score = score
                best_move = move
            beta = min(beta, score)

        if beta <= alpha:  # pruning condition: remaining moves cannot matter
            break

    return best_move, best_score

# Same example position as before, this time with O to move.
board = np.array([
    list(" OX"),
    list("OX "),
    list(" X "),
])

# O to move
best_move, best_score = minimax(board, player=False)
print(best_move, best_score)

(2, 0) 0


## Play

In [46]:
def play(board, player=True, expected=None):
    """Play the game out from `board`, both sides moving via `minimax`.

    Each call plays one move for the side to move, prints the board with
    that move highlighted, and then recurses for the other side until the
    game is over.

    Args:
        board: 3x3 board that supports ``board[(row, column)]`` assignment
            (e.g. a NumPy array).  It is modified in place.
        player: True when "X" is to move, False when "O" is to move.
        expected: optional expected final score (1 = X wins, -1 = O wins,
            0 = draw).  When given, the final score is checked against it
            and 'Test passed' is printed; when None, no check is made.

    Returns:
        False from the call whose move ends the game; None from calls
        that hand over to the other side (the nested result is not
        propagated).

    Raises:
        ValueError: if the game on `board` is already over.
        AssertionError: if `expected` is given and differs from the final
            score.
    """
    # With no move to play, minimax yields no usable move and
    # ``board[None] = mark`` would overwrite every cell of a NumPy board.
    if is_terminal_state(board):
        raise ValueError("game is already over: no move can be played")

    print("\nX" if player else "\nO", "move")

    best_move, _ = minimax(board, player)
    board[best_move] = 'X' if player else 'O'
    show(board, best_move)

    if is_terminal_state(board):
        score = evaluate_score(board)  # result read from the final board
        if score ==  1: print('X won!')
        if score == -1: print('O won!')
        if score ==  0: print('Draw!')

        # Only check when the caller supplied an expectation.  The explicit
        # raise keeps the AssertionError even when Python runs with -O.
        if expected is not None:
            if expected != score:
                raise AssertionError(
                    f"expected final score {expected}, got {score}"
                )
            print('Test passed \n')
        return False  # Base case

    play(board, not player, expected)  # Recursive case

# One scenario: (starting board, side to move, expected final score).
game = (
    np.array([
        list("XOX"),
        list("OX "),
        list("OX "),
    ]),
    False,  # O moves first
    0,      # expected result: draw
)

board, player, expected = game
print(f"{'X' if player else 'O'} to move")

show(board)
play(board, player, expected)

O to move
  X | O | X
 ---+---+---
  O | X |  
 ---+---+---
  O | X |  

O move
  X | O | X
 ---+---+---
  O | X |  
 ---+---+---
  O | X | [96mO[0m

X move
  X | O | X
 ---+---+---
  O | X | [96mX[0m
 ---+---+---
  O | X | O
Draw!
Test passed 



## References

[Minimax Algorithm](https://mathspp.com/blog/minimax-algorithm-and-alpha-beta-pruning) mathspp   
[Alpha Beta pruning](https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning) wikipedia   
[Tic Tac Toe game](https://github.com/minte9/algorithms-pages/tree/main/main/applications/tic_tac_toe) github