# Search with Uncertainty: Solving Tic-Tac-Toe with And-Or-Search

## Introduction 

Multiplayer games can be implemented as:
1. The opponent is seen as part of an environment with nondeterministic actions. Non-determinism is the result of the unknown opponent's moves. 
2. Use adversarial search (the opponent acts strategically).

Here we will implement Tic-Tac-Toe (see [rules](https://en.wikipedia.org/wiki/Tic-tac-toe)) by modeling the opponent's moves as the outcomes of nondeterministic actions. We will search for a __conditional plan__ using And-Or-search. 

## State Space

Each state is a possible board. How large is the state space? Give the equation and the number.

In [1]:
# each of the 9 squares can have 3 values (empty, x and o).
# This counts every assignment of the 9 squares, so it is an upper bound:
# it also includes boards that can never occur in a legal game (e.g. nine x's).

3**9

19683

__Note:__ The state space is symmetric. This could be used to make the search space smaller, but the search space is small and we will ignore this.

## The board

I represent the board as a vector of length 9. The values are `' ', 'x', 'o'`.  

In [2]:
def empty_board():
    """Return a new empty board: a list of nine ' ' entries, one per square."""
    return [' ' for _ in range(9)]

# Create an empty board and show its raw list representation.
board = empty_board()
display(board)

[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']

Some helper functions.

In [3]:
import numpy as np

def show_board(board):
    """Print the board as a 3x3 grid.

    board: the nine squares as a flat sequence; it is reshaped row by row.
    """
    grid = np.asarray(board).reshape(3, 3)
    print(grid)

# Show an empty board first ...
board = empty_board()
show_board(board)    

print()
print("Add some x's")
# ... then fill squares 0, 3 and 6, i.e. the left column of the 3x3 grid.
board[0] = 'x'; board[3] = 'x'; board[6] = 'x';  
show_board(board)

[[' ' ' ' ' ']
 [' ' ' ' ' ']
 [' ' ' ' ' ']]

Add some x's
[['x' ' ' ' ']
 ['x' ' ' ' ']
 ['x' ' ' ' ']]


In [4]:
def check_win(board):
    """Classify a board.

    Returns the mark found on a complete row, column or diagonal (x or o),
    otherwise 'd' (draw) when no square is empty, otherwise 'n' (next move).
    Rows are examined first, then columns, then the two diagonals; the first
    complete line found decides the result.
    """
    grid = np.array(board).reshape((3,3))
    size = len(grid)

    # rows, then columns (the rows of the transposed grid)
    for lines in (grid, grid.T):
        for line in lines:
            if line[0] != ' ' and len(set(line)) == 1:
                return line[0]

    # main diagonal, then anti-diagonal
    diagonals = (
        [grid[k][k] for k in range(size)],
        [grid[k][size - 1 - k] for k in range(size)],
    )
    for diag in diagonals:
        if diag[0] != ' ' and len(set(diag)) == 1:
            return diag[0]

    # no line and no empty square left: draw
    if not (grid == ' ').any():
        return 'd'

    return 'n'

# `board` still holds the three x's placed in the left column above.
show_board(board)
print('Win? ' + check_win(board))

print()
# An empty board has no line and is not full, so the game continues.
show_board(empty_board())
print('Win? ' + check_win(empty_board()))

[['x' ' ' ' ']
 ['x' ' ' ' ']
 ['x' ' ' ' ']]
Win? x

[[' ' ' ' ' ']
 [' ' ' ' ' ']
 [' ' ' ' ' ']]
Win? n


In [5]:
def get_actions(board):
    """Return the possible actions: the indices of all empty (' ') squares, as a list."""
    is_empty = np.asarray(board) == ' '
    return np.nonzero(is_empty)[0].tolist()

# List the free squares of the board with x's on squares 0, 3 and 6.
show_board(board)
get_actions(board)

[['x' ' ' ' ']
 ['x' ' ' ' ']
 ['x' ' ' ' ']]


[1, 2, 4, 5, 7, 8]

In [6]:
def results(state, action, player = 'x', other = 'o'):
    """Produce the belief state after the provided action for player.

    The belief state is the list of boards that can follow the move: one board
    for every possible reaction by the opponent. If the move ends the game
    (the player completes a line, or no empty square is left) the opponent
    cannot react and the belief state is the single board after the move.

    state:  the board as a sequence of nine squares; it is not modified
            (the move is applied to a copy).
    action: index of the square the player marks.
    player: mark of the player making the move.
    other:  mark of the opponent who reacts.
    """
    state = state.copy()
    state[action] = player

    # The game is over as soon as the mover completes a line. Generating
    # reactions past that point would produce boards that cannot occur, and on
    # some of them the opponent also "completes" a line, so a winning move
    # could be misread as a loss.
    lines = [(0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
             (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
             (0, 4, 8), (2, 4, 6)]              # diagonals
    if any(all(state[i] == player for i in line) for line in lines):
        return [state]

    o_actions = get_actions(state)

    # board is full after the player's move: nothing left for the opponent
    if not o_actions:
        return [state]

    belief = []
    for o_a in o_actions:
        s = state.copy()
        s[o_a] = other
        belief.append(s)

    return belief

show_board(empty_board())

print()
print("Belief state for placing an x at position 4:")
# Square 4 is the centre; the belief state holds one board per possible o reply.
results(empty_board(), 4)

[[' ' ' ' ' ']
 [' ' ' ' ' ']
 [' ' ' ' ' ']]

Belief state for placing an x at position 4:


[['o', ' ', ' ', ' ', 'x', ' ', ' ', ' ', ' '],
 [' ', 'o', ' ', ' ', 'x', ' ', ' ', ' ', ' '],
 [' ', ' ', 'o', ' ', 'x', ' ', ' ', ' ', ' '],
 [' ', ' ', ' ', 'o', 'x', ' ', ' ', ' ', ' '],
 [' ', ' ', ' ', ' ', 'x', 'o', ' ', ' ', ' '],
 [' ', ' ', ' ', ' ', 'x', ' ', 'o', ' ', ' '],
 [' ', ' ', ' ', ' ', 'x', ' ', ' ', 'o', ' '],
 [' ', ' ', ' ', ' ', 'x', ' ', ' ', ' ', 'o']]

## Recursive DFS algorithm for And-Or Search

See AIMA page 125. 

The implementation is for player 'x' and returns a conditional plan as a list of lists.

Modifications:
* Removed path since it is not used.
* No cycle checking (not needed).
* Goal: 
    - End search when player loses. 
    - Also check for a loss in the And phase (i.e., after the opponent's reply).
    - Draw can be set as a goal state or a loss.


In [7]:
def is_goal(state, draw = True):
    """Translate check_win's verdict into a search outcome for player 'x'.

    Returns 'win' when x has won, 'draw' for a drawn board if `draw` is true,
    None (failure) when o has won or when the board is drawn and `draw` is
    false, and False when the game is not over and the search must continue.
    """
    outcome = check_win(state)
    if outcome == 'x':
        return 'win'
    # a loss is always a failure; a draw is one only if draws are not accepted
    if outcome == 'o' or (outcome == 'd' and not draw):
        return None
    if outcome == 'd':
        return 'draw'
    return False  # continue

In [8]:
# Equation and number

In [9]:
def and_or_search(board, draw = True):
    """Entry point of the search: run the Or step on the given board.

    draw: passed on to the search; when true a draw is accepted as a goal state.
    Returns whatever or_search returns (a conditional plan, a goal label, or None).
    """
    plan = or_search(board, draw)
    return plan

def or_search(state, draw):
    """Or step of the search: the player chooses one action.

    If `state` is already terminal, is_goal's verdict is returned as is (a goal
    label, or None for failure). Otherwise the available actions are tried in
    order and [action, plan] is returned for the first action whose And step
    yields a plan. If no action works, failure (None) is returned.
    """
    verdict = is_goal(state, draw)
    if verdict is not False:
        return verdict

    # No cycle check: every action fills an empty square, so a board never repeats.
    for move in get_actions(state):
        subplan = and_search(results(state, move), draw)
        if subplan is None:
            continue  # this action can fail; try the next one
        return [move, subplan]

    return None

def and_search(states, draw):
    """And step of the search: every state in the belief state must be handled.

    Each state is examined in order. A terminal state ends the step at once and
    is_goal's verdict for it is returned (a goal label, or None for failure).
    Otherwise the Or step is run on the state; if it fails, the whole step
    fails (None). When all states were handled, the list of
    ['if', state, 'then', plan] branches is returned.
    """
    branches = []
    for outcome_state in states:
        # goal check on the board as it stands after the move and the reply
        verdict = is_goal(outcome_state, draw)
        if verdict is not False:
            return verdict

        subplan = or_search(outcome_state, draw)
        if subplan is None:
            return None
        branches.append(['if', outcome_state, 'then', subplan])

    return branches
    

Some tests:

In [10]:
# Scenario: x holds squares 0 and 4 (two squares of the main diagonal 0-4-8),
# o holds squares 1 and 3. Both players have moved twice, so x is to move.
board = empty_board() 
board[0] = 'x'
board[1] = 'o'
board[3] = 'o'
board[4] = 'x'

print("Board:")
show_board(board)

print()
print("Win or draw:")
# draw defaults to True: a draw is accepted as a goal state
display(and_or_search(board))

print()
print("Win only:")
# draw = False: only plans in which every branch ends in a win are accepted
display(and_or_search(board, draw = False))

Board:
[['x' 'o' ' ']
 ['o' 'x' ' ']
 [' ' ' ' ' ']]

Win or draw:


[2,
 [['if', ['x', 'o', 'x', 'o', 'x', 'o', ' ', ' ', ' '], 'then', [6, 'win']],
  ['if',
   ['x', 'o', 'x', 'o', 'x', ' ', 'o', ' ', ' '],
   'then',
   [5,
    [['if', ['x', 'o', 'x', 'o', 'x', 'x', 'o', 'o', ' '], 'then', [8, 'win']],
     ['if',
      ['x', 'o', 'x', 'o', 'x', 'x', 'o', ' ', 'o'],
      'then',
      [7, 'draw']]]]],
  ['if',
   ['x', 'o', 'x', 'o', 'x', ' ', ' ', 'o', ' '],
   'then',
   [5,
    [['if', ['x', 'o', 'x', 'o', 'x', 'x', 'o', 'o', ' '], 'then', [8, 'win']],
     ['if',
      ['x', 'o', 'x', 'o', 'x', 'x', ' ', 'o', 'o'],
      'then',
      [6, 'win']]]]],
  ['if',
   ['x', 'o', 'x', 'o', 'x', ' ', ' ', ' ', 'o'],
   'then',
   [5,
    [['if',
      ['x', 'o', 'x', 'o', 'x', 'x', 'o', ' ', 'o'],
      'then',
      [7, 'draw']],
     ['if',
      ['x', 'o', 'x', 'o', 'x', 'x', ' ', 'o', 'o'],
      'then',
      [6, 'win']]]]]]]


Win only:


[2,
 [['if', ['x', 'o', 'x', 'o', 'x', 'o', ' ', ' ', ' '], 'then', [6, 'win']],
  ['if', ['x', 'o', 'x', 'o', 'x', ' ', 'o', ' ', ' '], 'then', [8, 'win']],
  ['if',
   ['x', 'o', 'x', 'o', 'x', ' ', ' ', 'o', ' '],
   'then',
   [5,
    [['if', ['x', 'o', 'x', 'o', 'x', 'x', 'o', 'o', ' '], 'then', [8, 'win']],
     ['if',
      ['x', 'o', 'x', 'o', 'x', 'x', ' ', 'o', 'o'],
      'then',
      [6, 'win']]]]],
  ['if', ['x', 'o', 'x', 'o', 'x', ' ', ' ', ' ', 'o'], 'then', [6, 'win']]]]

In [11]:
# Scenario: x holds squares 0 and 2, o holds squares 1 and 4. o already has two
# squares of the middle column (1-4-7), so x has to deal with square 7.
board = empty_board() 
board[0] = 'x'
board[1] = 'o'
board[2] = 'x'
#board[3] = 'o'
board[4] = 'o'

print("Board:")
show_board(board)

print()
print("Win or draw:")
display(and_or_search(board))

print()
print("Win only:")
# draw = False: a draw counts as failure
display(and_or_search(board, draw = False))

Board:
[['x' 'o' 'x']
 [' ' 'o' ' ']
 [' ' ' ' ' ']]

Win or draw:


[7,
 [['if',
   ['x', 'o', 'x', 'o', 'o', ' ', ' ', 'x', ' '],
   'then',
   [5,
    [['if', ['x', 'o', 'x', 'o', 'o', 'x', 'o', 'x', ' '], 'then', [8, 'win']],
     ['if',
      ['x', 'o', 'x', 'o', 'o', 'x', ' ', 'x', 'o'],
      'then',
      [6, 'draw']]]]],
  ['if',
   ['x', 'o', 'x', ' ', 'o', 'o', ' ', 'x', ' '],
   'then',
   [3,
    [['if',
      ['x', 'o', 'x', 'x', 'o', 'o', 'o', 'x', ' '],
      'then',
      [8, 'draw']],
     ['if',
      ['x', 'o', 'x', 'x', 'o', 'o', ' ', 'x', 'o'],
      'then',
      [6, 'win']]]]],
  ['if',
   ['x', 'o', 'x', ' ', 'o', ' ', 'o', 'x', ' '],
   'then',
   [3,
    [['if',
      ['x', 'o', 'x', 'x', 'o', 'o', 'o', 'x', ' '],
      'then',
      [8, 'draw']],
     ['if',
      ['x', 'o', 'x', 'x', 'o', ' ', 'o', 'x', 'o'],
      'then',
      [5, 'draw']]]]],
  ['if',
   ['x', 'o', 'x', ' ', 'o', ' ', ' ', 'x', 'o'],
   'then',
   [3,
    [['if', ['x', 'o', 'x', 'x', 'o', 'o', ' ', 'x', 'o'], 'then', [6, 'win']],
     ['if',
      ['x', '


Win only:


None

In [12]:
# Scenario: o holds squares 0, 1 and 3, x holds squares 4 and 8. o has two open
# threats at once: square 2 completes the top row (0-1-2) and square 6 completes
# the left column (0-3-6). x can block only one of them with a single move.
board = empty_board() 
board[0] = 'o'
board[1] = 'o'
board[3] = 'o'
board[4] = 'x'
board[8] = 'x'

print("Board:")
show_board(board)

print()
print("Win or draw:")
display(and_or_search(board))

print()
print("Win only:")
display(and_or_search(board, draw = False))

Board:
[['o' 'o' ' ']
 ['o' 'x' ' ']
 [' ' ' ' 'x']]

Win or draw:


None


Win only:


None

In [13]:
# Scenario: search the complete game, starting from the empty board.
board = empty_board() 

print("Board:")
show_board(board)

print()
print("Win or draw:")
display(and_or_search(board))

print()
print("Win only:")
# draw = False: a draw counts as failure
display(and_or_search(board, draw = False))

Board:
[[' ' ' ' ' ']
 [' ' ' ' ' ']
 [' ' ' ' ' ']]

Win or draw:


[0,
 [['if',
   ['x', 'o', ' ', ' ', ' ', ' ', ' ', ' ', ' '],
   'then',
   [2,
    [['if',
      ['x', 'o', 'x', 'o', ' ', ' ', ' ', ' ', ' '],
      'then',
      [4,
       [['if',
         ['x', 'o', 'x', 'o', 'x', 'o', ' ', ' ', ' '],
         'then',
         [6, 'win']],
        ['if',
         ['x', 'o', 'x', 'o', 'x', ' ', 'o', ' ', ' '],
         'then',
         [5,
          [['if',
            ['x', 'o', 'x', 'o', 'x', 'x', 'o', 'o', ' '],
            'then',
            [8, 'win']],
           ['if',
            ['x', 'o', 'x', 'o', 'x', 'x', 'o', ' ', 'o'],
            'then',
            [7, 'draw']]]]],
        ['if',
         ['x', 'o', 'x', 'o', 'x', ' ', ' ', 'o', ' '],
         'then',
         [5,
          [['if',
            ['x', 'o', 'x', 'o', 'x', 'x', 'o', 'o', ' '],
            'then',
            [8, 'win']],
           ['if',
            ['x', 'o', 'x', 'o', 'x', 'x', ' ', 'o', 'o'],
            'then',
            [6, 'win']]]]],
        ['if',
        


Win only:


None