# TIC-TAC-TOE Markov Decision Process

In [1]:
import numpy as np

## Auxiliary functions to build the model

In [2]:
def board_to_id(B):
    """Takes a board matrix and returns a base-10 board ID.

    The first nine cells of the flattened (row-major) board are read as the
    digits of a base-3 number, most significant digit first.

    Args:
        B: NumPy array holding the board; cells are expected to be 0, 1 or 2
            (3x3, as produced by id_to_board).

    Returns:
        The board ID as a Python int.
    """
    symbols = B.flatten()
    board_id = 0
    for i in range(9):
        # Convert the cell to a Python int *before* scaling. Multiplying a
        # small-dtype NumPy scalar (e.g. int8) by 3 ** 8 overflows or raises
        # under NumPy 2 promotion rules.
        board_id += int(symbols[i]) * 3 ** (8 - i)
    return board_id

def board_to_ids(B):
    """Return the base-10 IDs of a board's four 90-degree rotations.

    The IDs are returned as a NumPy array sorted in ascending order, so the
    first element is the smallest ID among the rotations.
    """
    rotation_ids = np.array(
        [board_to_id(np.rot90(B, turns)) for turns in range(4)]
    )
    return np.sort(rotation_ids)

def id_to_board(id):
    """Convert a base-10 board ID into its 3x3 board matrix.

    The ID is written in base 3 and left-padded with zeros to nine digits;
    each digit becomes one cell, filled in row-major order (int8 cells).
    """
    ternary = np.base_repr(id, base=3).zfill(9)
    cells = np.array([int(digit) for digit in ternary], dtype=np.int8)
    return cells.reshape(3, 3)

def find_wins(B):
    """Count the completed lines of each marker on a board matrix.

    Every row, every column, the main diagonal and the anti-diagonal are
    inspected. Returns a tuple (x_wins, o_wins): the number of lines filled
    entirely with marker 1 and entirely with marker 2, respectively.
    """
    # Gather every candidate line: rows, columns, then the two diagonals.
    lines = list(B) + list(B.T)
    lines.append(np.diagonal(B))
    lines.append(np.diagonal(np.fliplr(B)))

    x_total = sum(1 for line in lines if np.all(line == 1))
    o_total = sum(1 for line in lines if np.all(line == 2))
    return x_total, o_total

def board_info(B):
    """Classify a board matrix as invalid, in progress or terminal.

    Marker 1 (X) is assumed to move first, so a reachable board has either
    the same number of 1s and 2s (O moved last) or exactly one more 1 than 2
    (X moved last).

    Args:
        B: Board matrix with cells 0 (empty), 1 (X) or 2 (O).

    Returns:
        A tuple (status, detail):
            (-1, -1): the board is invalid (unreachable).
            (0, p):   the game is in progress and p (1 or 2) moves next.
            (1, w):   the game is over; w is the winner (1 or 2) or 0 for a
                      draw.
    """
    x_count = np.count_nonzero(B == 1)
    o_count = np.count_nonzero(B == 2)
    diff = x_count - o_count
    if diff not in (0, 1):
        # Marker counts that alternating turns cannot produce.
        return -1, -1

    x_wins, o_wins = find_wins(B)

    if diff == 1:
        # Last player to move was X, so O cannot already have a line.
        if o_wins > 0:
            return -1, -1
        # X may complete two lines at once with a single move (the lines
        # share the cell just played), so any positive count is a real win.
        # With at most five X marks, two completed lines always intersect.
        if x_wins > 0:
            return 1, 1
        if x_count + o_count == 9:
            # Full board without a winner: draw.
            return 1, 0
        return 0, 2

    # diff == 0: last player to move was O, so X cannot already have a line.
    if x_wins > 0:
        return -1, -1
    if o_wins > 0:
        return 1, 2
    return 0, 1

def populate_lk_tables():
    """Build the lookup tables between board IDs and state numbers.

    Every one of the 3 ** 9 board IDs is visited. Boards that are rotations
    of each other share a single state, represented by the smallest ID among
    the rotations; a state number is assigned only to valid boards.

    Returns:
        A tuple (id_state_lkt, state_id_lkt) of int32 arrays:
            id_state_lkt: shape (3 ** 9, 3), indexed by board ID. Each row is
                [status, detail, state] as reported by board_info, or
                [-1, -1, -1] for invalid boards.
            state_id_lkt: shape (n_states, 3), indexed by state number. Each
                row is [status, detail, representative board ID].
    """
    # Number of boards to check.
    n_ids = 3 ** 9
    id_state_lkt = np.zeros((n_ids, 3), dtype=np.int32)
    # Rows are collected in a list and converted once at the end; growing an
    # array with np.append copies the whole table on every insertion.
    state_rows = []
    state = 0
    for board_id in range(n_ids):
        B = id_to_board(board_id)
        # Sorted IDs of the four rotations; ids[0] is the representative.
        ids = board_to_ids(B)
        if ids[0] < board_id:
            # A rotation with a smaller ID was already processed.
            continue
        status, detail = board_info(B)
        if status == -1:
            # Invalid board: mark every rotation as invalid.
            id_state_lkt[ids] = -1
        else:
            # All rotations map to the same state.
            id_state_lkt[ids] = [status, detail, state]
            state_rows.append([status, detail, ids[0]])
            state += 1

    # Row k describes state k. No dummy row is used, so nothing is trimmed;
    # trimming here would drop state 0 and shift every following state.
    state_id_lkt = np.array(state_rows, dtype=np.int32).reshape(-1, 3)
    return id_state_lkt, state_id_lkt


In [3]:
# Sanity check on a full board (five 1s, four 2s) with no completed line.
B = np.array([[1, 2, 1],[1, 1, 2],[2, 1, 2]])
print(B)
# Last expression of the cell: the notebook displays the returned tuple.
board_info(B)

[[1 2 1]
 [1 1 2]
 [2 1 2]]


(1, 0)

In [4]:
# Build both lookup tables: board ID -> state and state -> board ID.
id_state_lkt, state_id_lkt = populate_lk_tables()

In [14]:
# Print the board stored for every state whose first column is 1 (terminal).
for row in state_id_lkt:
    if row[0] == 1:
        # Third column holds the board ID recorded for the state.
        print(id_to_board(row[2])) 

[[0 0 0]
 [0 2 2]
 [1 1 1]]
[[0 0 0]
 [1 1 1]
 [0 2 2]]
[[0 0 0]
 [1 1 1]
 [2 0 2]]
[[0 0 0]
 [1 1 1]
 [2 2 0]]
[[0 0 0]
 [2 0 2]
 [1 1 1]]
[[0 0 0]
 [2 2 0]
 [1 1 1]]
[[0 0 1]
 [0 0 1]
 [2 2 1]]
[[0 0 1]
 [0 1 0]
 [1 2 2]]
[[0 0 1]
 [0 1 1]
 [2 2 2]]
[[0 0 1]
 [0 1 2]
 [1 0 2]]
[[0 0 1]
 [0 1 2]
 [1 2 0]]
[[0 0 1]
 [0 2 1]
 [2 0 1]]
[[0 0 1]
 [1 0 1]
 [2 2 2]]
[[0 0 1]
 [1 1 0]
 [2 2 2]]
[[0 0 1]
 [1 1 2]
 [1 2 2]]
[[0 0 1]
 [1 2 1]
 [2 2 1]]
[[0 0 1]
 [2 0 1]
 [0 2 1]]
[[0 0 1]
 [2 0 1]
 [2 0 1]]
[[0 0 1]
 [2 1 0]
 [1 0 2]]
[[0 0 1]
 [2 1 0]
 [1 2 0]]
[[0 0 1]
 [2 1 1]
 [1 2 2]]
[[0 0 1]
 [2 1 1]
 [2 2 1]]
[[0 0 1]
 [2 1 2]
 [1 0 0]]
[[0 0 1]
 [2 1 2]
 [1 1 2]]
[[0 0 1]
 [2 1 2]
 [1 2 1]]
[[0 0 1]
 [2 2 1]
 [0 0 1]]
[[0 0 1]
 [2 2 1]
 [1 2 1]]
[[0 0 1]
 [2 2 1]
 [2 1 1]]
[[0 0 1]
 [2 2 2]
 [0 1 1]]
[[0 0 1]
 [2 2 2]
 [1 0 1]]
[[0 0 1]
 [2 2 2]
 [1 1 0]]
[[0 0 2]
 [0 0 2]
 [1 1 1]]
[[0 0 2]
 [0 1 2]
 [1 1 2]]
[[0 0 2]
 [0 2 0]
 [1 1 1]]
[[0 0 2]
 [0 2 1]
 [2 1 1]]
[[0 0 2]
 [1 0 2]
 [