# TIC-TAC-TOE Markov Decision Process

In [5]:
import numpy as np

## Auxiliary functions to build the model

In [6]:
def board_to_id(B):
    """Return the base-10 ID of a board, reading its cells as base-3 digits.

    The board is flattened in row-major order; the first cell is the most
    significant digit and the ninth cell the least significant one, so a 3x3
    board holding the symbols 0, 1 and 2 maps to an ID in ``[0, 3 ** 9)``.

    Args:
        B: Board matrix (NumPy array) with at least nine integer cells.

    Returns:
        The board ID as a plain Python ``int``.
    """
    symbols = B.flatten()
    # Convert each cell to a Python int *before* scaling it. Multiplying a
    # narrow NumPy scalar (e.g. int8) by a power of 3 that does not fit in
    # that dtype can overflow or raise, depending on NumPy's promotion rules.
    return sum(int(symbols[i]) * 3 ** (8 - i) for i in range(9))

def board_to_ids(B):
    """Return the IDs of every distinct symmetric variant of a board.

    The variants are the four quarter-turn rotations of the board and the
    four quarter-turn rotations of its left-right mirror image. An up-down
    flip is not enumerated separately because it equals a left-right flip
    followed by a half turn.

    Args:
        B: Board matrix (NumPy array).

    Returns:
        A 2-D integer array with one row ``[board_id, flip, rot]`` per
        distinct ID, sorted by ``board_id`` in ascending order. ``flip`` is 1
        when the board was mirrored left-right first (0 otherwise) and ``rot``
        is the number of ``np.rot90`` quarter turns applied afterwards. When
        several transforms yield the same ID only the first one found is kept.
    """
    seen_ids = set()
    rows = []
    for flip, base in enumerate((B, np.fliplr(B))):
        for rot in range(4):
            board_id = board_to_id(np.rot90(base, rot))
            if board_id in seen_ids:
                continue
            seen_ids.add(board_id)
            rows.append([board_id, flip, rot])
    table = np.array(rows)
    # Order the variants by ID so the smallest (canonical) one comes first.
    return table[table[:, 0].argsort()]

def id_to_board(id):
    """Decode a base-10 board ID into its 3x3 board matrix.

    The ID is written in base 3 and left-padded with zeros to nine digits;
    each digit becomes one cell, filled in row-major order.

    Args:
        id: Board ID, expected in ``[0, 3 ** 9)``.

    Returns:
        A ``(3, 3)`` array of dtype ``int8`` holding the cell symbols.
    """
    digits = f"{np.base_repr(id, base=3):0>9}"
    cells = [int(ch) for ch in digits]
    return np.asarray(cells, dtype=np.int8).reshape((3, 3))

def find_win(B, mark):
    """Tell whether `mark` fills a complete line of the board.

    A line is any row, any column, the main diagonal or the anti-diagonal.

    Args:
        B: Board matrix (NumPy array).
        mark: Cell symbol to look for.

    Returns:
        True if at least one line consists only of `mark`, False otherwise.
    """
    hits = B == mark
    lines = [
        *hits,                        # rows
        *hits.T,                      # columns
        np.diagonal(hits),            # main diagonal
        np.diagonal(np.fliplr(hits)),  # anti-diagonal
    ]
    return any(bool(np.all(line)) for line in lines)

def board_info(B):
    """Classify a board as invalid, ongoing or terminal.

    X is mark 1 and O is mark 2. A board is only reachable when X has placed
    either as many marks as O or exactly one more, and when the player who is
    about to move does not already own a winning line.

    Args:
        B: Board matrix (NumPy array).

    Returns:
        A pair ``(status, player)``:

        * ``(-1, -1)``: invalid board.
        * ``(1, winner)``: terminal board won by ``winner`` (1 or 2).
        * ``(1, 0)``: terminal board that is full with no winner.
        * ``(0, next_player)``: ongoing board; ``next_player`` moves next.
    """
    x_count = np.count_nonzero(B == 1)
    o_count = np.count_nonzero(B == 2)
    lead = x_count - o_count
    if lead not in (0, 1):
        return -1, -1

    # X moved last when it is one mark ahead; otherwise O moved last.
    last_mover, next_mover = (1, 2) if lead == 1 else (2, 1)

    # The player who did not move last cannot hold a completed line.
    if find_win(B, next_mover):
        return -1, -1
    if find_win(B, last_mover):
        return 1, last_mover
    # Five X marks with X moving last means the board is full: a draw.
    if lead == 1 and x_count == 5:
        return 1, 0
    return 0, next_mover

def populate_lk_tables():
    """Build the ID-to-state and state-to-ID lookup tables.

    Every one of the ``3 ** 9`` board IDs is visited. Boards are grouped by
    symmetry (see ``board_to_ids``) and each group is handled once, when the
    loop reaches its smallest ID. Valid groups receive consecutive state
    numbers starting at 0.

    Returns:
        A pair ``(id_state_lkt, state_id_lkt)``:

        * ``id_state_lkt``: ``(3 ** 9, 5)`` int32 array indexed by board ID.
          Each row is ``[status, player, flip, rot, state]``, or all ``-1``
          for an invalid board. ``status`` and ``player`` come from
          ``board_info``; ``flip`` and ``rot`` come from ``board_to_ids``.
        * ``state_id_lkt``: one row ``[status, player, smallest_id]`` per
          state, in state order.
    """
    n_boards = 3 ** 9
    id_state_lkt = np.zeros((n_boards, 5), dtype=np.int32)
    state_rows = []

    for board_id in range(n_boards):
        board = id_to_board(board_id)
        variants = board_to_ids(board)
        smallest_id = variants[0, 0]
        # This symmetry group was already handled at its smallest ID.
        if smallest_id < board_id:
            continue

        status, player = board_info(board)
        if status == -1:
            # Invalid board: flag every symmetric variant.
            id_state_lkt[variants[:, 0]] = -1
            continue

        state = len(state_rows)
        for variant_id, flip, rot in variants:
            id_state_lkt[variant_id] = (status, player, flip, rot, state)
        state_rows.append([status, player, smallest_id])

    state_id_lkt = np.array(state_rows)
    return id_state_lkt, state_id_lkt

def map_action(action, flip, rot):
    """Carry a cell index through a board flip and rotation.

    The cell is marked on an empty 3x3 board, the board is optionally
    mirrored left-right and then turned with ``np.rot90``, and the position
    where the mark ends up is reported.

    Args:
        action: Flat (row-major) index of a cell, 0 to 8.
        flip: 1 to mirror the board left-right first; any other value skips
            the mirroring.
        rot: Number of quarter turns passed to ``np.rot90``.

    Returns:
        A pair ``(new_action, new_indices)``: the flat index of the
        transformed cell and its ``(row, col)`` index tuple.
    """
    shape = (3, 3)
    marker = np.zeros(shape, dtype=np.int32)
    marker[np.unravel_index(action, shape)] = 1
    if flip == 1:
        marker = np.fliplr(marker)
    marker = np.rot90(marker, rot)
    # The single 1 on the board is its maximum, so argmax locates it.
    new_action = marker.argmax()
    return new_action, np.unravel_index(new_action, shape)

In [7]:
# Build both lookup tables once; populate_lk_tables() walks all 3 ** 9 board IDs.
id_state_lkt, state_id_lkt = populate_lk_tables()

In [8]:
# Sanity check on a sample board where X (mark 1) fills the top row.
B = np.array([[1, 1, 1],[0, 2, 0],[2, 0, 0]])
print(B)
# Win detection for X (mark 1) and then for O (mark 2).
print(find_win(B, 1))
print(find_win(B, 2))
# (status, player) classification of the board.
print(board_info(B))
# [id, flip, rot] rows for every distinct symmetric variant, sorted by id.
print(board_to_ids(B))

[[1 1 1]
 [0 2 0]
 [2 0 0]]
True
False
(1, 1)
[[  937     1     3]
 [ 1633     0     2]
 [ 6977     0     1]
 [ 8433     1     1]
 [ 9641     1     0]
 [ 9657     0     0]
 [13297     1     2]
 [14041     0     3]]
