# TIC-TAC-TOE Dynamic Programming 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%run tic_tac_toe_aux_funcs.py
%run ../libs/dynamic_programming.py

In [2]:
# Restore the pickled lookup tables together with the model arrays P and R.
id_state_lkt, state_id_lkt, P, R = (
    np.load(path, allow_pickle=True)
    for path in ("ttt_id2s.dat", "ttt_s2id.dat", "ttt_P.dat", "ttt_R.dat")
)
# Problem size: S is the first dimension of P, A the second dimension of R.
S, A = P.shape[0], R.shape[1]
# Discount factor of the model.
gamma = 1.0

# Starting policy: one random action index per state, then overwritten for
# every state except the last three with a random action that is feasible
# there. Feasibility is required from the very first iteration.
p_init = np.random.randint(A, size=S)
for s in range(S - 3):
    info = state_id_lkt[s]
    # info[2] is what get_actions expects (play() treats it as the board id).
    p_init[s] = np.random.choice(get_actions(info[2]))

# Starting value function: Gaussian noise, with the three terminal states
# (the last three entries) pinned to zero.
v_init = np.random.normal(size=S)
v_init[-3:] = 0.0

## Policy Iteration

In [3]:
# Solve the game with policy iteration, seeded with the random initial value
# function and the feasible initial policy prepared in the setup cell.
# NOTE(review): policy_iteration is loaded from ../libs/dynamic_programming.py;
# it presumably returns (optimal policy, optimal values) — confirm there.
p_star_pi, v_star_pi = policy_iteration(P, R, gamma, v_init, p_init)
# Save the resulting policy to disk (assumes p_star_pi is a NumPy array, whose
# dump() pickles it to the given file).
p_star_pi.dump("ttt_p_pi.dat")

## Value Iteration

In [4]:
# Solve the game with value iteration, seeded with the random initial value
# function only (no initial policy is passed).
# NOTE(review): value_iteration is loaded from ../libs/dynamic_programming.py;
# it presumably returns (optimal policy, optimal values) — confirm there.
p_star_vi, v_star_vi = value_iteration(P, R, gamma, v_init)
# Save the resulting policy to disk (assumes p_star_vi is a NumPy array, whose
# dump() pickles it to the given file). play() below reads p_star_vi directly.
p_star_vi.dump("ttt_p_vi.dat")

## Tests and debugging

In [5]:
def rand_action(actions_O):
    """Pick one of O's available moves uniformly at random.

    Args:
        actions_O: Non-empty collection of moves O may play.

    Returns:
        One element of ``actions_O``.
    """
    picked = np.random.choice(actions_O)
    return picked

def user_action(actions_O):
    """Prompt the user until they enter one of the allowed moves.

    Input that is not an integer (including an empty line) is rejected and
    the prompt is shown again instead of aborting the game.

    Args:
        actions_O: Collection of moves O may currently play. It must be
            non-empty, otherwise no input can ever be accepted.

    Returns:
        The chosen move as an ``int``; always a member of ``actions_O``.

    Raises:
        EOFError: If standard input is closed before a valid move is read.
    """
    while True:
        try:
            a_O = int(input("Enter number from 0 to 8: "))
        except ValueError:
            # Not a number: ask again rather than crash mid-game.
            continue
        if a_O in actions_O:
            return a_O

def play(opponent=None):
    """Play one game: the value-iteration policy (X) against an O player.

    X moves first on every round using ``p_star_vi``; O's moves come from
    ``opponent``. The board is printed at the start and after each X move.
    The game ends with "WIN" when the lookup table reports result 1 after
    X's move, or with "DRAW" when O has no move left.

    Args:
        opponent: Callable that receives O's feasible actions and returns
            the chosen one. Defaults to ``user_action`` (keyboard input);
            pass ``rand_action`` for a random opponent.
    """
    if opponent is None:
        opponent = user_action
    s = 0  # presumably the initial (empty-board) state — confirm in the tables
    board_id = state_id_lkt[s, 2]
    print_board(id_to_board(board_id))
    while True:
        # X's turn
        info = id_state_lkt[board_id]
        # NOTE(review): map_action presumably translates the policy's action
        # for state info[4] back to this concrete board — confirm in aux funcs.
        a_X = map_action(p_star_vi[info[4]], info[2], info[3])[0]
        # Cell a carries weight 3 ** (8 - a) in the board id; X adds 1 there.
        board_id += 3 ** (8 - a_X)
        print_board(id_to_board(board_id))
        if id_state_lkt[board_id, 0] == 1:
            print("WIN")
            break
        # O's turn
        actions_O = get_actions(board_id)
        if len(actions_O) == 0:
            # No move left and X has not won: the game is drawn. Without this
            # check the opponent would be asked to pick from nothing, and
            # user_action would prompt forever.
            print("DRAW")
            break
        a_O = opponent(actions_O)
        # O adds 2 at the chosen cell's weight.
        board_id += 2 * (3 ** (8 - a_O))
        # NOTE(review): an O win is not detected here; it is presumably
        # unreachable against the computed policy — confirm.
#play()