# Q-Learning for the 3x3 Floppy Cube

In [1]:
from gui.ai_modules.ai_puzzle_class import puzzle_ai

In [2]:
# Puzzle definition: each move name ("f", "r", "b", "l") maps to a list of
# 2-element index lists into the state vector (indices range over 0..29).
# NOTE(review): presumably each pair is a swap/cycle of sticker positions
# applied by `perform_action` -- confirm against twisty_puzzle_model.
actions = {"f" : [[20, 18], [4, 7], [16, 2], [27, 23], [17, 3]],
           "r" : [[2, 9], [7, 0], [18, 26], [1, 8], [23, 21]],
           "b" : [[26, 24], [21, 29], [9, 6], [0, 12], [11, 10]],
           "l" : [[16, 6], [12, 4], [15, 5], [27, 29], [20, 24]]}
# Solved configuration: 30 entries with values 0..5.
# NOTE(review): presumably one colour id per sticker -- confirm.
solved_state = [0,0,0,0,0,0,0,1,1,1,0,1,1,0,1,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5]
# Rewards handed to the trainer, keyed by the outcome names "solved",
# "timeout" and "move".
reward_dict={"solved":10,
            "timeout":-1,
            "move":-0.2}

# Agent wrapper around the puzzle definition above.
floppy_ai = puzzle_ai(actions, solved_state, reward_dict, name="floppy_cube")

# Train the agent and time the run.
# NOTE: `%time` is an IPython line magic; the trailing backslashes join the
# call into one logical line for it -- keep them when editing the arguments.
%time games = floppy_ai.train_q_learning(\
                learning_rate=0.005,\
                discount_factor=0.999,\
                base_exploration_rate=0.6,\
                max_moves=100,\
                num_episodes=1000,\
                reward_dict=reward_dict)

# Keep a module-level handle on the learned table for the cells below.
Q_table = floppy_ai.Q_table

final exploration rate: 0.299999999999996
CPU times: user 288 ms, sys: 4.07 ms, total: 292 ms
Wall time: 289 ms


## Some simple tests:

In [3]:
from copy import deepcopy
from gui.ai_modules.twisty_puzzle_model import scramble, perform_action

def print_scramble(scramble):
    """
    Print the moves of a scramble on one line, separated by single spaces.

    inputs:
        scramble - iterable of move names (strings), e.g. ["r", "l", "f"]
    """
    moves_line = " ".join(scramble)
    print(moves_line)

def test_ai(max_moves=30):
    """
    Scramble the solved puzzle and let the trained agent try to solve it.

    The agent always picks its action with `exploration_rate=0`. The scramble,
    the final status ("solved" or "timeout") and the sequence of moves the
    agent played are printed.

    inputs:
        max_moves - value forwarded to `scramble` as its `max_moves` argument
                    (previously ignored: the call was hard-coded to 30)
    """
    state = deepcopy(solved_state)
    # `state` is used as the starting position afterwards, so `scramble` is
    # expected to modify it in place -- TODO confirm in twisty_puzzle_model.
    test_scramble = scramble(state, actions, max_moves=max_moves)
    print("scramble:")
    print_scramble(test_scramble)

    n = 0
    action_hist = []
    # Hard upper bound on the loop; `puzzle_solved` is additionally given a
    # move limit of 100 and decides when to report "timeout".
    while n < 500:
        action = floppy_ai.choose_Q_action(tuple(state), exploration_rate=0)
        perform_action(state, actions[action])
        n += 1
        action_hist.append(action)
        status = floppy_ai.puzzle_solved(state, n, 100)
        if status == "timeout":
            print("timeout")
            break
        if status == "solved":
            print("solved")
            break

    print("solution:")
    print(" ".join(action_hist))

In [4]:
# Smoke test: scramble the solved puzzle and let the trained agent solve it.
test_ai()

scramble:
r l f b r r l l b f b b r l b b f l l r r f r f b r b b r b
solved
solution:
f r


In [5]:
# Number of entries stored in the learned Q-table.
print(len(floppy_ai.Q_table))

768


### import not yet working for twisty puzzles

In [6]:
def export_Q_table(Q_table, filename="Q_table.txt"):
    """
    Dump a Q-table to a text file laid out like a dictionary literal.

    The file starts with a line containing "{", followed by one
    "<key>:<value>," line per entry (in the dictionary's iteration order),
    and ends with "}" without a trailing newline.

    inputs:
        Q_table - dictionary to write
        filename - path of the file to create or overwrite
    """
    with open(filename, "w") as out_file:
        out_file.write("{\n")
        out_file.writelines(
            f"{entry_key!s}:{q_value!s},\n"
            for entry_key, q_value in Q_table.items()
        )
        out_file.write("}")

def import_Q_table(filename="Q_table.txt"):
    """
    Import a Q-table from a text file and return it as a dictionary.

    Expected layout: an opening line ending in "{" (either "{" or the older
    "Q_table = {"), one "<key>:<value>," line per entry, and a closing "}".
    Each key is the text form of a `(state, action)` tuple, where `state` is a
    tuple of integers of any length and `action` is any Python literal
    (e.g. a move name such as 'f', or an integer).

    Example:
        Q_table[((0,0,0,0,1,0,0,0,0),2)] -> 0.3
        Q_table[((0,0,1,1), 'f')] -> -0.2

    inputs:
        filename - path of the file to read

    returns:
        dict mapping (state_tuple, action) -> float Q-value

    raises:
        ValueError - if an entry line has no ":" separator or its value is
                     not a number (malformed keys raise ValueError or
                     SyntaxError from `ast.literal_eval`)
    """
    # Local import keeps this cell self-contained.
    import ast

    Q_table = dict()
    with open(filename, "r") as file:
        for line in file:
            entry = line.strip()
            # Skip blank lines and the opening / closing brace lines.
            if not entry or entry == "}" or entry.endswith("{"):
                continue
            # The value never contains ":", so split on the LAST colon; this
            # keeps keys intact even if an action name contains a colon.
            key_text, separator, value_text = entry.rstrip(",").rpartition(":")
            if not separator:
                raise ValueError(f"malformed Q-table line: {line!r}")
            # literal_eval only accepts Python literals, so no code from the
            # file is ever executed.
            state, action = ast.literal_eval(key_text)
            Q_table[(tuple(state), action)] = float(value_text)
    return Q_table

In [7]:
# Write the trained Q-table to a text file.
export_Q_table(Q_table, filename="floppy_q_table.txt")