In [19]:
import numpy as np
from environment.discover_goldbox import DiscoverGoldbox

In [20]:
# Create the project-local DiscoverGoldbox environment. The cells below read
# its `action_space` / `observation_space` sizes and drive it via `reset()`
# and `step(action)`.
env = DiscoverGoldbox()

In [21]:
# Table dimensions are read from the environment's two spaces.
state_space_size = env.observation_space.n
action_space_size = env.action_space.n

# One row per state, one column per action; every estimate starts at 0.0.
q_table = np.zeros(shape=(state_space_size, action_space_size))

In [22]:
# --- Training schedule ---
num_episodes = 1_000            # episodes run by the training loop
max_steps_per_episode = 25      # step cap per episode (the greedy replay reuses it)

# --- Q-learning update ---
learning_rate = 0.1             # weight of the new target in each Q update
discount_rate = 0.99            # multiplier on the best next-state Q-value

# --- Epsilon-greedy exploration ---
max_exploration_rate = 1.0      # upper bound of the exploration schedule
min_exploration_rate = 0.1      # floor the schedule decays towards
exploration_decay_rate = 0.001  # scale of the per-episode exponential decay
exploration_rate = max_exploration_rate  # training starts fully exploratory

In [23]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
# Collects the summed reward of every episode in `rewards_all_episodes`.
rewards_all_episodes = []

for episode in range(num_episodes):
    state = env.reset()
    rewards_current_episode = 0

    for step in range(max_steps_per_episode):
        # One uniform draw per step: at or below the current exploration rate
        # we take a random action, above it we act greedily on the Q-table.
        exploration_rate_threshold = np.random.uniform(0, 1)
        if exploration_rate_threshold <= exploration_rate:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state, :])

        new_state, reward, done = env.step(action)

        # Blend the previous estimate with the bootstrapped target
        # (reward plus discounted best value of the next state).
        old_estimate = q_table[state, action]
        best_next_value = np.max(q_table[new_state, :])
        bootstrapped_target = reward + discount_rate * best_next_value
        q_table[state, action] = old_estimate * (1 - learning_rate) + learning_rate * bootstrapped_target

        rewards_current_episode += reward
        state = new_state

        if done:
            break

    rewards_all_episodes.append(rewards_current_episode)

    # Exponentially decay the exploration rate from max towards min,
    # indexed by the episode that just finished.
    decay_factor = np.exp(-exploration_decay_rate * episode)
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * decay_factor

Goal Reached
Total battery points in Robot:  17975
Goal Reached
Total battery points in Robot:  18376
Goal Reached
Total battery points in Robot:  18986
Goal Reached
Total battery points in Robot:  19377
Goal Reached
Total battery points in Robot:  18378
Goal Reached
Total battery points in Robot:  18677
Goal Reached
Total battery points in Robot:  19281
Goal Reached
Total battery points in Robot:  19480
Goal Reached
Total battery points in Robot:  18877
Goal Reached
Total battery points in Robot:  19276
Goal Reached
Total battery points in Robot:  19284
Goal Reached
Total battery points in Robot:  18885
Goal Reached
Total battery points in Robot:  19379
Goal Reached
Total battery points in Robot:  19585
Goal Reached
Total battery points in Robot:  19176
Goal Reached
Total battery points in Robot:  19275
Goal Reached
Total battery points in Robot:  19277
Goal Reached
Total battery points in Robot:  19188
Goal Reached
Total battery points in Robot:  19179
Goal Reached
Total battery poin

In [24]:
# Summarise training: mean episode reward for each consecutive block of
# 1000 episodes, followed by the learned Q-table.
episodes_per_block = 1000
all_rewards = np.array(rewards_all_episodes)

# Slice into fixed-size blocks instead of np.split(..., num_episodes/1000):
# that passed a float section count, which raises or silently produces
# wrong-sized blocks whenever the episode count is not a multiple of 1000.
# Here a trailing partial block is kept and averaged over its own length.
rewards_per_thousand_episodes = [
    all_rewards[start:start + episodes_per_block]
    for start in range(0, len(all_rewards), episodes_per_block)
]

count = 0  # running number of episodes covered; used as the row label
print("~~~~~~~Average Rewards Per Thousand Episodes~~~~~~")
for r in rewards_per_thousand_episodes:
    count += len(r)
    print(f"{count: <5}: {np.sum(r / len(r))}")

print()
print("~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~")
print(q_table)

~~~~~~~Average Rewards Per Thousand Episodes~~~~~~
1000 : 1274.68

~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~
[[-2.00920289e+00 -2.01117291e+00 -2.05500055e+00 -1.94791740e+00]
 [-2.28950009e+00 -2.59116147e+00 -2.28498186e+00 -2.98531859e+00]
 [-3.12115699e+00 -2.92521907e+00 -2.85929219e+00 -3.01823866e+00]
 [-3.62925043e+00 -3.43263550e+00 -3.43615147e+00 -4.33925606e+00]
 [-4.24111334e+00  2.63686321e+01 -4.11679308e+00  5.88185812e+02]
 [ 4.97291561e+02  7.17824709e+02  2.98520594e+01  5.91772808e+03]
 [ 6.56504720e+03  6.38058341e+03  2.64321071e+03  8.53364441e+03]
 [ 8.75576857e+03  8.51459642e+03  8.04891982e+03  8.51513362e+03]
 [ 8.39874662e+03  8.46367001e+03  8.66681409e+03  8.43907076e+03]
 [ 7.97197361e+03  8.39110790e+03  8.57873441e+03  8.43279383e+03]
 [-1.75496093e+00 -1.82246685e+00 -1.88243216e+00 -1.88884141e+00]
 [-8.05629332e+01 -2.34905285e+00 -2.29354344e+00 -2.70652213e+00]
 [-9.31086708e+01 -3.03525732e+00 -2.93946734e+00 -3.62820938e+00]
 [-9.96467

In [25]:
from PIL import Image

# Robot sprite: give it a fully opaque alpha band so the image can also be
# passed as its own mask when it is pasted onto the board in the replay cell.
agent = Image.open("images/robot.png")
agent.putalpha(255)

# Background board that every animation frame is copied from.
canvas = Image.open("images/discover_goldbox_environment.png")

In [26]:
# Map a grid cell (x, y) to the pixel offset used as the paste box for the
# sprite. Cells sit on a 200 px pitch with a 100 px margin; the key's second
# component drives the first pixel coordinate and the first component drives
# the second.
locations = {
    (row, col): (200 * col + 100, 200 * row + 100)
    for col in range(10)
    for row in range(10)
}

In [27]:
# Replay one greedy episode from the learned Q-table, drawing one frame per
# step, and export the frames as an animated GIF.
images = []

# Grid cell where the sprite is drawn for the opening frame.
agentX, agentY = 9, 0

opening_frame = canvas.copy()
opening_frame.paste(agent, locations[(agentX, agentY)], agent)
images.append(opening_frame)

state = env.reset()
rewards_current_episode = 0

# (agentX delta, agentY delta) per action index; any other action value
# moves agentX by +1. The drawn position is tracked here independently of
# the environment's own state.
move_deltas = {0: (0, 1), 1: (0, -1), 2: (-1, 0)}
fallback_delta = (1, 0)

for step in range(max_steps_per_episode):
    # Always exploit: take the highest-valued action for the current state.
    action = np.argmax(q_table[state, :])

    # Apply the move to the drawn position, clamped to the 10x10 grid.
    delta_x, delta_y = move_deltas.get(int(action), fallback_delta)
    agentX = min(9, max(0, agentX + delta_x))
    agentY = min(9, max(0, agentY + delta_y))

    frame = canvas.copy()
    frame.paste(agent, locations[(agentX, agentY)], agent)
    images.append(frame)

    print("Qtable",q_table[state, :])
    print("Action ",action," Co-ordinate ",(agentX, agentY))

    new_state, reward, done = env.step(action)

    rewards_current_episode += reward
    state = new_state

    if done:
        break

# Write every frame to a looping GIF, 500 ms per frame.
first_image, *remaining_images = images
first_image.save(
    'images/discoveredgoldbox.gif',
    save_all=True,
    append_images=remaining_images,
    duration=500,
    loop=0,
)

Qtable [7971.97360956 8391.10790199 8578.73441256 8432.79383054]
Action  2  Co-ordinate  (8, 0)
Qtable [8398.7466163  8463.67000717 8666.8140901  8439.0707552 ]
Action  2  Co-ordinate  (7, 0)
Qtable [8755.76856741 8514.59642141 8048.91982042 8515.13361665]
Action  0  Co-ordinate  (7, 1)
Qtable [8845.51804079 8622.68676812 8311.84882437 8445.16873311]
Action  0  Co-ordinate  (7, 2)
Qtable [8936.07076463 8697.27688924 8585.90324841 8325.58518982]
Action  0  Co-ordinate  (7, 3)
Qtable [9027.44112985 8703.64777641 8687.55607178 8543.06413753]
Action  0  Co-ordinate  (7, 4)
Qtable [9119.72946753 8747.50774411 8471.87648229 9020.32135392]
Action  0  Co-ordinate  (7, 5)
Qtable [8303.37791553 8894.70582211 8230.34196059 9212.912221  ]
Action  3  Co-ordinate  (8, 5)
Qtable [8750.48669549 9043.74620774 8915.4276705  9307.01104732]
Action  3  Co-ordinate  (9, 5)
Qtable [9402.03762599 9169.4712398  9161.83788083 9213.28315836]
Action  0  Co-ordinate  (9, 6)
Qtable [9599.02929801 9204.91970738 9139

<img src="images/discoveredgoldbox.gif" width="50%" />