In [40]:
import numpy as np
from goldbox_detector_environment.goldbox_detector import GoldboxDetector

In [41]:
# Single environment instance shared by the training loop and the greedy
# rollout further down; both call env.reset() before stepping it.
env = GoldboxDetector()

In [42]:
# Size the tabular value function from the environment's discrete spaces:
# one row per state, one column per action, every estimate starting at zero.
state_space_size = env.observation_space.n
action_space_size = env.action_space.n

q_table = np.zeros(shape=(state_space_size, action_space_size), dtype=float)

In [43]:
# --- Training budget -------------------------------------------------------
num_episodes = 1_000          # number of training episodes
max_steps_per_episode = 25    # step cap applied to every episode

# --- Q-learning update -----------------------------------------------------
learning_rate = 1e-1          # weight given to the new target in each update
discount_rate = 0.99          # weight of the best next-state value in the target

# --- Epsilon-greedy schedule -----------------------------------------------
# The exploration rate starts at the maximum and is recomputed after every
# episode as min + (max - min) * exp(-decay * episode).
# NOTE(review): with decay 0.001 over 1000 episodes the rate only falls to
# about 0.43, far from the 0.1 floor - confirm this is intended.
max_exploration_rate = 1.0
min_exploration_rate = 0.1
exploration_decay_rate = 1e-3
exploration_rate = max_exploration_rate

In [44]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
# Each episode resets the environment, acts for at most
# `max_steps_per_episode` steps, and records the summed reward.
# NOTE(review): no RNG seed is set and `q_table` is updated in place, so
# re-running this cell continues from the previous run rather than restarting.
rewards_all_episodes = []

for episode in range(num_episodes):
    state = env.reset()
    rewards_current_episode = 0

    for step in range(max_steps_per_episode):
        # One uniform draw per step decides the branch: explore (random
        # action) when the draw is at or below the current exploration rate,
        # otherwise exploit the best-known action for this state.
        exploration_rate_threshold = np.random.uniform(0, 1)
        explore = exploration_rate_threshold <= exploration_rate
        action = env.action_space.sample() if explore else np.argmax(q_table[state, :])

        new_state, reward, done = env.step(action)

        # Blend the old estimate with the bootstrapped target
        # reward + discount * max_a Q(new_state, a).
        best_next_value = np.max(q_table[new_state, :])
        td_target = reward + discount_rate * best_next_value
        q_table[state, action] = (
            q_table[state, action] * (1 - learning_rate) + learning_rate * td_target
        )

        state = new_state
        rewards_current_episode += reward

        if done:
            break

    # Exponentially decay exploration towards its floor, keyed on the episode index.
    exploration_span = max_exploration_rate - min_exploration_rate
    exploration_rate = min_exploration_rate + exploration_span * np.exp(-exploration_decay_rate * episode)

    rewards_all_episodes.append(rewards_current_episode)

Goal Reached!!!, total battery points is:  19077
Goal Reached!!!, total battery points is:  18877
Goal Reached!!!, total battery points is:  18976
Goal Reached!!!, total battery points is:  18986
Goal Reached!!!, total battery points is:  19776
Goal Reached!!!, total battery points is:  19878
Goal Reached!!!, total battery points is:  19878
Goal Reached!!!, total battery points is:  19877
Goal Reached!!!, total battery points is:  19981
Goal Reached!!!, total battery points is:  19581
Goal Reached!!!, total battery points is:  19476
Goal Reached!!!, total battery points is:  19879
Goal Reached!!!, total battery points is:  19485
Goal Reached!!!, total battery points is:  19776
Goal Reached!!!, total battery points is:  19476
Goal Reached!!!, total battery points is:  19880
Goal Reached!!!, total battery points is:  19776
Goal Reached!!!, total battery points is:  19776
Goal Reached!!!, total battery points is:  19878
Goal Reached!!!, total battery points is:  19878
Goal Reached!!!, tot

In [45]:
# Summarise training: average episode reward for each consecutive block of
# `episodes_per_block` episodes, followed by the learned Q-table.
episodes_per_block = 1000
rewards_array = np.asarray(rewards_all_episodes)

# Slice by offset rather than np.split: np.split demands an exact division and
# was being handed a float section count (num_episodes/1000), which either
# raises or silently produces wrongly sized blocks whenever the episode count
# is not a multiple of 1000. Slicing keeps a shorter final block instead.
rewards_per_thousand_episodes = [
    rewards_array[start:start + episodes_per_block]
    for start in range(0, len(rewards_array), episodes_per_block)
]

print("~~~~~~~Average Rewards Per Thousand Episodes~~~~~~")
count = 0
for r in rewards_per_thousand_episodes:
    # Label each row with the cumulative number of episodes it covers and
    # divide by the block's real length so a partial last block is averaged
    # correctly (for a full block this is the same r/1000 as before).
    count += len(r)
    print(f"{count: <5}: {np.sum(r / len(r))}")

print()
print("~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~")
print(q_table)

~~~~~~~Average Rewards Per Thousand Episodes~~~~~~
1000 : 313.4789999999999

~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~
[[-3.72848775e+00 -3.56399570e+00 -3.54243566e+00 -3.70843396e+00]
 [-4.05433364e+00 -4.06789299e+00 -4.03908545e+00 -4.20039104e+00]
 [-4.62607182e+00 -4.57740199e+00 -4.52933063e+00 -4.88450072e+00]
 [-5.51172637e+00 -5.51485777e+00 -5.18351994e+00 -5.37750356e+00]
 [-5.98895855e+00 -5.90405178e+00 -5.88964348e+00 -6.39940541e+00]
 [ 3.49166631e+01 -6.61426813e+00 -6.58550085e+00 -7.23535920e+00]
 [ 9.57588380e+02  2.94866117e+02 -7.13352308e+00  4.08701555e+03]
 [ 6.62197272e+03  4.10426819e+03  2.24869259e+03  3.65247602e+03]
 [ 2.20763750e+03  2.67469280e+03  6.25122319e+03  2.47607184e+03]
 [ 1.20955173e+03  2.91133275e+03  5.72036365e+03  3.36180921e+03]
 [-3.35700763e+00 -3.76243661e+00 -3.63756586e+00 -3.70792907e+00]
 [-9.41733470e+01 -4.00710902e+00 -4.00831213e+00 -4.21209624e+00]
 [-1.00490035e+02 -4.70332846e+00 -4.68952612e+00 -4.70027861e+00]


In [46]:
from PIL import Image
# Background board image; every rendered frame starts as a copy of it.
canvas = Image.open("images/goldbox_detector_environment.png")
# Agent sprite that gets pasted onto the board copy for each frame.
agent = Image.open("images/agent.png")
# Give the sprite a fully opaque alpha channel so it can be passed as its own
# mask to paste() below.
# NOTE(review): this overwrites any transparency already stored in agent.png.
agent.putalpha(255)

In [47]:
# Lookup from grid cell (x, y) to the pixel offset handed to paste():
# cells are 200 px apart with a 100 px margin; y drives the first (horizontal)
# pixel coordinate and x the second (vertical) one.
locations = {
    (x, y): (200 * y + 100, 200 * x + 100)
    for y in range(10)
    for x in range(10)
}

In [48]:
def _render_frame(x, y):
    """Return a copy of the board with the agent sprite pasted at cell (x, y)."""
    frame = canvas.copy()
    frame.paste(agent, locations[(x, y)], agent)
    return frame


# Grid displacement (dx, dy) applied for each action index; any other action
# falls back to +1 in x. This mirrors the movement locally for drawing only -
# the environment itself is advanced separately through env.step().
_MOVE_BY_ACTION = {0: (0, 1), 1: (0, -1), 2: (-1, 0)}
_FALLBACK_MOVE = (1, 0)

# Drawn starting cell of the agent.
agentX, agentY = 9, 0
images = [_render_frame(agentX, agentY)]

state = env.reset()
rewards_current_episode = 0

# Follow the greedy policy from the learned Q-table, recording one frame per step.
for step in range(max_steps_per_episode):
    action = np.argmax(q_table[state, :])

    # Move the drawn agent, keeping both coordinates inside the 0..9 grid.
    dx, dy = _MOVE_BY_ACTION.get(int(action), _FALLBACK_MOVE)
    agentX = min(9, max(0, agentX + dx))
    agentY = min(9, max(0, agentY + dy))

    images.append(_render_frame(agentX, agentY))
    print("Coordinate ",(agentX, agentY))

    new_state, reward, done = env.step(action)

    state = new_state
    rewards_current_episode += reward

    if done:
        break

# Write all frames as a looping GIF, 500 ms per frame.
first_frame, *remaining_frames = images
first_frame.save('images/detected_goldbox.gif',
                 save_all=True,
                 append_images=remaining_frames,
                 duration=500,
                 loop=0)

Coordinate  (8, 0)
Coordinate  (7, 0)
Coordinate  (7, 1)
Coordinate  (7, 2)
Coordinate  (7, 3)
Coordinate  (7, 4)
Coordinate  (7, 5)
Coordinate  (6, 5)
Coordinate  (6, 6)
Coordinate  (6, 7)
Coordinate  (6, 8)
Coordinate  (6, 9)
Coordinate  (7, 9)
Coordinate  (8, 9)
Coordinate  (9, 9)
Goal Reached!!!, total battery points is:  19985


<img src="images/detected_goldbox.gif" width="50%" />