In [1]:
import numpy as np
from goldbox_detector_environment.goldbox_detector import GoldboxDetector

In [2]:
# Create the GoldboxDetector environment instance. Later cells read its
# action/observation space sizes and drive it through reset() and step().
env = GoldboxDetector()

In [3]:
# Size the Q-table from the environment's discrete spaces: one row per state,
# one column per action, every entry starting at zero.
state_space_size = env.observation_space.n
action_space_size = env.action_space.n

q_table = np.zeros(shape=(state_space_size, action_space_size))

In [4]:
# --- Training schedule -------------------------------------------------------
num_episodes = 1_000          # number of training episodes
max_steps_per_episode = 25    # upper bound on steps taken inside one episode

# --- Q-value update ----------------------------------------------------------
learning_rate = 0.1           # weight given to the new estimate in the update
discount_rate = 0.99          # multiplier on the best next-state Q-value

# --- Exploration schedule ----------------------------------------------------
# The exploration rate starts at the maximum and is recomputed after every
# episode as an exponential decay from the maximum towards the minimum.
max_exploration_rate = 1.0
min_exploration_rate = 0.1
exploration_decay_rate = 0.001
exploration_rate = max_exploration_rate

In [5]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
# Collects the total reward of every episode in `rewards_all_episodes`.
rewards_all_episodes = []

for episode in range(num_episodes):
    state = env.reset()
    rewards_current_episode = 0

    for step in range(max_steps_per_episode):
        # One uniform draw per step decides between exploring and exploiting.
        exploration_rate_threshold = np.random.uniform(0, 1)
        if exploration_rate_threshold <= exploration_rate:
            # Explore: let the environment's action space pick an action.
            action = env.action_space.sample()
        else:
            # Exploit: take the action with the highest current Q-value.
            action = np.argmax(q_table[state, :])

        new_state, reward, done = env.step(action)

        # Blend the old estimate with the bootstrapped target
        # (reward + discounted best Q-value of the next state).
        old_value = q_table[state, action]
        best_next_value = np.max(q_table[new_state, :])
        q_table[state, action] = old_value * (1 - learning_rate) + learning_rate * (reward + discount_rate * best_next_value)

        state = new_state
        rewards_current_episode += reward

        if done:
            break

    # Exponentially decay the exploration rate towards its minimum,
    # indexed by the episode number that just finished.
    decay_factor = np.exp(-exploration_decay_rate * episode)
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * decay_factor

    rewards_all_episodes.append(rewards_current_episode)

Goal Reached!!!, total battery points is:  18977
Goal Reached!!!, total battery points is:  19076
Goal Reached!!!, total battery points is:  19279
Goal Reached!!!, total battery points is:  19282
Goal Reached!!!, total battery points is:  18979
Goal Reached!!!, total battery points is:  19282
Goal Reached!!!, total battery points is:  18478
Goal Reached!!!, total battery points is:  19176
Goal Reached!!!, total battery points is:  18575
Goal Reached!!!, total battery points is:  19282
Goal Reached!!!, total battery points is:  18881
Goal Reached!!!, total battery points is:  19175
Goal Reached!!!, total battery points is:  19176
Goal Reached!!!, total battery points is:  19585
Goal Reached!!!, total battery points is:  19282
Goal Reached!!!, total battery points is:  19277
Goal Reached!!!, total battery points is:  19376
Goal Reached!!!, total battery points is:  19583
Goal Reached!!!, total battery points is:  18875
Goal Reached!!!, total battery points is:  19676
Goal Reached!!!, tot

In [6]:
# Report training progress: the mean episode reward for each consecutive block
# of up to 1000 episodes, followed by the learned Q-table.
episodes_per_block = 1000
rewards_array = np.asarray(rewards_all_episodes)

# Slice into blocks instead of calling np.split with `num_episodes/1000`:
# that passed a float section count and raised ValueError whenever the number
# of episodes was not an exact multiple of 1000 (including fewer than 1000).
# The final block may be shorter than `episodes_per_block`.
rewards_per_thousand_episodes = [
    rewards_array[start:start + episodes_per_block]
    for start in range(0, len(rewards_array), episodes_per_block)
]

count = 0  # running number of episodes covered by the blocks printed so far
print("~~~~~~~Average Rewards Per Thousand Episodes~~~~~~")
for r in rewards_per_thousand_episodes:
    count += len(r)
    # Average over the block's actual length so a short final block is not
    # divided by a hard-coded 1000.
    print(f"{count: <5}: {np.mean(r)}")

print()
print("~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~")
print(q_table)

~~~~~~~Average Rewards Per Thousand Episodes~~~~~~
1000 : 1027.23

~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~
[[-3.88638100e-01 -2.99700100e-01 -2.99700100e-01 -5.92284456e-01]
 [-5.62474323e-01 -6.33504926e-01 -6.15152980e-01 -6.05548704e-01]
 [-1.08156564e+00 -9.41295303e-01 -8.78427244e-01 -3.49548555e+01]
 [-5.77601391e+01 -7.56270279e+01 -1.10860422e+00 -8.99533858e+01]
 [-9.47689278e+01 -1.29036604e+02 -9.16849964e+01  6.85220437e+02]
 [ 1.12075023e+02  2.92320322e+02 -1.15169128e+02  6.82223875e+03]
 [ 5.10966499e+03  6.96700013e+03  3.91114624e+03  8.59210446e+03]
 [ 8.75625914e+03  8.60585190e+03  8.18617227e+03  8.46718065e+03]
 [ 8.43043117e+03  8.54776838e+03  8.66748120e+03  8.40182949e+03]
 [ 7.77891617e+03  8.45674694e+03  8.57938012e+03  8.45078850e+03]
 [-3.79639000e-01 -3.91381400e-01 -3.98518310e-01 -3.19178251e-01]
 [-1.91900000e+01 -5.21969938e-01 -5.09100490e-01 -4.82065870e-01]
 [-4.74661174e+01 -9.78229189e-01 -8.02054991e-01 -3.48806466e+01]
 [-1.03727

In [7]:
from PIL import Image
# Background image of the environment; copied once per frame when rendering.
canvas = Image.open("images/goldbox_detector_environment.png")
# Agent sprite that gets pasted onto a copy of the background for each frame.
agent = Image.open("images/agent.png")
# Set the sprite's alpha layer to 255 in place. The sprite is later passed as
# its own mask to paste(); presumably this guarantees it has an alpha band
# for that purpose -- TODO confirm against the source PNG's mode.
agent.putalpha(255)

In [8]:
def _paste_offset(x, y):
    """Return the pixel 2-tuple used as the paste position for grid cell (x, y).

    The first component depends only on ``y`` and the second only on ``x``;
    each grid step moves the offset by 200 px, starting from 100 px.
    """
    return (200 * y + 100, 200 * x + 100)


# Lookup table from grid cell (x, y) to its paste offset for the 10x10 grid.
# Keys are inserted with y as the outer loop and x as the inner loop.
locations = {(x, y): _paste_offset(x, y) for y in range(10) for x in range(10)}

In [9]:
# Roll out the greedy policy from the learned Q-table for one episode, drawing
# one frame per step, then write all frames to an animated GIF.

def _render_frame(x, y):
    """Return a copy of the background with the agent sprite pasted at cell (x, y)."""
    frame = canvas.copy()
    frame.paste(agent, locations[(x, y)], agent)
    return frame


# (dx, dy) applied to the tracked agent cell for actions 0, 1 and 2. Any other
# action value falls back to (+1, 0), matching a trailing `else` branch.
_ACTION_DELTAS = {0: (0, 1), 1: (0, -1), 2: (-1, 0)}
_FALLBACK_DELTA = (1, 0)

# Hard-coded starting cell of the agent on the 10x10 grid.
agentX, agentY = 9, 0
images = [_render_frame(agentX, agentY)]

state = env.reset()
rewards_current_episode = 0  # cumulative reward of this rollout, kept for inspection

for step in range(max_steps_per_episode):
    # Pure exploitation: always take the highest-valued action for this state.
    action = np.argmax(q_table[state, :])

    # NOTE(review): the drawn position is tracked locally and clamped to the
    # grid rather than derived from the environment's returned state; this
    # assumes the action meanings here match the environment's -- confirm.
    dx, dy = _ACTION_DELTAS.get(int(action), _FALLBACK_DELTA)
    agentX = min(9, max(0, agentX + dx))
    agentY = min(9, max(0, agentY + dy))

    images.append(_render_frame(agentX, agentY))
    print("Coordinate ",(agentX, agentY))

    new_state, reward, done = env.step(action)
    state = new_state
    rewards_current_episode += reward

    if done:
        break

# First frame is the base image; the remaining frames are appended to it.
images[0].save(
    'images/detected_goldbox.gif',
    save_all=True,
    append_images=images[1:],
    duration=500,
    loop=0,
)

Coordinate  (8, 0)
Coordinate  (7, 0)
Coordinate  (7, 1)
Coordinate  (7, 2)
Coordinate  (7, 3)
Coordinate  (7, 4)
Coordinate  (8, 4)
Coordinate  (9, 4)
Coordinate  (9, 5)
Coordinate  (9, 6)
Coordinate  (9, 7)
Coordinate  (9, 8)
Coordinate  (9, 9)
Goal Reached!!!, total battery points is:  19687


<img src="images/detected_goldbox.gif" width="50%" />