In [1]:
import numpy as np
from goldbox_detector_environment.goldbox_detector import GoldboxDetector

In [2]:
# Create the environment instance. The cells below read its `action_space`
# and `observation_space` and drive it through `reset()` and `step()`.
env = GoldboxDetector()

In [3]:
# Table dimensions come straight from the environment's discrete spaces.
state_space_size = env.observation_space.n
action_space_size = env.action_space.n

# One row per state, one column per action; every estimate starts at zero.
q_table = np.zeros(shape=(state_space_size, action_space_size))

In [4]:
# --- Training schedule ---
num_episodes = 1_000
max_steps_per_episode = 25

# --- Q-learning update ---
learning_rate = 0.1
discount_rate = 0.99

# --- Epsilon-greedy exploration ---
# Exploration starts at the maximum rate and is decayed towards the minimum
# by the training loop.
max_exploration_rate = 1.0
min_exploration_rate = 0.1
exploration_decay_rate = 0.001
exploration_rate = max_exploration_rate

In [5]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
# `rewards_all_episodes` collects the summed reward of every episode.
rewards_all_episodes = []

for episode in range(num_episodes):
    state = env.reset()
    rewards_current_episode = 0

    for step in range(max_steps_per_episode):
        # Draw once per step; a draw at or below the current exploration
        # rate means "take a random action", otherwise act greedily.
        exploration_rate_threshold = np.random.uniform(0, 1)
        if exploration_rate_threshold <= exploration_rate:
            action = env.action_space.sample()
        else:
            action = np.argmax(q_table[state, :])

        new_state, reward, done = env.step(action)

        # Blend the old estimate with the one-step bootstrapped target.
        best_next_value = np.max(q_table[new_state, :])
        td_target = reward + discount_rate * best_next_value
        q_table[state, action] = (
            q_table[state, action] * (1 - learning_rate)
            + learning_rate * td_target
        )

        rewards_current_episode += reward
        state = new_state

        if done:
            break

    rewards_all_episodes.append(rewards_current_episode)

    # Exponentially decay exploration from the max rate towards the min rate
    # as a function of the episode index.
    decay = np.exp(-exploration_decay_rate * episode)
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * decay

Goal Reached!!!, total battery points is:  19275
Goal Reached!!!, total battery points is:  18577
Goal Reached!!!, total battery points is:  18475
Goal Reached!!!, total battery points is:  18178
Goal Reached!!!, total battery points is:  19175
Goal Reached!!!, total battery points is:  19178
Goal Reached!!!, total battery points is:  18579
Goal Reached!!!, total battery points is:  19678
Goal Reached!!!, total battery points is:  19280
Goal Reached!!!, total battery points is:  18777
Goal Reached!!!, total battery points is:  18984
Goal Reached!!!, total battery points is:  18476
Goal Reached!!!, total battery points is:  18475
Goal Reached!!!, total battery points is:  19176
Goal Reached!!!, total battery points is:  19382
Goal Reached!!!, total battery points is:  19381
Goal Reached!!!, total battery points is:  19290
Goal Reached!!!, total battery points is:  18677
Goal Reached!!!, total battery points is:  19276
Goal Reached!!!, total battery points is:  18977
Goal Reached!!!, tot

In [6]:
# Report the average reward over each consecutive block of episodes, then
# print the learned Q-table.
episodes_per_block = 1000
rewards_array = np.array(rewards_all_episodes)

# Slice the reward history explicitly instead of calling
# np.split(..., num_episodes / 1000): that passed a float section count and
# only behaved when the number of episodes was an exact multiple of 1000.
# Slicing also tolerates a shorter final block and an empty history.
rewards_per_thousand_episodes = [
    rewards_array[start:start + episodes_per_block]
    for start in range(0, len(rewards_array), episodes_per_block)
]

count = 0
print("~~~~~~~Average Rewards Per Thousand Episodes~~~~~~")
for r in rewards_per_thousand_episodes:
    # Label each row with the index of the last episode it covers.
    count += len(r)
    # Divide by the block's own length (not a hard-coded 1000) so a short
    # trailing block still reports a true average.
    print(f"{count: <5}: {np.sum(r / len(r))}")

print()
print("~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~")
print(q_table)

~~~~~~~Average Rewards Per Thousand Episodes~~~~~~
1000 : 1508.266

~~~~~~~~~~~~~~~~~~~~~~Q-Table~~~~~~~~~~~~~~~~~~~~~~
[[-1.10000663e+00 -5.88641539e-01 -6.76661146e-01 -8.19291928e-01]
 [-1.26488338e+00 -1.26326090e+00 -1.19923226e+00 -1.73377897e+00]
 [-1.86176004e+00 -2.09365481e+00 -1.84411279e+00 -2.74445557e+00]
 [-2.63022574e+00 -2.82948638e+00 -2.56450651e+00 -3.48167021e+00]
 [ 1.71432109e+01 -3.28857426e+00 -3.25847112e+00  7.08500098e+02]
 [ 1.10470493e+03  1.60797070e+03  1.44542630e+02  6.19252444e+03]
 [ 8.53620345e+03  5.90635877e+03  3.72515796e+03  6.70446383e+03]
 [ 8.75639128e+03  8.63651074e+03  8.19355252e+03  8.55693411e+03]
 [ 8.49247689e+03  8.52556493e+03  8.66769217e+03  8.42778403e+03]
 [ 8.14377969e+03  8.46797318e+03  8.57969132e+03  8.45190560e+03]
 [-9.74104502e-01 -9.10260325e-01 -1.09809121e+00 -1.02917272e+00]
 [-8.44153198e+01 -1.21315439e+00 -1.26182430e+00 -1.23974261e+00]
 [-8.30019262e+01 -1.76156889e+00 -1.65488250e+00 -2.35689126e+00]
 [-1.0004

In [7]:
from PIL import Image
# Load the background picture that each animation frame is copied from.
canvas = Image.open("images/goldbox_detector_environment.png")
# Load the agent sprite that gets pasted onto those frames.
agent = Image.open("images/agent.png")
# Set the sprite's alpha to 255; the rollout cell passes `agent` as its own
# paste mask, which needs an alpha band.
agent.putalpha(255)

In [8]:
def _cell_to_pixel(x, y):
    """Return the paste offset for grid cell (x, y).

    The first component is derived from y and the second from x; cells are
    spaced 200 apart, starting at offset 100.
    """
    return (200 * y + 100, 200 * x + 100)


# Lookup from every (x, y) cell of the 10x10 grid to its paste offset.
locations = {
    (x, y): _cell_to_pixel(x, y)
    for y in range(10)
    for x in range(10)
}

In [9]:
# Replay one episode with the greedy policy from the learned Q-table,
# drawing a frame per step and exporting the frames as an animated GIF.

# (delta_x, delta_y) for each action index; any action not listed here
# moves +1 in x.
moves = {0: (0, 1), 1: (0, -1), 2: (-1, 0)}

# Position used only for drawing; it is tracked here separately from the
# environment's own state.
agentX, agentY = 9, 0

frame = canvas.copy()
frame.paste(agent, locations[(agentX, agentY)], agent)
images = [frame]

state = env.reset()
rewards_current_episode = 0

for step in range(max_steps_per_episode):
    action = np.argmax(q_table[state, :])

    # Apply the move and clamp both coordinates to the 0..9 grid.
    delta_x, delta_y = moves.get(int(action), (1, 0))
    agentX = min(9, max(0, agentX + delta_x))
    agentY = min(9, max(0, agentY + delta_y))

    frame = canvas.copy()
    frame.paste(agent, locations[(agentX, agentY)], agent)
    images.append(frame)

    print("Coordinate ",(agentX, agentY))

    new_state, reward, done = env.step(action)
    rewards_current_episode += reward
    state = new_state

    if done:
        break

# The first frame is the base image; the remaining frames are appended to it.
first_frame, *later_frames = images
first_frame.save('images/detected_goldbox.gif',
                 save_all=True,
                 append_images=later_frames,
                 duration=500,
                 loop=0)

Coordinate  (8, 0)
Coordinate  (7, 0)
Coordinate  (7, 1)
Coordinate  (7, 2)
Coordinate  (7, 3)
Coordinate  (7, 4)
Coordinate  (8, 4)
Coordinate  (9, 4)
Coordinate  (9, 5)
Coordinate  (9, 6)
Coordinate  (9, 7)
Coordinate  (9, 8)
Coordinate  (9, 9)
Goal Reached!!!, total battery points is:  19687


<img src="images/detected_goldbox.gif" width="50%" />