forked from LoisLab/qbot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example-physical.py
32 lines (27 loc) · 1.45 KB
/
example-physical.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import numpy as np
from rlbotenv import *
from qhwbot_28byj import *
################################################################################
# #
# Basic Q-Learning: Physical Robot #
# #
################################################################################
# Wrap the physical 28BYJ stepper-motor robot in the RL environment.
# NOTE(review): assumes (3, 12) are hardware configuration parameters for
# QHwBot_28byj (e.g. pins or gearing) — confirm against qhwbot_28byj.
e = RlBotEnv(QHwBot_28byj(3,12))
# create the q-table: one row per state, one column per action;
# random init breaks ties between otherwise-equal actions
q = np.random.rand(e.bot.observation_space(), e.bot.action_space())
# try changing these hyper-parameters...
explore = 0.1 # exploration rate (odds of taking a random action)
alpha = 0.1   # learning rate (proportional weight of new v. old information)
gamma = 0.9   # discount rate (relative value of future v. current reward)
for n in range(10):                          # run 10 training episodes
    state = e.reset()
    done = False
    while not done:
        if np.random.random() < explore:     # explore the state-action space
            action = e.bot.sample()          # ...random action
        else:                                # exploit the info in the q-table
            action = np.argmax(q[state])     # ...best known action
        next_state, reward, done = e.step(action)
        # update the q-table (see https://en.wikipedia.org/wiki/Q-learning)
        # FIX: a terminal state has no future value — bootstrapping from
        # q[next_state] when done would add random-init noise to the target,
        # so the terminal transition's target is just the reward
        future = 0.0 if done else np.max(q[next_state])
        q[state][action] = (1-alpha) * q[state][action] + alpha * (reward + gamma * future)
        state = next_state