This repository has been archived by the owner on Jan 24, 2021. It is now read-only.
/
main.py
63 lines (46 loc) · 1.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""
Reinforcement learning maze example.
This is the main script: a bike-driver agent learns, via Q-learning, to reach the FINAL POINT.
This script is modified from https://morvanzhou.github.io/tutorials/
"""
from env_tk import Maze
from RL_brain import QLearningTable
# Step name passed to env.step() for each integer action id chosen by the
# agent.
_ACTION_STEPS = {0: 'UP', 1: 'DOWN', 2: 'RIGHT', 3: 'LEFT'}


def update(episodes=10000):
    """Run the training episodes, then close the environment.

    Works on the module-level ``env`` and ``RL`` objects created in the
    ``__main__`` section. In every step the agent picks an action id for
    the stringified observation, the id is translated to a step name, the
    environment is advanced, and the agent learns from the transition.

    Args:
        episodes: Number of episodes to run. Defaults to 10000, the count
            that used to be hard-coded, so ``update()`` behaves as before.

    Raises:
        ValueError: If the agent returns an action id that has no step
            name. The previous ``if``/``elif`` chain had no fallback, so
            such an id either crashed with ``UnboundLocalError`` or
            silently replayed the previous step's move.
    """
    for _ in range(episodes):
        # initial observation of the fresh episode
        observation = env.reset()

        while True:
            # redraw the environment
            env.render()

            # RL chooses an action id based on the current observation
            action = RL.choose_action(str(observation))
            action_step = _ACTION_STEPS.get(action)
            if action_step is None:
                raise ValueError('unknown action id: %r' % (action,))

            # RL takes the action and gets the next observation and reward.
            # NOTE(review): the fourth value returned by env.step() (named
            # action_space in the earlier version) is not used here;
            # presumably it lists the moves allowed from the new state and
            # was meant for filtering actions -- confirm against env_tk.
            observation_, reward, done, _ = env.step(action_step)

            # RL learns from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # the next observation becomes the current one
            observation = observation_

            # leave the step loop when the episode has ended
            if done:
                break

    # end of game
    print('game over')
    env.destroy()
if __name__ == "__main__":
    # Both objects are module-level globals because update() reads them.
    env = Maze()
    # The agent chooses among four integer action ids: 0, 1, 2 and 3.
    RL = QLearningTable(actions=[0, 1, 2, 3])

    # Schedule update() through the environment's after() hook, then hand
    # control to its main loop.
    # NOTE(review): presumably Maze is a Tk window, making 50 a delay in
    # milliseconds -- confirm against env_tk.
    env.after(50, update)
    env.mainloop()