-
Notifications
You must be signed in to change notification settings - Fork 0
/
openai-car.py
61 lines (41 loc) · 1.42 KB
/
openai-car.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gym
import matplotlib.pyplot as plt  # kept: used by earlier exploratory rendering code; do not remove
import time

# Random-action demo on the classic-control MountainCar task.
# NOTE(review): this uses the legacy gym API — env.step() returns a
# 4-tuple (obs, reward, done, info) and env.render() takes no mode
# argument. On gym >= 0.26 / gymnasium, step() returns 5 values
# (terminated/truncated split) and rendering is configured in make().
env = gym.make('MountainCar-v0')

# Number of steps you run the agent for.
num_steps = 1500

obs = env.reset()
for step in range(num_steps):
    # Take a random action; swap in a policy for something smarter, e.g.
    #   action = my_intelligent_agent_fn(obs)
    action = env.action_space.sample()

    # Apply the action and observe the result.
    obs, reward, done, info = env.step(action)

    # Render the current frame.
    env.render()

    # Wait a bit before the next frame unless you want to see a crazy fast video.
    time.sleep(0.001)

    # If the episode is up, start another one. Capture the fresh initial
    # observation so `obs` stays in sync with the new episode (the original
    # discarded reset()'s return value, leaving a stale observation).
    if done:
        obs = env.reset()

# Close the env and release the render window.
env.close()

# Show the observation-space type (a gym.spaces.Box for MountainCar).
# print() so the information is actually visible when run as a script —
# a bare expression statement discards its value outside a REPL.
print(type(env.observation_space))