-
Notifications
You must be signed in to change notification settings - Fork 8.6k
/
continuous_mountain_car.py
149 lines (117 loc) · 5.15 KB
/
continuous_mountain_car.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
import numpy as np
import gym
from gym import spaces
from gym.utils import seeding
class Continuous_MountainCarEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
def __init__(self, goal_velocity = 0):
self.min_action = -1.0
self.max_action = 1.0
self.min_position = -1.2
self.max_position = 0.6
self.max_speed = 0.07
self.goal_position = 0.45 # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
self.goal_velocity = goal_velocity
self.power = 0.0015
self.low_state = np.array([self.min_position, -self.max_speed])
self.high_state = np.array([self.max_position, self.max_speed])
self.viewer = None
self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
dtype=np.float32)
self.seed()
self.reset()
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def step(self, action):
position = self.state[0]
velocity = self.state[1]
force = min(max(action[0], -1.0), 1.0)
velocity += force*self.power -0.0025 * math.cos(3*position)
if (velocity > self.max_speed): velocity = self.max_speed
if (velocity < -self.max_speed): velocity = -self.max_speed
position += velocity
if (position > self.max_position): position = self.max_position
if (position < self.min_position): position = self.min_position
if (position==self.min_position and velocity<0): velocity = 0
done = bool(position >= self.goal_position and velocity >= self.goal_velocity)
reward = 0
if done:
reward = 100.0
reward-= math.pow(action[0],2)*0.1
self.state = np.array([position, velocity])
return self.state, reward, done, {}
def reset(self):
self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
return np.array(self.state)
# def get_state(self):
# return self.state
def _height(self, xs):
return np.sin(3 * xs)*.45+.55
def render(self, mode='human'):
screen_width = 600
screen_height = 400
world_width = self.max_position - self.min_position
scale = screen_width/world_width
carwidth=40
carheight=20
if self.viewer is None:
from gym.envs.classic_control import rendering
self.viewer = rendering.Viewer(screen_width, screen_height)
xs = np.linspace(self.min_position, self.max_position, 100)
ys = self._height(xs)
xys = list(zip((xs-self.min_position)*scale, ys*scale))
self.track = rendering.make_polyline(xys)
self.track.set_linewidth(4)
self.viewer.add_geom(self.track)
clearance = 10
l,r,t,b = -carwidth/2, carwidth/2, carheight, 0
car = rendering.FilledPolygon([(l,b), (l,t), (r,t), (r,b)])
car.add_attr(rendering.Transform(translation=(0, clearance)))
self.cartrans = rendering.Transform()
car.add_attr(self.cartrans)
self.viewer.add_geom(car)
frontwheel = rendering.make_circle(carheight/2.5)
frontwheel.set_color(.5, .5, .5)
frontwheel.add_attr(rendering.Transform(translation=(carwidth/4,clearance)))
frontwheel.add_attr(self.cartrans)
self.viewer.add_geom(frontwheel)
backwheel = rendering.make_circle(carheight/2.5)
backwheel.add_attr(rendering.Transform(translation=(-carwidth/4,clearance)))
backwheel.add_attr(self.cartrans)
backwheel.set_color(.5, .5, .5)
self.viewer.add_geom(backwheel)
flagx = (self.goal_position-self.min_position)*scale
flagy1 = self._height(self.goal_position)*scale
flagy2 = flagy1 + 50
flagpole = rendering.Line((flagx, flagy1), (flagx, flagy2))
self.viewer.add_geom(flagpole)
flag = rendering.FilledPolygon([(flagx, flagy2), (flagx, flagy2-10), (flagx+25, flagy2-5)])
flag.set_color(.8,.8,0)
self.viewer.add_geom(flag)
pos = self.state[0]
self.cartrans.set_translation((pos-self.min_position)*scale, self._height(pos)*scale)
self.cartrans.set_rotation(math.cos(3 * pos))
return self.viewer.render(return_rgb_array = mode=='rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None