-
Notifications
You must be signed in to change notification settings - Fork 0
/
env_v4.py
237 lines (187 loc) · 9.53 KB
/
env_v4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import gym
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, Rectangle
from matplotlib.collections import LineCollection
class MobileRobotEnv(gym.Env):
    """Planar point-robot environment steered by virtual spring-damper forces.

    The agent does not command the robot directly.  Each action supplies a
    (damping, spring) gain pair for every obstacle slot plus one pair for the
    target; the environment converts those gains into forces, integrates the
    robot state with explicit Euler steps and moves the obstacles randomly.

    Note: ``reset`` and ``step`` do not follow the standard Gym signatures.
    ``reset`` returns ``(observation, count_in_area)`` and ``step`` returns a
    6-tuple whose first element is that same pair (see ``step``).
    """

    def __init__(self, num_obs=6, maxtime=1000):
        """Create the environment.

        Args:
            num_obs: Number of moving obstacles.
            maxtime: Step count after which an episode is terminated.
        """
        super().__init__()
        self.robot_size = 0.3
        self.robot_mass = 0.3
        self.max_speed = 5
        self.max_acc = 2
        self.time_step = 0.05
        self.max_distance = 20
        self.num_obs = num_obs
        self.detection_range = 5
        self.maxtime = maxtime
        self.target_pos = self._target_position(0)

        # One (damping, spring) pair per obstacle slot, plus one for the target.
        vector_len = 2 * (self.num_obs + 1)
        self.action_space = gym.spaces.Box(
            low=0, high=10, shape=(vector_len,), dtype=np.float32
        )
        # Observations are positions relative to the robot, so each component
        # can be negative; the bounds must be symmetric around zero.
        self.observation_space = gym.spaces.Box(
            low=-self.max_distance,
            high=self.max_distance,
            shape=(vector_len,),
            dtype=np.float32,
        )

        self.obstacle_damping_coef = np.zeros(num_obs)
        self.obstacle_spring_coef = np.zeros(num_obs)
        self.target_damping_coef = 0
        self.target_spring_coef = 0
        self.obs_force = 0
        self.count_in_area = 0
        self.desired_positions = np.zeros((self.num_obs, 2))
        self.reset()

    @staticmethod
    def _target_position(t):
        """Return the target position at step ``t``.

        The target moves on a circle of radius 7 centred at (10, 10) and
        completes one revolution every 300 steps.
        """
        phase = 2 * np.pi * t / 300
        return np.array(
            [10 + 7 * np.cos(phase), 10 + 7 * np.sin(phase)], dtype=np.float32
        )

    def reset(self):
        """Start a new episode with random robot and obstacle placement.

        Returns:
            The ``(observation, count_in_area)`` pair produced by
            ``_get_observation``.
        """
        low = self.robot_size
        high = self.max_distance - self.robot_size
        self.robot_pos = np.random.uniform(low=low, high=high, size=2)
        self.obstacle_pos = np.random.uniform(low=low, high=high, size=(self.num_obs, 2))
        # One velocity row per obstacle (previously a hard-coded 10 rows).
        self.obstacle_vel = np.random.normal(loc=0.0, scale=1.0, size=(self.num_obs, 2))
        self.robot_vel = np.zeros(2)
        self.t = 0
        self.target_area = 4 * self.robot_size

        # Clear per-episode state so nothing leaks in from the previous
        # episode; in particular the first observation must use the target's
        # position at t = 0, not wherever it ended up last time.
        self.target_pos = self._target_position(self.t)
        self.obstacle_damping_coef = np.zeros(self.num_obs)
        self.obstacle_spring_coef = np.zeros(self.num_obs)
        self.target_damping_coef = 0
        self.target_spring_coef = 0
        self.desired_positions = np.zeros((self.num_obs, 2))
        return self._get_observation()

    def _get_observation(self):
        """Build the observation vector for the current state.

        Obstacles closer than ``detection_range`` are written nearest-first as
        (dx, dy) offsets from the robot; unused obstacle slots stay zero.  The
        final two entries hold the offset from the robot to the target.

        Side effects: updates ``target_dist``, ``count_in_area`` and
        ``closest_indices``.

        Returns:
            ``(observation, count_in_area)``.
        """
        self.target_dist = self.target_pos - self.robot_pos
        rel_pos = self.obstacle_pos - self.robot_pos
        dists = np.linalg.norm(rel_pos, axis=1)
        self.count_in_area = np.count_nonzero(dists < self.detection_range)
        self.closest_indices = np.argsort(dists)[: self.count_in_area]

        observation = np.zeros(2 * (self.num_obs + 1))
        observation[: 2 * self.count_in_area] = rel_pos[self.closest_indices].ravel()
        observation[-2:] = self.target_dist
        return observation, self.count_in_area

    def step(self, action):
        """Advance the simulation by one time step.

        Args:
            action: Sequence of length ``2 * (num_obs + 1)``.  Entries
                ``2*i`` / ``2*i + 1`` are the damping / spring gains for the
                i-th closest detected obstacle; the last two entries are the
                damping / spring gains for the target.

        Returns:
            ``((observation, count_in_area), reward, done, info,
            count_in_area, success)``.
        """
        self.target_damping_coef, self.target_spring_coef = action[-2], action[-1]

        # Map action slots onto the obstacles that were reported in the most
        # recent observation (nearest first).
        rel_pos = self.obstacle_pos - self.robot_pos
        dists = np.linalg.norm(rel_pos, axis=1)
        self.closest_indices = np.argsort(dists)[: self.count_in_area]
        for slot, idx in enumerate(self.closest_indices):
            self.obstacle_damping_coef[idx] = action[2 * slot]
            self.obstacle_spring_coef[idx] = action[2 * slot + 1]

        target_force = (
            self.target_spring_coef * (self.target_pos - self.robot_pos)
            + self.target_damping_coef * (0 - self.robot_vel)
        )

        # Obstacles receive a fresh random velocity every step.
        self.obstacle_vel = 2 * np.random.uniform(low=-2, high=2, size=(self.num_obs, 2))
        self.target_vel = 2 * np.random.uniform(low=-2, high=2, size=(2))

        obstacle_force = np.zeros(2)
        self.desired_positions = np.zeros((self.num_obs, 2))
        gains = zip(self.obstacle_pos, self.obstacle_spring_coef, self.obstacle_damping_coef)
        for i, (pos, spring_coef, damping_coef) in enumerate(gains):
            offset = self.robot_pos - pos
            dist_norm = np.linalg.norm(offset)
            # Skip obstacles out of range, and the degenerate zero-distance
            # case where the push direction is undefined (would divide by 0
            # and poison the state with NaN).
            if not 0 < dist_norm < self.detection_range:
                continue
            # The spring pulls the robot toward the point on the detection
            # circle around the obstacle, along the obstacle->robot direction.
            desired_pos = pos + self.detection_range * offset / dist_norm
            self.obs_force = (
                spring_coef * (desired_pos - self.robot_pos)
                + damping_coef * (0.0 - self.robot_vel)
            )
            obstacle_force += self.obs_force
            self.desired_positions[i] = desired_pos

        total_force = target_force + obstacle_force
        acc = np.clip(total_force / self.robot_mass, -self.max_acc, self.max_acc)
        self.robot_vel += acc * self.time_step
        self.robot_vel = np.clip(self.robot_vel, -self.max_speed, self.max_speed)
        self.robot_pos += self.robot_vel * self.time_step
        self.robot_pos = np.clip(self.robot_pos, 0, self.max_distance - self.robot_size)

        self.obstacle_pos += self.obstacle_vel * self.time_step
        self.obstacle_pos = np.clip(self.obstacle_pos, 0, self.max_distance - self.robot_size)
        self.target_pos = self._target_position(self.t)

        target_gap = np.linalg.norm(self.target_pos - self.robot_pos)
        obstacle_gaps = np.linalg.norm(self.obstacle_pos - self.robot_pos, axis=1)
        collided = bool(np.any(obstacle_gaps < self.robot_size))

        # Terminal conditions are tested before the shaping rewards; otherwise
        # a robot lingering near the target could neither collide nor time out
        # and the episode might never end.
        done = False
        success = False
        if target_gap < self.target_area:
            done = True
            success = True
            reward = 40
        elif collided:
            done = True
            reward = -100
        elif self.t > self.maxtime:
            done = True
            reward = 10
        elif target_gap < self.detection_range:
            reward = 10 - target_gap
        else:
            reward = -target_gap

        self.t += 1
        # _get_observation refreshes count_in_area, so it must run before the
        # count is read for the return tuple.
        observation = self._get_observation()
        return observation, reward, done, {}, self.count_in_area, success

    def render(self):
        """Draw the current state in an interactive matplotlib window."""
        # fignum_exists works on every backend; probing manager.window does
        # not (non-GUI managers have no such attribute).
        fig_num = getattr(getattr(self, "fig", None), "number", None)
        if fig_num is None or not plt.fignum_exists(fig_num):
            self.fig, self.ax = plt.subplots(figsize=(15, 15))
            plt.ion()

        self.ax.clear()
        self.ax.set_xlim(0 - 3, self.max_distance + 3)
        self.ax.set_ylim(0 - 3, self.max_distance + 3)
        self.ax.set_aspect('equal')

        # Draw the robot
        robot = Circle(self.robot_pos, self.robot_size, color='b', label="Robot")
        self.ax.add_patch(robot)

        # Draw the target
        self.ax.scatter(
            self.target_pos[0], self.target_pos[1], c='red', marker='x', s=100, label='Target'
        )

        # Draw the obstacles and, for those in range, the point each one
        # pushes the robot toward.
        for pos, desired_pos in zip(self.obstacle_pos, self.desired_positions):
            self.ax.scatter(pos[0], pos[1], c='black', marker='s', s=100, label='Obstacle')
            if np.linalg.norm(self.robot_pos - pos) < self.detection_range:
                self.ax.plot(
                    [pos[0], desired_pos[0]], [pos[1], desired_pos[1]], color='y', linestyle='--'
                )
                self.ax.scatter(
                    desired_pos[0], desired_pos[1], c='r', marker='o', s=30, label='Push_des'
                )

        # Draw the legend, collapsing repeated labels into a single entry.
        handles, labels = self.ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        self.ax.legend(by_label.values(), by_label.keys(), loc='upper right')

        # Draw the detection range and target acceptance circles
        detection_circle = Circle(
            self.robot_pos, self.detection_range, fill=False, linestyle='dashed', color='gray'
        )
        target_area_circle = Circle(
            self.target_pos, self.target_area, fill=False, linestyle='dashed', color='gray'
        )
        self.ax.add_patch(detection_circle)
        self.ax.add_patch(target_area_circle)

        # Robot velocity arrow
        self.ax.quiver(
            self.robot_pos[0],
            self.robot_pos[1],
            self.robot_vel[0],
            self.robot_vel[1],
            angles="xy",
            scale_units="xy",
            scale=1,
            color="blue",
        )

        # Connect the robot to every ranked obstacle and to the target, and
        # annotate each obstacle with its rank and current gains.
        segments = []
        for idx, pos in enumerate(self.obstacle_pos):
            if np.linalg.norm(pos - self.robot_pos) < self.detection_range:
                if idx in self.closest_indices:
                    obstacle_order = np.where(self.closest_indices == idx)[0][0] + 1
                    label = (
                        f"{obstacle_order}\n"
                        f"S: {self.obstacle_spring_coef[idx]:.2f}\n"
                        f"D: {self.obstacle_damping_coef[idx]:.2f}"
                    )
                    self.ax.text(pos[0] + 0.5, pos[1], label, fontsize=12, color='black')
                    segments.append([self.robot_pos, pos])
            else:
                self.ax.text(pos[0] + 0.5, pos[1], "Out", fontsize=12, color='black')
        segments.append([self.robot_pos, self.target_pos])

        # NOTE: no scalar array is attached to the collection, so the colormap
        # has no visible effect and all lines use the default colour.
        line_collection = LineCollection(segments, cmap='twilight_shifted_r', linewidths=1)
        self.ax.add_collection(line_collection)

        self.ax.text(
            self.target_pos[0] + 0.5,
            self.target_pos[1],
            f"D: {self.target_damping_coef:.2f}\nS: {self.target_spring_coef:.2f}",
            fontsize=12,
            color='red',
        )
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()

    def close(self):
        """Close the render window if one was opened."""
        if hasattr(self, 'fig'):
            plt.close(self.fig)