training_data_gen.py
import metaworld
import random
import os
import sys
import time
import numpy as np
from metaworld.envs import (ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE,
                            ALL_V2_ENVIRONMENTS_GOAL_HIDDEN)
from metaworld.policies import *
from tests.metaworld.envs.mujoco.sawyer_xyz.test_scripted_policies import test_cases_latest_nonoise
from metaworld.data.dataset import *
from datetime import datetime
import gym
import argparse

# Suppress float conversion warnings
gym.logger.set_level(40)
play_cases = [
    # name, policy
    ['bin-picking-v2', PlayPolicy(SawyerBinPickingV2Policy)],
    ['box-close-v2', PlayPolicy(SawyerBoxCloseV2Policy)],
    ['door-lock-v2', PlayPolicy(SawyerDoorLockV2Policy)],
    ['door-unlock-v2', PlayPolicy(SawyerDoorUnlockV2Policy)],
    ['hand-insert-v2', PlayPolicy(SawyerHandInsertV2Policy)],
]
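# NOTE: PlayPolicy wraps a scripted expert policy; its counter_max and grip_flip_p knobs are
# assigned in gen_data() below when --use_play_policy is passed.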
###########################
# Instructions for using different renderers (CPU vs GPU) with mujoco-py: http://vedder.io/misc/mujoco_py.html
###########################
"""
For constructing a semi-shaped reward, note that every environment has an evaluate_state(.) method,
which returns an info dict with various reward components, such as in_place_reward, near_object, etc.
We just need to interpret those components and assign our own reward instead of the one provided by
Meta-World.
"""
# Which action noise level to apply? For ideas, see https://github.com/rlworkgroup/metaworld/blob/cfd837e31d65c9d2b62b7240c68a26b04a9166d9/tests/metaworld/envs/mujoco/sawyer_xyz/test_scripted_policies.py
def gen_data(tasks, num_traj, noise, res, include_depth, camera, data_dir_path,
             write_data=True, write_video=False, video_fps=80,
             use_play_policy=False, counter_max=None, grip_flip_p=None):
    res = (res, res)
    MAX_steps_at_goal = 10
    act_tolerance = 1e-5
    lim = 1 - act_tolerance
    # Check out the available environments:
    # print(f'Available tasks: {metaworld.ML1.ENV_NAMES}, in total {len(metaworld.ML1.ENV_NAMES)} tasks.')
    # print(','.join([x[0:len(x)-3] for x in metaworld.ML1.ENV_NAMES]))
    cases = play_cases if use_play_policy else test_cases_latest_nonoise
    for case in cases:
        if case[0] not in tasks:
            continue
        task_name = case[0]
        policy = case[1]
        if use_play_policy:
            policy.counter_max = counter_max
            policy.grip_flip_p = grip_flip_p
        print(f'----------Running task {task_name}------------')
        # Although the environment generates dense rewards (goal_cost_reward=False), any goal-cost
        # reward or subgoal reward can still be constructed when this dataset is loaded.
        env = metaworld.mw_gym_make(task_name, goal_cost_reward=False, stop_at_goal=True,
                                    steps_at_goal=MAX_steps_at_goal, cam_height=res[0], cam_width=res[1],
                                    depth=include_depth, train_distrib=True)
        action_space_ptp = env.action_space.high - env.action_space.low
        num_successes = 0
        dt = datetime.now()
        height, width = res
        play_suffix = (('-PLAYPOLICY-ctr-max_' + str(counter_max) + '-grip-flip-p_' + str(grip_flip_p))
                       if use_play_policy else '')
        data_file_name = (task_name + '-num-traj_' + str(num_traj) + '-noise_' + str(noise) + play_suffix
                          + '-res_' + str(height) + '_' + str(width) + '-cam_' + camera
                          + '-depth_' + str(include_depth) + '_' + dt.strftime("%d-%m-%Y-%H.%M.%S") + '.hdf5')
        video_path_root = 'movies'
        video_dir_path = os.path.join(video_path_root,
                                      task_name + '-noise_' + str(noise) + play_suffix
                                      + '-res_' + str(height) + '_' + str(width) + '-cam_' + camera
                                      + '_' + dt.strftime("%d-%m-%Y-%H.%M.%S"))
        task_noise_dir_path = os.path.join(data_dir_path, task_name, 'Sawyer', f'noise{noise}')
        assert not os.path.exists(task_noise_dir_path), \
            f"ERROR: {task_noise_dir_path} already exists. You are risking generating more data than you intend to without noticing."
        os.makedirs(task_noise_dir_path)
        data_writer = MWDatasetWriter(task_noise_dir_path, data_file_name, env, task_name, res, camera,
                                      include_depth, act_tolerance, MAX_steps_at_goal, write_data=write_data)
        for attempt in range(num_traj):
            video_writer = MWVideoWriter(video_dir_path, task_name + '-' + str(attempt + 1), video_fps, res,
                                         write_video=write_video)
            episode = []
            stages = []
            if use_play_policy:
                policy.reset()
            state = env.reset()
            start_time = time.time()
            for t in range(env.max_path_length):
                action = policy.get_action(state['full_state'])
                # Add Gaussian action noise scaled by the action-space range, then clip so the
                # action stays strictly within the action space.
                action = np.random.normal(action, noise * action_space_ptp)
                action = np.clip(action, -lim, lim)
                new_state, reward, done, info = env.step(action)
                episode.append((state['full_state'], state['proprio_state'], state['image'], state['depth'],
                                action, reward, done, info))
                if use_play_policy:
                    stages.append(policy.current_stage)
                # Uncomment for verbose per-step diagnostics:
                # print(f"Step {t} |||" + ', '.join(f"{k}: {v}" for k, v in info.items()))
                state = new_state
                if done:
                    if info['task_accomplished']:
                        print(f'Attempt {attempt + 1} succeeded at step {t}')
                        num_successes += 1
                    else:
                        print(f'Attempt {attempt + 1} ended unsuccessfully at time step {t}')
                    end_time = time.time()
                    # t is 0-based, so t + 1 steps have been executed at this point.
                    print(f"Average time per step: {(end_time - start_time) / (t + 1)}")
                    break
            if use_play_policy:
                if 'play' in stages:
                    # Find which stage preceded play, then start recording from a randomly chosen
                    # step of that stage.
                    prev_stage = stages[stages.index('play') - 1]
                    prev_stage_indices = [i for i, v in enumerate(stages) if v == prev_stage]
                    start_index = random.choice(prev_stage_indices)
                    print(f'Starting recording from t = {start_index}')
                else:
                    start_index = None
            else:
                start_index = 0
            if start_index is not None:
                # Write the (suffix of the) trajectory to the dataset and, optionally, to a video.
                for tup in episode[start_index:]:
                    image = tup[2]
                    data_writer.append_data(*tup)
                    video_writer.write(image)
                data_writer.write_trajectory()
            else:
                print('Skipping trajectory (play did not kick in)')
        data_writer.close()
        print('--------------------------------------------------------\n')
        print(f'Success rate for {task_name}: {num_successes / num_traj}\n')
        # Sanity-check the created dataset by loading it back.
        if write_data:
            qlearning_dataset(os.path.join(task_noise_dir_path, data_file_name), reward_type='subgoal')
def add_boolean_arg(parser, name, true, false, default):
    assert true.startswith('--') and false.startswith('--')
    assert type(default) is bool
    true_false = parser.add_mutually_exclusive_group()
    true_false.add_argument(true, dest=name, action='store_true')
    true_false.add_argument(false, dest=name, action='store_false')
    parser.set_defaults(**{name: default})
## Example command for generating data with a noisy expert policy, including a depth camera stream:
#python metaworld/data/training_data_gen.py --tasks=door-open-v2 -d=data --num_traj=10 --noise=0.1 --res=84 -f=20 --camera=corner --write_data --include_depth --nowrite_video
## Example command for generating data with a play policy, recording a video for visualization:
#python metaworld/data/training_data_gen.py --tasks=door-lock-v2 -d=data --num_traj=50 --noise=0 --res=84 -f=20 --use_play_policy --grip_flip_p=0.01 --camera=corner --write_data --write_video
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--tasks", type=str, help = "Tasks for which to generate trajectories from scripted policies")
parser.add_argument("-n", "--num_traj", type=int, help = "Number of trajectories to generate for each task")
parser.add_argument("-p", "--noise", type=float, default=0, help = "Action noise as a fraction of the action space, e.g., 0.1")
parser.add_argument("-r", "--res", type=int, default=84, help = "Resolution of image observations (r x r)")
parser.add_argument("-c", "--camera", type=str, default='corner', help = "Camera. Possible values: 'corner', 'topview', 'corner2', 'corner3', 'behindGripper', 'gripperPOV'")
parser.add_argument("-f", "--video_fps", type=int, default=80, help = "Fps for recording videos. Ignored if the --nowrite_video flag is present.")
parser.add_argument("-d", "--data_dir_path", type=str, default='data', help = "Directory where the demonstration data is to be written. Ignored if the ---nowrite_data flag is present.")
# Should we generate depth frames (HxW arrays whose entries are distances from the camera to objects in the scene, *in millimeters*) in addition to RGB frames?
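    # (Illustrative note, not part of this script: assuming the frames store integer millimeters,
    # a dataset consumer could recover metric depth via depth_m = depth_frame.astype(np.float32) / 1000.0.)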
    add_boolean_arg(parser, 'include_depth', true='--include_depth', false='--noinclude_depth', default=False)
    add_boolean_arg(parser, 'write_data', true='--write_data', false='--nowrite_data', default=True)
    add_boolean_arg(parser, 'write_video', true='--write_video', false='--nowrite_video', default=False)
    add_boolean_arg(parser, 'use_play_policy', true='--use_play_policy', false='--use_expert_policy', default=False)
    # The args below are used only by the play policy
    parser.add_argument('--counter_max', type=int, default=100)
    parser.add_argument('--grip_flip_p', type=float, default=0.005)
    args = parser.parse_args()
    tasks = args.tasks.split(',')
    print('\n')
    print(f'Generating {args.num_traj} trajectories with action noise {args.noise} for tasks {tasks} '
          f'with video resolution {args.res}x{args.res} and {args.camera} camera view.')
    if args.write_video:
        print(f'Videos will be generated at {args.video_fps} fps\n')
    gen_data(tasks, args.num_traj, args.noise, args.res, args.include_depth, args.camera, args.data_dir_path,
             write_data=args.write_data, write_video=args.write_video, video_fps=args.video_fps,
             use_play_policy=args.use_play_policy, counter_max=args.counter_max, grip_flip_p=args.grip_flip_p)