-
Notifications
You must be signed in to change notification settings - Fork 0
/
lt_pred.py
233 lines (204 loc) · 12.5 KB
/
lt_pred.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import tensorflow as tf
import numpy as np
from scipy.io import savemat
from dmbrl.misc.DotmapUtils import get_required_argument
class lt_pred(object):
    def __init__(self, params, policy):
        """Creates class instance.

        Builds a dynamics model from the given constructor and compiles a
        TensorFlow graph that rolls the supplied policy forward through the
        model for `plan_hor` steps, producing predicted particle trajectories.

        Arguments:
            params: A DotMap of parameters. Fields read by this class:
                .dX (int): State dimensionality.
                .dU (int): Action dimensionality.
                .prop_cfg
                    .model_init_cfg (DotMap): Model initialization parameters.
                        .model_constructor (func): A function which constructs an
                            instance of the dynamics model, given model_init_cfg.
                    .model_train_cfg (dict): (optional) Training parameters passed
                        into the model every time it is trained. Defaults to {}.
                    .model_pretrained (bool): (optional) If True, assumes that the
                        model has been trained upon construction.
                    .mode (str): Propagation method. Choose between
                        [E, DS, TSinf, TS1, MM].
                        See https://arxiv.org/abs/1805.12114 for details.
                    .npart (int): Number of particles used for DS, TSinf, TS1, and
                        MM propagation methods.
                    .ign_var (bool): (optional) Determines whether or not the
                        variance output of the model will be ignored. Defaults to
                        False unless deterministic propagation ("E") is used.
                .opt_cfg
                    .plan_hor (int): The prediction horizon.
            policy: A callable policy(obs, t) mapping a batch of observations and
                a (tensor) time step to a batch of actions; invoked inside the
                prediction graph, so it must handle TensorFlow tensors.

        Raises:
            ValueError: On an invalid propagation mode, a particle count that is
                not a multiple of the ensemble size (TS modes), or more than one
                particle with deterministic propagation.
            NotImplementedError: If the constructed model is not a TF model.
        """
        self.dX = get_required_argument(params, "dX", "Must provide state dim.")
        self.dU = get_required_argument(params, "dU", "Must provide action dim.")
        # Build the dynamics model via the user-supplied constructor.
        self.model = get_required_argument(
            params.prop_cfg.model_init_cfg, "model_constructor", "Must provide a model constructor."
        )(params.prop_cfg.model_init_cfg)
        self.model_train_cfg = params.prop_cfg.get("model_train_cfg", {})
        self.prop_mode = get_required_argument(params.prop_cfg, "mode", "Must provide propagation method.")
        self.npart = get_required_argument(params.prop_cfg, "npart", "Must provide number of particles.")
        # Deterministic ("E") propagation never samples, so variance is always ignored.
        self.ign_var = params.prop_cfg.get("ign_var", False) or self.prop_mode == "E"
        self.plan_hor = get_required_argument(params.opt_cfg, "plan_hor", "Must provide planning horizon.")
        self.policy = policy

        # Perform argument checks
        if self.prop_mode not in ["E", "DS", "MM", "TS1", "TSinf"]:
            raise ValueError("Invalid propagation method.")
        if self.prop_mode in ["TS1", "TSinf"] and self.npart % self.model.num_nets != 0:
            raise ValueError("Number of particles must be a multiple of the ensemble size.")
        if self.prop_mode == "E" and self.npart != 1:
            raise ValueError("Deterministic propagation methods only need one particle.")

        # Controller state variables.
        self.has_been_trained = params.prop_cfg.get("model_pretrained", False)
        # Empty training set: inputs are (obs, action) pairs of width dU + dX,
        # targets are next observations of width dX. train() appends to these.
        # (Simplified from `np.zeros([1, self.dX]).shape[-1]`, which equals dX.)
        self.train_in = np.array([]).reshape(0, self.dU + self.dX)
        self.train_targs = np.array([]).reshape(0, self.dX)

        if self.model.is_tf_model:
            # Current observation fed into the prediction graph; callers update it
            # (e.g. via tf.Variable.load / assign) before evaluating self.pred_traj.
            self.sy_cur_obs = tf.Variable(np.zeros(self.dX), dtype=tf.float32)
            self.pred_traj = self.lt_predict(get_pred_trajs=True)
            self.model.sess.run(tf.variables_initializer([self.sy_cur_obs]))
        else:
            raise NotImplementedError()

        print("Created an lt_pred prop mode %s, %d particles. " % (self.prop_mode, self.npart) +
              ("Ignoring variance." if self.ign_var else ""))

    def train(self, obs_trajs, acs_trajs, rews_trajs):
        """Trains the internal dynamics model of this predictor.

        Arguments:
            obs_trajs: A list of observation matrices, observations in rows.
            acs_trajs: A list of action matrices, actions in rows.
            rews_trajs: A list of reward arrays. Unused here; kept for interface
                compatibility with controllers sharing this signature.

        Returns: None.
        """
        # Construct new training points and add them to the training set.
        new_train_in, new_train_targs = [], []
        for obs, acs in zip(obs_trajs, acs_trajs):
            # Inputs are (obs_t, ac_t); each trajectory of T+1 observations and
            # T actions yields T training pairs.
            new_train_in.append(np.concatenate([obs[:-1], acs], axis=-1))
            # BUG FIX: the original called list.append with two arguments
            # (obs[:-1], obs[1:]), which raises TypeError. The target of the
            # learned mapping obs -> next_obs is obs[1:].
            new_train_targs.append(obs[1:])
        # BUG FIX: accumulate into the existing training set instead of
        # overwriting it with raw Python lists; this preserves data from earlier
        # calls and keeps train_in/train_targs as the 2-D arrays that __init__
        # initializes them to.
        self.train_in = np.concatenate([self.train_in] + new_train_in, axis=0)
        self.train_targs = np.concatenate([self.train_targs] + new_train_targs, axis=0)

        # Train the model
        self.model.train(self.train_in, self.train_targs, **self.model_train_cfg)
        self.has_been_trained = True

    def lt_predict(self, get_pred_trajs=False):
        """Builds the graph that rolls the policy through the model.

        Arguments:
            get_pred_trajs (bool): If True, the per-step observations are stacked
                and returned. NOTE(review): the False path builds no loop and
                returns None implicitly — only the True path is used by __init__.

        Returns:
            Tensor of shape [plan_hor + 1, -1, npart, dX] containing the initial
            observation followed by the predicted observation at each step.
        """
        # nopt controls how many copies of the current observation are rolled out
        # (here tied to the horizon, as in the original; each copy gets npart
        # particles).
        t, nopt = tf.constant(0), tf.convert_to_tensor(self.plan_hor, dtype=tf.int32)
        init_obs = tf.tile(self.sy_cur_obs[None], [nopt * self.npart, 1])

        def continue_prediction(t, *args):
            return tf.less(t, self.plan_hor)

        if get_pred_trajs:
            pred_trajs = init_obs[None]

            def iteration(t, cur_obs, pred_trajs):
                # Query the policy for actions, step the model, and record the
                # predicted next observations.
                cur_acs = self.policy(cur_obs, t)
                next_obs = self._predict_next_obs(cur_obs, cur_acs)
                pred_trajs = tf.concat([pred_trajs, next_obs[None]], axis=0)
                return t + 1, next_obs, pred_trajs

            _, _, pred_trajs = tf.while_loop(
                cond=continue_prediction, body=iteration, loop_vars=[t, init_obs, pred_trajs],
                shape_invariants=[
                    # pred_trajs grows along axis 0 each iteration, so its leading
                    # dims must be left unconstrained.
                    t.get_shape(), init_obs.get_shape(), tf.TensorShape([None, None, self.dX])
                ]
            )
            # Separate the particle dimension: [horizon + 1, -1, npart, dX].
            pred_trajs = tf.reshape(pred_trajs, [self.plan_hor + 1, -1, self.npart, self.dX])
            return pred_trajs

    def _predict_next_obs(self, obs, acs):
        """Predicts next observations for a batch of (obs, action) pairs using
        the configured propagation mode (E/DS/TS1/TSinf/MM)."""
        proc_obs = obs
        if self.model.is_tf_model:
            # TS Optimization: Expand so that particles are only passed through one of the networks.
            if self.prop_mode == "TS1":
                # Randomly shuffle particles within each rollout so each step
                # reassigns particles to ensemble networks (top_k over uniform
                # noise yields a random permutation).
                proc_obs = tf.reshape(proc_obs, [-1, self.npart, proc_obs.get_shape()[-1]])
                sort_idxs = tf.nn.top_k(
                    tf.random_uniform([tf.shape(proc_obs)[0], self.npart]),
                    k=self.npart
                ).indices
                tmp = tf.tile(tf.range(tf.shape(proc_obs)[0])[:, None], [1, self.npart])[:, :, None]
                idxs = tf.concat([tmp, sort_idxs[:, :, None]], axis=-1)
                proc_obs = tf.gather_nd(proc_obs, idxs)
                proc_obs = tf.reshape(proc_obs, [-1, proc_obs.get_shape()[-1]])
            if self.prop_mode == "TS1" or self.prop_mode == "TSinf":
                proc_obs, acs = self._expand_to_ts_format(proc_obs), self._expand_to_ts_format(acs)

            # Obtain model predictions
            inputs = tf.concat([proc_obs, acs], axis=-1)
            mean, var = self.model.create_prediction_tensors(inputs)
            if self.model.is_probabilistic and not self.ign_var:
                # Sample from the predicted Gaussian.
                predictions = mean + tf.random_normal(shape=tf.shape(mean), mean=0, stddev=1) * tf.sqrt(var)
                if self.prop_mode == "MM":
                    # Moment matching: collapse particles to a single Gaussian per
                    # rollout, then resample every particle from it.
                    model_out_dim = predictions.get_shape()[-1].value
                    predictions = tf.reshape(predictions, [-1, self.npart, model_out_dim])
                    prediction_mean = tf.reduce_mean(predictions, axis=1, keep_dims=True)
                    prediction_var = tf.reduce_mean(tf.square(predictions - prediction_mean), axis=1, keep_dims=True)
                    z = tf.random_normal(shape=tf.shape(predictions), mean=0, stddev=1)
                    samples = prediction_mean + z * tf.sqrt(prediction_var)
                    predictions = tf.reshape(samples, [-1, model_out_dim])
            else:
                predictions = mean

            # TS Optimization: Remove additional dimension
            if self.prop_mode == "TS1" or self.prop_mode == "TSinf":
                predictions = self._flatten_to_matrix(predictions)
            if self.prop_mode == "TS1":
                # Undo the earlier shuffle: top_k over the negated indices
                # recovers the inverse permutation.
                predictions = tf.reshape(predictions, [-1, self.npart, predictions.get_shape()[-1]])
                sort_idxs = tf.nn.top_k(
                    -sort_idxs,
                    k=self.npart
                ).indices
                idxs = tf.concat([tmp, sort_idxs[:, :, None]], axis=-1)
                predictions = tf.gather_nd(predictions, idxs)
                predictions = tf.reshape(predictions, [-1, predictions.get_shape()[-1]])
            return predictions
        else:
            raise NotImplementedError()

    def _expand_to_ts_format(self, mat):
        """Reshapes [batch, dim] rows into [num_nets, batch', dim] so each
        ensemble network sees only its own slice of the particles."""
        dim = mat.get_shape()[-1]
        return tf.reshape(
            tf.transpose(
                tf.reshape(mat, [-1, self.model.num_nets, self.npart // self.model.num_nets, dim]),
                [1, 0, 2, 3]
            ),
            [self.model.num_nets, -1, dim]
        )

    def _flatten_to_matrix(self, ts_fmt_arr):
        """Inverse of _expand_to_ts_format: collapses [num_nets, batch', dim]
        back into the original [batch, dim] row ordering."""
        dim = ts_fmt_arr.get_shape()[-1]
        return tf.reshape(
            tf.transpose(
                tf.reshape(ts_fmt_arr, [self.model.num_nets, -1, self.npart // self.model.num_nets, dim]),
                [1, 0, 2, 3]
            ),
            [-1, dim]
        )