Fix value functions
hartikainen committed May 28, 2018
1 parent d2ea56a commit 16cf48b
Showing 6 changed files with 44 additions and 57 deletions.
examples/mujoco_all_sac.py (2 changes: 0 additions & 2 deletions)
@@ -162,8 +162,6 @@ def run_experiment(variant):
         save_full_state=False,
     )
 
-    algorithm._sess.run(tf.global_variables_initializer())
-
     algorithm.train()
 
 
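Note: the example script no longer runs tf.global_variables_initializer() itself. The diff does not show where initialization moved, so the following is a hypothetical sketch of the assumed pattern; the class internals are invented for illustration:

    # Hypothetical sketch: this relocation is assumed, not shown in the diff.
    import tensorflow as tf

    class RLAlgorithm(object):
        def __init__(self, sess):
            self._sess = sess

        def train(self):
            # Assumed: the algorithm initializes its own variables now,
            # so example scripts only need to call algorithm.train().
            self._sess.run(tf.global_variables_initializer())
            # ... training loop ...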
examples/multigoal_sac.py (3 changes: 2 additions & 1 deletion)
@@ -61,7 +61,8 @@ def run(variant):
mode="train",
squash=True,
bijector_config=bijector_config,
observations_preprocessor=None
observations_preprocessor=None,
q_function=qf
)

plotter = QFPolicyPlotter(
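Note: the policy in the multigoal example now receives the Q-function directly via q_function=qf. A hedged construction sketch; only the keyword arguments come from the diff, while the policy class name and surrounding setup are assumptions:

    # Assumed setup: env, qf, and bijector_config are built earlier in the
    # example; 'Policy' is a stand-in for the actual (unshown) policy class.
    policy = Policy(
        env_spec=env.spec,
        mode="train",
        squash=True,
        bijector_config=bijector_config,
        observations_preprocessor=None,
        q_function=qf,  # new in this commit
    )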
softlearning/algorithms/sql.py (3 changes: 1 addition & 2 deletions)
@@ -166,8 +166,7 @@ def _create_td_update(self):
         target_actions = tf.random_uniform(
             (1, self._value_n_particles, self._action_dim), -1, 1)
         q_value_targets = self.qf.output_for(
-            observations=self._next_observations_ph[:, None, :],
-            actions=target_actions)
+            self._next_observations_ph[:, None, :], target_actions)
         assert_shape(q_value_targets, [None, self._value_n_particles])
 
         self._q_values = self.qf.output_for(
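Note: the keyword arguments observations=/actions= are dropped because output_for is now variadic (def output_for(self, *inputs, reuse=False) in softlearning/misc/nn.py below), so inputs must be positional. The [:, None, :] slice makes each observation broadcast against all sampled particle actions. A shape sketch under assumed dimensions:

    # Shape sketch; B = batch size, K = value_n_particles, and the obs/action
    # dims Do/Da are assumed values (numpy >= 1.20 for broadcast_shapes).
    import numpy as np

    B, K, Do, Da = 32, 16, 10, 4
    obs = np.zeros((B, 1, Do))    # self._next_observations_ph[:, None, :]
    acts = np.zeros((1, K, Da))   # target_actions ~ Uniform(-1, 1)
    # Leading dims broadcast to (B, K): one Q-value per (obs, particle) pair,
    # matching assert_shape(q_value_targets, [None, self._value_n_particles]).
    assert np.broadcast_shapes(obs.shape[:-1], acts.shape[:-1]) == (B, K)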
softlearning/misc/nn.py (13 changes: 7 additions & 6 deletions)
@@ -55,26 +55,27 @@ def __init__(self, inputs, name, layer_sizes, output_nonlinearity=None):
         self._layer_sizes = list(layer_sizes)
         self._output_nonlinearity = output_nonlinearity
 
-        self._output = self._output_for(self._inputs)
+        self._output = self.output_for(*self._inputs)
 
-    def _output_for(self, inputs, reuse=False):
+    def output_for(self, *inputs, reuse=False):
         with tf.variable_scope(self._name, reuse=reuse):
             out = feedforward_net(
                 inputs=inputs,
                 output_nonlinearity=self._output_nonlinearity,
                 layer_sizes=self._layer_sizes)
 
-        return out[..., 0]
+        return out
 
-    def _eval(self, inputs):
-        feeds = {pl: val for pl, val in zip(self._inputs, inputs)}
+    def eval(self, *inputs):
+        feeds = {ph: val for ph, val in zip(self._inputs, inputs)}
 
         return tf_utils.get_default_session().run(self._output, feeds)
 
     def get_params_internal(self, scope='', **tags):
         if len(tags) > 0:
             raise NotImplementedError
 
-        scope += '/' + self._name if scope else self._name
+        scope = scope or tf.get_variable_scope().name
+        scope += '/' + self._name if len(scope) else self._name
 
         return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
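Note: this file is the heart of the commit. The private _output_for/_eval become the public, variadic output_for/eval; the base class stops squeezing the last output dimension (return out instead of return out[..., 0]), leaving that to scalar-valued subclasses; and get_params_internal now falls back to the enclosing variable scope when called without one. A hedged usage sketch; only the signatures come from the diff, the setup around them is illustrative:

    import tensorflow as tf
    from softlearning.misc.nn import MLPFunction

    obs_ph = tf.placeholder(tf.float32, shape=(None, 10), name='observations')
    act_ph = tf.placeholder(tf.float32, shape=(None, 4), name='actions')

    mlp = MLPFunction((obs_ph, act_ph), name='qf', layer_sizes=(128, 128, 1))
    out_t = mlp.output_for(obs_ph, act_ph, reuse=True)  # graph output, positional inputs
    params = mlp.get_params_internal()                  # trainable vars under the scope
    # mlp.eval(obs_batch, act_batch) would run out_t through the default session.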
softlearning/preprocessors/mlp_preprocessor.py (33 changes: 13 additions & 20 deletions)
@@ -8,29 +8,22 @@
 from softlearning.misc import tf_utils
 
 class MLPPreprocessor(MLPFunction):
-    def __init__(self, env_spec, layer_sizes=(128, 16),
-                 output_nonlinearity=None, name='observations_preprocessor'):
+    def __init__(self,
+                 env_spec,
+                 layer_sizes=(128, 16),
+                 output_nonlinearity=None,
+                 name='observations_preprocessor'):
 
         Parameterized.__init__(self)
         Serializable.quick_init(self, locals())
 
-        self._name = name
-
         self._Do = env_spec.observation_space.flat_dim
 
-        obs_ph = tf.placeholder(
-            tf.float32,
-            shape=(None, self._Do),
-            name='observations',
-        )
-
-        self._inputs = (obs_ph, )
-        self._layer_sizes = layer_sizes
-        self._output_nonlinearity = output_nonlinearity
-
-        self._output_t = self.output_for(obs_ph, reuse=tf.AUTO_REUSE)
-
-    def output_for(self, observations, reuse=False):
-        return super(MLPPreprocessor, self)._output_for(
-            (observations, ),
-            reuse=reuse)[..., None]
+        self._observations_ph = tf.placeholder(
+            tf.float32, shape=(None, self._Do), name='observations')
+        super(MLPPreprocessor, self).__init__(
+            (self._observations_ph, ),
+            name=name,
+            layer_sizes=layer_sizes,
+            output_nonlinearity=output_nonlinearity
+        )
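Note: the preprocessor now just builds its placeholder and defers to MLPFunction.__init__. The [..., None] override disappears because the dimension shuffle was removed end to end: the old base class returned out[..., 0], which for layer_sizes=(128, 16) appears to have kept only the first of the 16 features before the preprocessor re-expanded it to (batch, 1); the new base class returns the full feature tensor. A shape sketch under assumed sizes:

    # Shape sketch; batch size 32 and layer_sizes=(128, 16) are assumed.
    import numpy as np

    out = np.zeros((32, 16))              # feedforward_net output
    old_result = out[..., 0][..., None]   # old path: (32, 1), features lost
    new_result = out                      # new path: (32, 16), full features
    assert old_result.shape == (32, 1) and new_result.shape == (32, 16)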
softlearning/value_functions/value_function.py (47 changes: 21 additions & 26 deletions)
@@ -10,61 +10,55 @@
 class NNVFunction(MLPFunction):
     def __init__(self,
                  env_spec,
-                 hidden_layer_sizes=(100, 100),
-                 name='value_function'):
+                 hidden_layer_sizes=(128, 128),
+                 name='v_function'):
         Serializable.quick_init(self, locals())
 
         self._Do = env_spec.observation_space.flat_dim
         self._observations_ph = tf.placeholder(
-            tf.float32, shape=[None, self._Do], name='observations')
+            tf.float32, shape=(None, self._Do), name='observations')
 
         layer_sizes = tuple(hidden_layer_sizes) + (1, )
         super(NNVFunction, self).__init__(
-            inputs=(self._observations_ph, ),
+            (self._observations_ph, ),
             name=name,
             layer_sizes=layer_sizes)
 
-    def eval(self, observations):
-        return super(NNVFunction, self)._eval((observations, ))
-
-    def output_for(self, observations, reuse=False):
-        return super(NNVFunction, self)._output_for(
-            (observations, ), reuse=reuse)
+    def output_for(self, *args, **kwargs):
+        out = super(NNVFunction, self).output_for(*args, **kwargs)
+        return out[..., 0]
 
 
 class NNQFunction(MLPFunction):
     def __init__(self,
                  env_spec,
-                 hidden_layer_sizes=(100, 100),
+                 hidden_layer_sizes=(128, 128),
                  name='q_function'):
         Serializable.quick_init(self, locals())
 
         self._Da = env_spec.action_space.flat_dim
         self._Do = env_spec.observation_space.flat_dim
 
         self._observations_ph = tf.placeholder(
-            tf.float32, shape=[None, self._Do], name='observations')
+            tf.float32, shape=(None, self._Do), name='observations')
         self._actions_ph = tf.placeholder(
-            tf.float32, shape=[None, self._Da], name='actions')
+            tf.float32, shape=(None, self._Da), name='actions')
 
         layer_sizes = tuple(hidden_layer_sizes) + (1, )
         super(NNQFunction, self).__init__(
-            inputs=(self._observations_ph, self._actions_ph),
+            (self._observations_ph, self._actions_ph, ),
             name=name,
             layer_sizes=layer_sizes)
 
-    def output_for(self, observations, actions, reuse=False):
-        return super(NNQFunction, self)._output_for(
-            (observations, actions), reuse=reuse)
-
-    def eval(self, observations, actions):
-        return super(NNQFunction, self)._eval((observations, actions))
+    def output_for(self, *args, **kwargs):
+        out = super(NNQFunction, self).output_for(*args, **kwargs)
+        return out[..., 0]
 
 
 class NNDiscriminatorFunction(MLPFunction):
     def __init__(self,
                  env_spec,
-                 hidden_layer_sizes=(100, 100),
+                 hidden_layer_sizes=(128, 128),
                  num_skills=None,
                  name='discriminator_function'):
         assert num_skills is not None
@@ -74,18 +68,19 @@ def __init__(self,
         self._Do = env_spec.observation_space.flat_dim
 
         self._observations_ph = tf.placeholder(
-            tf.float32, shape=[None, self._Do], name='observations')
+            tf.float32, shape=(None, self._Do), name='observations')
         self._action_pl = tf.placeholder(
-            tf.float32, shape=[None, self._Da], name='actions')
+            tf.float32, shape=(None, self._Da), name='actions')
 
         layer_sizes = tuple(hidden_layer_sizes) + (num_skills, )
         super(NNDiscriminatorFunction, self).__init__(
-            inputs=(self._observations_ph, self._actions_ph),
+            (self._observations_ph, self._actions_ph, ),
             name=name,
             layer_sizes=layer_sizes)
 
         self._output_t = self._output
+
 
 class SumQFunction(Serializable):
     def __init__(self, env_spec, q_functions):
         Serializable.quick_init(self, locals())
@@ -96,9 +91,9 @@ def __init__(self, env_spec, q_functions):
         self._Do = env_spec.observation_space.flat_dim
 
         self._observations_ph = tf.placeholder(
-            tf.float32, shape=[None, self._Do], name='observations')
+            tf.float32, shape=(None, self._Do), name='observations')
         self._actions_ph = tf.placeholder(
-            tf.float32, shape=[None, self._Da], name='actions')
+            tf.float32, shape=(None, self._Da), name='actions')
 
         self._output = self.output_for(
             self._observations_ph, self._actions_ph, reuse=True)
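Note: the value functions now share one code path: hidden layer defaults grow from (100, 100) to (128, 128), the V-function's default scope name changes from 'value_function' to 'v_function', placeholder shapes become tuples, and output_for/eval delegate to the variadic base class, squeezing the trailing unit dimension from layer_sizes + (1, ) via out[..., 0]. One apparent pre-existing inconsistency survives: NNDiscriminatorFunction assigns self._action_pl but passes self._actions_ph to super().__init__(). A hedged end-to-end sketch; env_spec and the input tensors/batches are assumed to exist:

    from softlearning.value_functions.value_function import (
        NNQFunction, NNVFunction)

    # env_spec, obs_t, act_t, obs_batch: assumed to be defined by the caller.
    qf = NNQFunction(env_spec)                     # (128, 128) hidden + scalar head
    vf = NNVFunction(env_spec)                     # scope 'v_function'
    q_t = qf.output_for(obs_t, act_t, reuse=True)  # shape (batch,), already squeezed
    # vf.eval(obs_batch) returns a numpy array of state values via the default session.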
