Refactor ContinuousMLPBaseline (#1686)
yeukfu committed Jul 2, 2020
1 parent 59648a0 commit 490f5b7
Showing 7 changed files with 231 additions and 412 deletions.
@@ -52,7 +52,7 @@ def continuous_mlp_baseline(ctxt, env_id, seed):

     baseline = ContinuousMLPBaseline(
         env_spec=env.spec,
-        regressor_args=dict(hidden_sizes=(64, 64)),
+        hidden_sizes=(64, 64),
     )

     algo = PPO(env_spec=env.spec,
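For downstream users, the call-site change above is the whole migration: regressor_args is gone, and the MLP options become direct constructor arguments. A before/after sketch (values taken from the example above):

# Before this commit: MLP options were forwarded to an internal regressor.
baseline = ContinuousMLPBaseline(
    env_spec=env.spec,
    regressor_args=dict(hidden_sizes=(64, 64)))

# After this commit: the same options are plain constructor arguments.
baseline = ContinuousMLPBaseline(
    env_spec=env.spec,
    hidden_sizes=(64, 64))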
172 changes: 138 additions & 34 deletions src/garage/tf/baselines/continuous_mlp_baseline.py
@@ -1,11 +1,17 @@
"""A value function (baseline) based on a MLP model."""
from dowel import tabular
import numpy as np
import tensorflow as tf

from garage import make_optimizer
from garage.np.baselines import Baseline
from garage.tf.regressors import ContinuousMLPRegressor
from garage.tf.misc import tensor_utils
from garage.tf.models import NormalizedInputMLPModel
from garage.tf.optimizers import LbfgsOptimizer


class ContinuousMLPBaseline(Baseline):
# pylint: disable=too-many-ancestors
class ContinuousMLPBaseline(NormalizedInputMLPModel, Baseline):
"""A value function using a MLP network.
It fits the input data by performing linear regression
@@ -15,26 +21,101 @@ class ContinuousMLPBaseline(Baseline):
         env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
         num_seq_inputs (float): Number of sequence per input. By default
             it is 1.0, which means only one single sequence.
-        regressor_args (dict): Arguments for regressor.
         name (str): Name of baseline.
+        hidden_sizes (list[int]): Output dimension of dense layer(s) for
+            the MLP for mean. For example, (32, 32) means the MLP consists
+            of two hidden layers, each with 32 hidden units.
+        hidden_nonlinearity (Callable): Activation function for intermediate
+            dense layer(s). It should return a tf.Tensor. Set it to
+            None to maintain a linear activation.
+        hidden_w_init (Callable): Initializer function for the weight
+            of intermediate dense layer(s). The function should return a
+            tf.Tensor.
+        hidden_b_init (Callable): Initializer function for the bias
+            of intermediate dense layer(s). The function should return a
+            tf.Tensor.
+        output_nonlinearity (Callable): Activation function for output dense
+            layer. It should return a tf.Tensor. Set it to None to
+            maintain a linear activation.
+        output_w_init (Callable): Initializer function for the weight
+            of output dense layer(s). The function should return a
+            tf.Tensor.
+        output_b_init (Callable): Initializer function for the bias
+            of output dense layer(s). The function should return a
+            tf.Tensor.
+        optimizer (garage.tf.Optimizer): Optimizer for minimizing the negative
+            log-likelihood.
+        optimizer_args (dict): Arguments for the optimizer. Default is None,
+            which means no arguments.
+        normalize_inputs (bool): Bool for normalizing inputs or not.

     """

     def __init__(self,
                  env_spec,
                  num_seq_inputs=1,
-                 regressor_args=None,
-                 name='ContinuousMLPBaseline'):
-        if regressor_args is None:
-            regressor_args = dict()
-
-        self._regressor = ContinuousMLPRegressor(
-            input_shape=(env_spec.observation_space.flat_dim *
-                         num_seq_inputs, ),
-            output_dim=1,
-            name=name,
-            **regressor_args)
-        self.name = name
+                 name='ContinuousMLPBaseline',
+                 hidden_sizes=(32, 32),
+                 hidden_nonlinearity=tf.nn.tanh,
+                 hidden_w_init=tf.initializers.glorot_uniform(),
+                 hidden_b_init=tf.zeros_initializer(),
+                 output_nonlinearity=None,
+                 output_w_init=tf.initializers.glorot_uniform(),
+                 output_b_init=tf.zeros_initializer(),
+                 optimizer=None,
+                 optimizer_args=None,
+                 normalize_inputs=True):
+        self._env_spec = env_spec
+        self._normalize_inputs = normalize_inputs
+        self._name = name
+
+        if optimizer_args is None:
+            optimizer_args = dict()
+        if optimizer is None:
+            self._optimizer = make_optimizer(LbfgsOptimizer, **optimizer_args)
+        else:
+            self._optimizer = make_optimizer(optimizer, **optimizer_args)
+
+        super().__init__(input_shape=(env_spec.observation_space.flat_dim *
+                                      num_seq_inputs, ),
+                         output_dim=1,
+                         name=name,
+                         hidden_sizes=hidden_sizes,
+                         hidden_nonlinearity=hidden_nonlinearity,
+                         hidden_w_init=hidden_w_init,
+                         hidden_b_init=hidden_b_init,
+                         output_nonlinearity=output_nonlinearity,
+                         output_w_init=output_w_init,
+                         output_b_init=output_b_init)
+
+        self._x_mean = None
+        self._x_std = None
+        self._y_hat = None
+        self._initialize()
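Because the optimizer is now injected through make_optimizer, swapping L-BFGS for another garage optimizer is a constructor argument away. A hedged construction sketch (FirstOrderOptimizer is another optimizer class in garage.tf.optimizers; treat the exact keyword arguments as an assumption):

from garage.tf.optimizers import FirstOrderOptimizer

baseline = ContinuousMLPBaseline(
    env_spec=env.spec,
    optimizer=FirstOrderOptimizer,       # a class, not an instance
    optimizer_args=dict(max_epochs=10),  # forwarded by make_optimizer
)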

+    def _initialize(self):
+        input_var = tf.compat.v1.placeholder(tf.float32,
+                                             shape=(None, ) +
+                                             self._input_shape)
+
+        ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
+                                          name='ys',
+                                          shape=(None, self._output_dim))
+
+        (self._y_hat, self._x_mean,
+         self._x_std) = self.build(input_var).outputs
+
+        loss = tf.reduce_mean(tf.square(self._y_hat - ys_var))
+        self._f_predict = tensor_utils.compile_function([input_var],
+                                                        self._y_hat)
+        optimizer_args = dict(
+            loss=loss,
+            target=self,
+            network_outputs=[ys_var],
+        )
+        optimizer_args['inputs'] = [input_var, ys_var]
+        with tf.name_scope('update_opt'):
+            self._optimizer.update_opt(**optimizer_args)
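_initialize() uses the optimizer only through a small duck-typed contract: update_opt() receives the loss, the target module, and the input placeholders once; fit() later calls only loss() and optimize(). A minimal sketch of an object satisfying that contract (a hypothetical illustration, not garage's LbfgsOptimizer):

import tensorflow as tf

class GradientDescentContract:
    """Hypothetical optimizer honoring the update_opt/loss/optimize contract."""

    def update_opt(self, loss, target, inputs, network_outputs):
        # target and network_outputs kept only for signature compatibility.
        self._loss_op = loss
        self._inputs = inputs
        self._train_op = tf.compat.v1.train.GradientDescentOptimizer(
            1e-3).minimize(loss)

    def _feed(self, values):
        return dict(zip(self._inputs, values))

    def loss(self, values):
        return tf.compat.v1.get_default_session().run(
            self._loss_op, feed_dict=self._feed(values))

    def optimize(self, values):
        tf.compat.v1.get_default_session().run(
            self._train_op, feed_dict=self._feed(values))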

     def fit(self, paths):
         """Fit regressor based on paths.
@@ -43,9 +124,21 @@ def fit(self, paths):
             paths (dict[numpy.ndarray]): Sample paths.

         """
-        observations = np.concatenate([p['observations'] for p in paths])
-        returns = np.concatenate([p['returns'] for p in paths])
-        self._regressor.fit(observations, returns.reshape((-1, 1)))
+        xs = np.concatenate([p['observations'] for p in paths])
+        ys = np.concatenate([p['returns'] for p in paths])
+        ys = ys.reshape((-1, 1))
+        if self._normalize_inputs:
+            # recompute normalizing constants for inputs
+            self._x_mean.load(np.mean(xs, axis=0, keepdims=True))
+            self._x_std.load(np.std(xs, axis=0, keepdims=True) + 1e-8)
+
+        inputs = [xs, ys]
+        loss_before = self._optimizer.loss(inputs)
+        tabular.record('{}/LossBefore'.format(self._name), loss_before)
+        self._optimizer.optimize(inputs)
+        loss_after = self._optimizer.loss(inputs)
+        tabular.record('{}/LossAfter'.format(self._name), loss_after)
+        tabular.record('{}/dLoss'.format(self._name), loss_before - loss_after)
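The normalization step above only reloads the two TF variables; the prediction graph itself is unchanged. A minimal numpy sketch of the constants fit() computes (the 1e-8 term guards against zero variance in constant observation dimensions):

import numpy as np

def normalizing_constants(xs):
    # Same statistics fit() loads into _x_mean and _x_std.
    x_mean = np.mean(xs, axis=0, keepdims=True)
    x_std = np.std(xs, axis=0, keepdims=True) + 1e-8  # avoid divide-by-zero
    return x_mean, x_std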

     def predict(self, paths):
         """Predict value based on paths.
@@ -57,32 +150,43 @@
             numpy.ndarray: Predicted value.

         """
-        return self._regressor.predict(paths['observations']).flatten()
+        return self._f_predict(paths['observations']).flatten()

-    def get_param_values(self):
-        """Get parameter values.
+    @property
+    def recurrent(self):
+        """bool: If this module has a hidden state."""
+        return False
+
+    @property
+    def env_spec(self):
+        """Policy environment specification.

         Returns:
-            List[np.ndarray]: A list of values of each parameter.
+            garage.EnvSpec: Environment specification.

         """
-        return self._regressor.get_param_values()
+        return self._env_spec

-    def set_param_values(self, flattened_params):
-        """Set param values.
+    def __getstate__(self):
+        """Object.__getstate__.

-        Args:
-            flattened_params (np.ndarray): A numpy array of parameter values.
+        Returns:
+            dict: the state to be pickled for the instance.

         """
-        self._regressor.set_param_values(flattened_params)
+        new_dict = super().__getstate__()
+        del new_dict['_f_predict']
+        del new_dict['_x_mean']
+        del new_dict['_x_std']
+        del new_dict['_y_hat']
+        return new_dict

-    def get_params_internal(self):
-        """Get the params, which are the trainable variables.
+    def __setstate__(self, state):
+        """Object.__setstate__.

-        Returns:
-            List[tf.Variable]: A list of trainable variables in the current
-                variable scope.
+        Args:
+            state (dict): unpickled state.

         """
-        return self._regressor.get_params()
+        super().__setstate__(state)
+        self._initialize()
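Compiled functions and graph tensors are not picklable, so __getstate__ strips them and __setstate__ rebuilds the graph through _initialize(). A round-trip sketch (assumes a live TF session and an already-constructed instance named baseline):

import pickle

blob = pickle.dumps(baseline)    # drops _f_predict, _x_mean, _x_std, _y_hat
restored = pickle.loads(blob)    # __setstate__ reruns _initialize() to rebuild them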
6 changes: 2 additions & 4 deletions src/garage/tf/regressors/__init__.py
@@ -2,15 +2,13 @@
 from garage.tf.regressors.bernoulli_mlp_regressor import BernoulliMLPRegressor
 from garage.tf.regressors.categorical_mlp_regressor import (
     CategoricalMLPRegressor)
-from garage.tf.regressors.continuous_mlp_regressor import (
-    ContinuousMLPRegressor)
 from garage.tf.regressors.gaussian_cnn_regressor_model import (
     GaussianCNNRegressorModel)
 from garage.tf.regressors.gaussian_mlp_regressor import GaussianMLPRegressor
 from garage.tf.regressors.regressor import Regressor, StochasticRegressor

 __all__ = [
     'BernoulliMLPRegressor', 'CategoricalMLPRegressor',
-    'ContinuousMLPRegressor', 'GaussianCNNRegressorModel',
-    'GaussianMLPRegressor', 'Regressor', 'StochasticRegressor'
+    'GaussianCNNRegressorModel', 'GaussianMLPRegressor', 'Regressor',
+    'StochasticRegressor'
 ]
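ContinuousMLPRegressor leaves the public regressors API because the baseline now builds its own model. A migration sketch for imports (assuming the baseline is exported from garage.tf.baselines, as the example above uses it):

# Before: the baseline wrapped a separately exported regressor.
# from garage.tf.regressors import ContinuousMLPRegressor

# After: construct the baseline directly; it is now itself the model.
from garage.tf.baselines import ContinuousMLPBaseline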
(Diffs for the remaining 4 changed files are not shown.)
