Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor NPO #1189

Merged
merged 9 commits into from
Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion src/garage/misc/tensor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,28 @@ def discount_cumsum(x, discount):
axis=0)[::-1]


def explained_variance_1d(ypred, y):
def explained_variance_1d(ypred, y, valids=None):
"""Explained variation for 1D inputs.

It is the proportion of the variance in one variable that is explained or
predicted from another variable.

Args:
ypred (np.ndarray): Sample data from the first variable.
Shape: :math:`(N, max_path_length)`.
y (np.ndarray): Sample data from the second variable.
Shape: :math:`(N, max_path_length)`.
valids (np.ndarray): Optional argument. Array indicating valid indices.
If None, it assumes the entire input array are valid.
Shape: :math:`(N, max_path_length)`.

Returns:
float: The explained variance.

"""
if valids is not None:
ypred = ypred[valids.astype(np.bool)]
y = y[valids.astype(np.bool)]
assert y.ndim == 1 and ypred.ndim == 1
vary = np.var(y)
if np.isclose(vary, 0):
Expand Down Expand Up @@ -163,6 +171,33 @@ def stack_tensor_dict_list(tensor_dict_list):
return ret


def stack_and_pad_tensor_n(paths, key, max_len):
    """Stack and pad an array extracted from a list of paths.

    Input paths are a list of N dicts, each with values of shape
    :math:`(D, S^*)`. The values stored under ``key`` are padded to
    ``max_len`` and stacked, giving an output of shape :math:`(N, D, S^*)`.

    Args:
        paths (list[dict]): List of dicts to be stacked and padded.
            Value of each dict will be shape of :math:`(D, S^*)`.
        key (str): Key of the values in the paths to be stacked and padded.
        max_len (int): Maximum length for padding.

    Returns:
        numpy.ndarray: Stacked and padded tensor. Shape: :math:`(N, D, S^*)`
            where N is the len of input paths.

    """
    values = [path[key] for path in paths]
    # Nested dicts are padded per-entry and then stacked key-wise;
    # plain arrays are padded and stacked directly.
    if isinstance(values[0], dict):
        padded = [pad_tensor_dict(v, max_len) for v in values]
        return stack_tensor_dict_list(padded)
    return pad_tensor_n(np.array(values), max_len)


def concat_tensor_dict_list(tensor_dict_list):
"""Concatenate dictionary of list of tensor.

Expand Down
24 changes: 11 additions & 13 deletions src/garage/tf/algos/npo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from garage.tf.misc.tensor_utils import center_advs
from garage.tf.misc.tensor_utils import compile_function
from garage.tf.misc.tensor_utils import compute_advantages
from garage.tf.misc.tensor_utils import concat_tensor_list
from garage.tf.misc.tensor_utils import discounted_returns
from garage.tf.misc.tensor_utils import filter_valids
from garage.tf.misc.tensor_utils import filter_valids_dict
Expand Down Expand Up @@ -192,7 +191,12 @@ def optimize_policy(self, itr, samples_data):
pol_ent = self._f_policy_entropy(*policy_opt_input_values)
tabular.record('{}/Entropy'.format(self.policy.name), np.mean(pol_ent))

self._fit_baseline(samples_data)
self._fit_baseline_with_data(samples_data)

ev = np_tensor_utils.explained_variance_1d(samples_data['baselines'],
samples_data['returns'],
samples_data['valids'])
tabular.record('{}/ExplainedVariance'.format(self.baseline.name), ev)

def _build_inputs(self):
"""Build input variables.
Expand Down Expand Up @@ -562,7 +566,7 @@ def _build_entropy_term(self, i):

return policy_entropy

def _fit_baseline(self, samples_data):
def _fit_baseline_with_data(self, samples_data):
"""Update baselines from samples.

Args:
Expand All @@ -579,7 +583,6 @@ def _fit_baseline(self, samples_data):

paths = samples_data['paths']
valids = samples_data['valids']
baselines = [path['baselines'] for path in paths]

# Recompute parts of samples_data
aug_rewards = []
Expand All @@ -590,15 +593,10 @@ def _fit_baseline(self, samples_data):
path['returns'] = ret[val.astype(np.bool)]
aug_rewards.append(path['rewards'])
aug_returns.append(path['returns'])
aug_rewards = concat_tensor_list(aug_rewards)
aug_returns = concat_tensor_list(aug_returns)
samples_data['rewards'] = aug_rewards
samples_data['returns'] = aug_returns

# Calculate explained variance
ev = np_tensor_utils.explained_variance_1d(np.concatenate(baselines),
aug_returns)
tabular.record('{}/ExplainedVariance'.format(self.baseline.name), ev)
samples_data['rewards'] = np_tensor_utils.pad_tensor_n(
aug_rewards, self.max_path_length)
samples_data['returns'] = np_tensor_utils.pad_tensor_n(
aug_returns, self.max_path_length)

# Fit baseline
logger.log('Fitting baseline...')
Expand Down
14 changes: 12 additions & 2 deletions src/garage/tf/policies/gaussian_gru_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,22 @@ def state_info_specs(self):
return []

def __getstate__(self):
    """See `Object.__getstate__`.

    Returns:
        dict: Parameters to save.

    """
    state = super().__getstate__()
    # The compiled step function is not picklable; it is rebuilt by
    # _initialize() when __setstate__ restores the policy.
    del state['_f_step_mean_std']
    return state

def __setstate__(self, state):
    """See `Object.__setstate__`.

    Args:
        state (dict): Parameters to restore from.

    """
    super().__setstate__(state)
    # Rebuild members dropped in __getstate__ (e.g. _f_step_mean_std),
    # which cannot be pickled and must be recreated after unpickling.
    self._initialize()
21 changes: 21 additions & 0 deletions tests/garage/misc/test_tensor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import numpy as np

from garage.misc.tensor_utils import concat_tensor_dict_list
from garage.misc.tensor_utils import explained_variance_1d
from garage.misc.tensor_utils import normalize_pixel_batch
from garage.misc.tensor_utils import pad_tensor
from garage.misc.tensor_utils import stack_and_pad_tensor_n
from garage.misc.tensor_utils import stack_tensor_dict_list
from garage.tf.envs import TfEnv
from tests.fixtures.envs.dummy import DummyBoxEnv
Expand Down Expand Up @@ -79,3 +81,22 @@ def test_pad_tensor(self):

results = pad_tensor(self.tensor, self.max_len, mode='last')
assert np.array_equal(results, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

def test_explained_variance_1d(self):
    """Check masked and unmasked explained variance values."""
    targets = np.array([1, 2, 3, 4, 5, 0, 0, 0, 0, 0])
    predictions = np.array([2, 3, 4, 5, 6, 0, 0, 0, 0, 0])
    mask = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    # With the padding masked out the residual is constant, so the
    # explained variance is exactly 1.
    assert explained_variance_1d(targets, predictions, mask) == 1.0
    # Without a mask the zero padding contributes to the variance.
    unmasked = explained_variance_1d(targets, predictions)
    np.testing.assert_almost_equal(unmasked, 0.95)

def test_stack_and_pad_tensor_n(self):
    """Check stacking/padding of plain arrays and of nested dicts."""
    # Plain array values are padded to max_len and stacked row-wise.
    stacked_obs = stack_and_pad_tensor_n(paths=self.data,
                                         key='obs',
                                         max_len=5)
    expected_obs = np.array([[1, 1, 1, 0, 0], [1, 1, 1, 0, 0]])
    assert np.array_equal(stacked_obs, expected_obs)
    # Dict values are padded per key, preserving the dict structure.
    stacked_info = stack_and_pad_tensor_n(paths=self.data,
                                          key='info',
                                          max_len=5)
    expected_lala = np.array([[1, 1, 0, 0, 0], [1, 1, 0, 0, 0]])
    expected_baba = np.array([[2, 2, 0, 0, 0], [2, 2, 0, 0, 0]])
    assert np.array_equal(stacked_info['lala'], expected_lala)
    assert np.array_equal(stacked_info['baba'], expected_baba)
2 changes: 1 addition & 1 deletion tests/garage/tf/algos/test_batch_polopt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

class TestBatchPolopt2(TfGraphTestCase):

@mock.patch.multiple(BatchPolopt2, __abstractmethods__=set())
# pylint: disable=abstract-class-instantiated, no-member
@mock.patch.multiple(BatchPolopt2, __abstractmethods__=set())
def test_process_samples_continuous_non_recurrent(self):
env = TfEnv(DummyBoxEnv())
policy = GaussianMLPPolicy(env_spec=env.spec)
Expand Down