Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor NPO #1189

Merged
merged 9 commits into from
Mar 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion src/garage/misc/tensor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,28 @@ def discount_cumsum(x, discount):
axis=0)[::-1]


def explained_variance_1d(ypred, y):
def explained_variance_1d(ypred, y, valids=None):
"""Explained variation for 1D inputs.

It is the proportion of the variance in one variable that is explained or
predicted from another variable.

Args:
ypred (np.ndarray): Sample data from the first variable.
Shape: :math:`(N, max_path_length)`.
y (np.ndarray): Sample data from the second variable.
Shape: :math:`(N, max_path_length)`.
valids (np.ndarray): Optional argument. Array indicating valid indices.
If None, it assumes the entire input array are valid.
Shape: :math:`(N, max_path_length)`.

Returns:
float: The explained variance.

"""
if valids is not None:
ypred = ypred[valids.astype(np.bool)]
y = y[valids.astype(np.bool)]
assert y.ndim == 1 and ypred.ndim == 1
vary = np.var(y)
if np.isclose(vary, 0):
Expand Down Expand Up @@ -163,6 +171,33 @@ def stack_tensor_dict_list(tensor_dict_list):
return ret


def stack_and_pad_tensor_n(paths, key, max_len):
    """Stack and pad an array extracted from a list of paths.

    Input paths are a list of N dicts, each with values of shape
    :math:`(D, S^*)`. The values stored under ``key`` are padded to
    ``max_len`` and stacked, giving an output of shape :math:`(N, D, S^*)`.

    Args:
        paths (list[dict]): List of dicts to be stacked and padded.
            Value of each dict will be shape of :math:`(D, S^*)`.
        key (str): Key of the values in the paths to be stacked and padded.
        max_len (int): Maximum length for padding.

    Returns:
        numpy.ndarray: Stacked and padded tensor. Shape: :math:`(N, D, S^*)`
            where N is the len of input paths.

    """
    values = [path[key] for path in paths]
    # Nested dicts are padded per-entry and then stacked key-wise;
    # plain arrays are padded and stacked directly.
    if isinstance(values[0], dict):
        padded = [pad_tensor_dict(v, max_len) for v in values]
        return stack_tensor_dict_list(padded)
    return pad_tensor_n(np.array(values), max_len)


def concat_tensor_dict_list(tensor_dict_list):
"""Concatenate dictionary of list of tensor.

Expand Down
24 changes: 11 additions & 13 deletions src/garage/tf/algos/npo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from garage.tf.misc.tensor_utils import center_advs
from garage.tf.misc.tensor_utils import compile_function
from garage.tf.misc.tensor_utils import compute_advantages
from garage.tf.misc.tensor_utils import concat_tensor_list
from garage.tf.misc.tensor_utils import discounted_returns
from garage.tf.misc.tensor_utils import filter_valids
from garage.tf.misc.tensor_utils import filter_valids_dict
Expand Down Expand Up @@ -192,7 +191,12 @@ def optimize_policy(self, itr, samples_data):
pol_ent = self._f_policy_entropy(*policy_opt_input_values)
tabular.record('{}/Entropy'.format(self.policy.name), np.mean(pol_ent))

self._fit_baseline(samples_data)
self._fit_baseline_with_data(samples_data)

ev = np_tensor_utils.explained_variance_1d(samples_data['baselines'],
samples_data['returns'],
samples_data['valids'])
tabular.record('{}/ExplainedVariance'.format(self.baseline.name), ev)

def _build_inputs(self):
"""Build input variables.
Expand Down Expand Up @@ -562,7 +566,7 @@ def _build_entropy_term(self, i):

return policy_entropy

def _fit_baseline(self, samples_data):
def _fit_baseline_with_data(self, samples_data):
"""Update baselines from samples.

Args:
Expand All @@ -579,7 +583,6 @@ def _fit_baseline(self, samples_data):

paths = samples_data['paths']
valids = samples_data['valids']
baselines = [path['baselines'] for path in paths]

# Recompute parts of samples_data
aug_rewards = []
Expand All @@ -590,15 +593,10 @@ def _fit_baseline(self, samples_data):
path['returns'] = ret[val.astype(np.bool)]
aug_rewards.append(path['rewards'])
aug_returns.append(path['returns'])
aug_rewards = concat_tensor_list(aug_rewards)
aug_returns = concat_tensor_list(aug_returns)
samples_data['rewards'] = aug_rewards
samples_data['returns'] = aug_returns

# Calculate explained variance
ev = np_tensor_utils.explained_variance_1d(np.concatenate(baselines),
aug_returns)
tabular.record('{}/ExplainedVariance'.format(self.baseline.name), ev)
samples_data['rewards'] = np_tensor_utils.pad_tensor_n(
aug_rewards, self.max_path_length)
samples_data['returns'] = np_tensor_utils.pad_tensor_n(
aug_returns, self.max_path_length)

# Fit baseline
logger.log('Fitting baseline...')
Expand Down
14 changes: 12 additions & 2 deletions src/garage/tf/policies/gaussian_gru_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,22 @@ def state_info_specs(self):
return []

def __getstate__(self):
    """See `Object.__getstate__`.

    Returns:
        dict: Parameters to save.

    """
    state = super().__getstate__()
    # The compiled step function is not picklable; it is rebuilt by
    # _initialize() when __setstate__ restores the policy.
    del state['_f_step_mean_std']
    return state

def __setstate__(self, state):
    """See `Object.__setstate__`.

    Args:
        state (dict): Parameters to restore from.

    """
    super().__setstate__(state)
    # Rebuild members dropped in __getstate__ (e.g. _f_step_mean_std),
    # which cannot be pickled and must be recreated after unpickling.
    self._initialize()
21 changes: 21 additions & 0 deletions tests/garage/misc/test_tensor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import numpy as np

from garage.misc.tensor_utils import concat_tensor_dict_list
from garage.misc.tensor_utils import explained_variance_1d
from garage.misc.tensor_utils import normalize_pixel_batch
from garage.misc.tensor_utils import pad_tensor
from garage.misc.tensor_utils import stack_and_pad_tensor_n
from garage.misc.tensor_utils import stack_tensor_dict_list
from garage.tf.envs import TfEnv
from tests.fixtures.envs.dummy import DummyBoxEnv
Expand Down Expand Up @@ -79,3 +81,22 @@ def test_pad_tensor(self):

results = pad_tensor(self.tensor, self.max_len, mode='last')
assert np.array_equal(results, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

def test_explained_variance_1d(self):
    """Check masked and unmasked explained variance values."""
    targets = np.array([1, 2, 3, 4, 5, 0, 0, 0, 0, 0])
    predictions = np.array([2, 3, 4, 5, 6, 0, 0, 0, 0, 0])
    mask = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    # With the padding masked out the residual is constant, so the
    # explained variance is exactly 1.
    assert explained_variance_1d(targets, predictions, mask) == 1.0
    # Without a mask the zero padding contributes to the variance.
    unmasked = explained_variance_1d(targets, predictions)
    np.testing.assert_almost_equal(unmasked, 0.95)

def test_stack_and_pad_tensor_n(self):
    """Check stacking/padding of plain arrays and of nested dicts."""
    # Plain array values are padded to max_len and stacked row-wise.
    stacked_obs = stack_and_pad_tensor_n(paths=self.data,
                                         key='obs',
                                         max_len=5)
    expected_obs = np.array([[1, 1, 1, 0, 0], [1, 1, 1, 0, 0]])
    assert np.array_equal(stacked_obs, expected_obs)
    # Dict values are padded per key, preserving the dict structure.
    stacked_info = stack_and_pad_tensor_n(paths=self.data,
                                          key='info',
                                          max_len=5)
    expected_lala = np.array([[1, 1, 0, 0, 0], [1, 1, 0, 0, 0]])
    expected_baba = np.array([[2, 2, 0, 0, 0], [2, 2, 0, 0, 0]])
    assert np.array_equal(stacked_info['lala'], expected_lala)
    assert np.array_equal(stacked_info['baba'], expected_baba)
2 changes: 1 addition & 1 deletion tests/garage/tf/algos/test_batch_polopt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

class TestBatchPolopt2(TfGraphTestCase):

@mock.patch.multiple(BatchPolopt2, __abstractmethods__=set())
# pylint: disable=abstract-class-instantiated, no-member
@mock.patch.multiple(BatchPolopt2, __abstractmethods__=set())
def test_process_samples_continuous_non_recurrent(self):
env = TfEnv(DummyBoxEnv())
policy = GaussianMLPPolicy(env_spec=env.spec)
Expand Down