pytorch · vmoens · Dec 13, 2022 · Dec 13, 2022 · Dec 13, 2022 · Dec 13, 2022
diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py
@@ -449,6 +449,11 @@ def _project(self, val: torch.Tensor) -> torch.Tensor:
             maximum = maximum.expand_as(val)
             val[val < minimum] = minimum[val < minimum]
             val[val > maximum] = maximum[val > maximum]
+        except RuntimeError:
+            minimum = minimum.expand_as(val)
+            maximum = maximum.expand_as(val)
+            val[val < minimum] = minimum[val < minimum]
+            val[val > maximum] = maximum[val > maximum]
         return val
 
     def is_in(self, val: torch.Tensor) -> bool:

diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py
@@ -252,6 +252,13 @@ class GAE(nn.Module):
         gradient_mode (bool): if True, gradients are propagated throught the computation of the value function.
             Default is `False`.
 
+    GAE will return an :obj:`"advantage"` entry containing the advantage value. It will also
+    return a :obj:`"value_target"` entry with the return value that is to be used
+    to train the value network. Finally, if :obj:`gradient_mode` is :obj:`True`,
+    an additional and differentiable :obj:`"value_error"` entry will be returned,
+    which simply represents the difference between the return and the value network
+    output (i.e. an additional distance loss should be applied to that signed value).
+
     """
 
     def __init__(
@@ -336,6 +343,7 @@ def forward(
             )
 
         tensordict.set("advantage", adv.detach())
+        tensordict.set("value_target", value_target)
         if self.gradient_mode:
             tensordict.set("value_error", value_target - value)