Commit 4362e67

Merge with main

2 parents: 9be78d1 + 778f2f4
3 files changed: +3 additions, -9 deletions

test/test_tensor_spec.py
Lines changed: 0 additions & 1 deletion

@@ -259,7 +259,6 @@ def test_multi_discrete(shape, ns):
     np.random.seed(0)
     ts = MultiDiscreteTensorSpec(ns)
     _real_shape = shape if shape is not None else []
-    _len_ns = [len(ns)] if len(ns) > 1 else []
     nvec_shape = torch.tensor(ns).size()
     if nvec_shape == torch.Size([1]):
         nvec_shape = []

torchrl/data/tensor_specs.py
Lines changed: 1 addition & 3 deletions

@@ -1087,12 +1087,11 @@ def _project(self, val: torch.Tensor) -> torch.Tensor:
     def is_in(self, val: torch.Tensor) -> bool:
         if val.ndim < 1:
             val = val.unsqueeze(0)
-        val_is_too_small = len(self.shape) > val.ndim
         val_have_wrong_dim = (
             self.shape != torch.Size([1])
             and val.shape[-len(self.shape) :] != self.shape
         )
-        if self.dtype != val.dtype or val_is_too_small or val_have_wrong_dim:
+        if self.dtype != val.dtype or len(self.shape) > val.ndim or val_have_wrong_dim:
             return False

         for permutation in itertools.product(*[range(d) for d in self.shape]):
@@ -1107,7 +1106,6 @@ def to_onehot(self) -> MultOneHotDiscreteTensorSpec:
                 f"DiscreteTensorSpec with shape != tensor.Size([1]) can't be converted OneHotDiscreteTensorSpec. Got "
                 f"shape={self.shape}."
             )
-
         return MultOneHotDiscreteTensorSpec(
             [_space.n for _space in self.space], self.device, self.dtype
         )
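
For context, is_in rejects a candidate value unless its dtype matches the spec and its trailing dimensions match the spec's shape; the patch simply inlines the val_is_too_small temporary into the condition, with no behavior change. A minimal standalone sketch of the same gate follows; the helper name passes_is_in_gate and its arguments are illustrative, not torchrl API:

import torch

def passes_is_in_gate(val: torch.Tensor, spec_shape: torch.Size, dtype: torch.dtype) -> bool:
    # Illustrative re-implementation of the checks above, not torchrl code.
    if val.ndim < 1:
        val = val.unsqueeze(0)
    wrong_dim = (
        spec_shape != torch.Size([1])
        and val.shape[-len(spec_shape):] != spec_shape
    )
    # The "value too small" check, inlined as in the patched line:
    return dtype == val.dtype and len(spec_shape) <= val.ndim and not wrong_dim

print(passes_is_in_gate(torch.zeros(4, 3, dtype=torch.long), torch.Size([3]), torch.long))  # True
print(passes_is_in_gate(torch.zeros(4, 2, dtype=torch.long), torch.Size([3]), torch.long))  # False: wrong trailing dim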

torchrl/objectives/ppo.py
Lines changed: 2 additions & 5 deletions

@@ -233,13 +233,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
                 f"and {log_weight.shape})"
             )
         gain1 = log_weight.exp() * advantage
-        log_weight_clip = torch.empty_like(log_weight)
-        # log_weight_clip.data.clamp_(*self._clip_bounds)
-        idx_pos = advantage >= 0
-        log_weight_clip[idx_pos] = log_weight[idx_pos].clamp_max(self._clip_bounds[1])
-        log_weight_clip[~idx_pos] = log_weight[~idx_pos].clamp_min(self._clip_bounds[0])

+        log_weight_clip = log_weight.clamp(*self._clip_bounds)
         gain2 = log_weight_clip.exp() * advantage
+
         gain = torch.stack([gain1, gain2], -1).min(dim=-1)[0]
         td_out = TensorDict({"loss_objective": -gain.mean()}, [])

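The ppo.py change replaces sign-dependent clipping of the log-ratio with a plain two-sided clamp. This works because the clipped-surrogate loss takes the elementwise minimum of the unclipped and clipped gains: clamping the log-ratio on the "wrong" side relative to the advantage's sign can only raise gain2, so it never changes that minimum. A quick numerical check of the equivalence; the bounds lo and hi are illustrative stand-ins for self._clip_bounds, and torch.min is the elementwise equivalent of the stack/min in forward:

import torch

torch.manual_seed(0)
log_weight = torch.randn(1000)
advantage = torch.randn(1000)
lo, hi = -0.2, 0.2  # illustrative clip bounds

gain1 = log_weight.exp() * advantage

# Old, sign-dependent clipping:
old_clip = torch.empty_like(log_weight)
idx_pos = advantage >= 0
old_clip[idx_pos] = log_weight[idx_pos].clamp_max(hi)
old_clip[~idx_pos] = log_weight[~idx_pos].clamp_min(lo)
gain2_old = old_clip.exp() * advantage

# New, two-sided clamp:
gain2_new = log_weight.clamp(lo, hi).exp() * advantage

# The objective only uses the elementwise minimum with gain1; the extra
# one-sided clamp can only raise gain2, so the minima agree.
assert torch.allclose(torch.min(gain1, gain2_old), torch.min(gain1, gain2_new))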