pytorch · vmoens · Jan 3, 2023 · Jan 1, 2023 · Jan 3, 2023 · Jan 3, 2023
diff --git a/torchrl/objectives/ppo.py b/torchrl/objectives/ppo.py
@@ -233,13 +233,10 @@ def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
                 f"and {log_weight.shape})"
             )
         gain1 = log_weight.exp() * advantage
-        log_weight_clip = torch.empty_like(log_weight)
-        # log_weight_clip.data.clamp_(*self._clip_bounds)
-        idx_pos = advantage >= 0
-        log_weight_clip[idx_pos] = log_weight[idx_pos].clamp_max(self._clip_bounds[1])
-        log_weight_clip[~idx_pos] = log_weight[~idx_pos].clamp_min(self._clip_bounds[0])
 
+        log_weight_clip = log_weight.clamp(*self._clip_bounds)
         gain2 = log_weight_clip.exp() * advantage
+
         gain = torch.stack([gain1, gain2], -1).min(dim=-1)[0]
         td_out = TensorDict({"loss_objective": -gain.mean()}, [])