Pass along bijector into pi_forward's calculation

sgoodfriend · sgoodfriend · commit 983cb75e43e5 · 2023-04-11T08:45:17.000-07:00
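Fixes CarRacing-v0 and other users of StateDependentNoise. pi_forward is a standalone function with no `self` parameter, yet it read `self.bijector` when computing entropy; the bijector is now passed in explicitly by the caller.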
diff --git a/rl_algo_impls/shared/actor/state_dependent_noise.py b/rl_algo_impls/shared/actor/state_dependent_noise.py
@@ -172,7 +172,7 @@ def forward(
             not action_masks
         ), f"{self.__class__.__name__} does not support action_masks"
         pi = self._distribution(obs)
-        return pi_forward(pi, actions)
+        return pi_forward(pi, actions, self.bijector)
 
     def sample_weights(self, batch_size: int = 1) -> None:
         std = self._get_std()
@@ -187,13 +187,13 @@ def action_shape(self) -> Tuple[int, ...]:
 
 
 def pi_forward(
-    distribution: Distribution, actions: Optional[torch.Tensor] = None
+    distribution: Distribution,
+    actions: Optional[torch.Tensor] = None,
+    bijector: Optional[TanhBijector] = None,
 ) -> PiForward:
     logp_a = None
     entropy = None
     if actions is not None:
         logp_a = distribution.log_prob(actions)
-        entropy = (
-            -logp_a if self.bijector else sum_independent_dims(distribution.entropy())
-        )
+        entropy = -logp_a if bijector else sum_independent_dims(distribution.entropy())
     return PiForward(distribution, logp_a, entropy)