ray-project · sven1977 · May 22, 2024 · May 10, 2024 · May 13, 2024 · May 14, 2024
@@ -20,6 +20,9 @@ class PPORLModule(RLModule, abc.ABC):
     def setup(self):
         # __sphinx_doc_begin__
         catalog = self.config.get_catalog()
+        # If we have a stateful model, states for the critic need to be collected
+        # during sampling and `inference_only` needs to be `False`.
+        self.inference_only = not self.config.model_config_dict["use_lstm"]
         # If this is not a learner module, we use only a single value network. This
         # network is then either the shared encoder network from the learner module
         # or the actor encoder network from the learner module (if the value network

@@ -32,7 +32,8 @@ def setup(self):
     def get_state(self, inference_only: bool = False) -> Dict[str, Any]:
         state_dict = self.state_dict()
         # If this module is not for inference, but the state dict is.
-        if not self.inference_only and inference_only:
+        # Note, for stateful modules, we need the full state dict.
+        if not self.inference_only and not self.is_stateful() and inference_only:
             # Call the local hook to remove or rename the parameters.
             return self._inference_only_get_state_hook(state_dict)
         # Otherwise, the state dict is for checkpointing or saving the model.

@@ -288,7 +288,7 @@ def add(
                             for i in range(len(eps))
                         ]
                     )
-                # Increase index.
+                # Increase index to the new length of `self._indices`.
                 j = len(self._indices)
 
     @override(EpisodeReplayBuffer)