ray-project · sven1977 · Apr 4, 2024 · Apr 3, 2024 · Apr 3, 2024
@@ -984,16 +984,17 @@ def place_processed_samples_on_learner_thread_queue(self) -> None:
         NOTE: This method is called if self.config._enable_new_api_stack is False.
 
         """
-        while self.batches_to_place_on_learner:
-            batch = self.batches_to_place_on_learner[0]
+        for i, batch in enumerate(self.batches_to_place_on_learner):
             try:
-                # Setting block = True prevents the learner thread,
-                # the main thread, and the gpu loader threads from
-                # thrashing when there are more samples than the
-                # learner can reasonable process.
-                # see https://github.com/ray-project/ray/pull/26581#issuecomment-1187877674  # noqa
-                self._learner_thread.inqueue.put(batch, block=True)
-                self.batches_to_place_on_learner.pop(0)
+                self._learner_thread.inqueue.put(
+                    batch,
+                    # Setting block = True for the very last item in our list prevents
+                    # the learner thread, this main thread, and the GPU loader threads
+                    # from thrashing when there are more samples than the learner can
+                    # reasonably process.
+                    # see https://github.com/ray-project/ray/pull/26581#issuecomment-1187877674  # noqa
+                    block=i == len(self.batches_to_place_on_learner) - 1
+                )
                 self._counters["num_samples_added_to_queue"] += (
                     batch.agent_steps()
                     if self.config.count_steps_by == "agent_steps"
@@ -1002,6 +1003,8 @@ def place_processed_samples_on_learner_thread_queue(self) -> None:
             except queue.Full:
                 self._counters["num_times_learner_queue_full"] += 1
 
+        self.batches_to_place_on_learner.clear()
+
     def process_trained_results(self) -> ResultDict:
         """Process training results that are outputed by the learner thread.