Fix env_sampler eval() info list issue: append deepcopy(self._info) instead of a reference to self._info

microsoft · Oct 27, 2023 · c19038a
1 parent 607d3b6
commit c19038a
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/maro/rl/rollout/env_sampler.py b/maro/rl/rollout/env_sampler.py
@@ -533,7 +533,7 @@ def sample(
 
         return {
             "experiences": [total_experiences],
-            "info": [deepcopy(self._info)],  # TODO: may have overhead issues. Leave to future work.
+            "info": [deepcopy(self._info)],
         }
 
     def set_policy_state(self, policy_state_dict: Dict[str, dict]) -> None:
@@ -592,7 +592,7 @@ def eval(self, policy_state: Dict[str, Dict[str, Any]] = None, num_episodes: int
                 self._step(list(env_action_dict.values()))
                 cache_element.next_state = self._state
 
-                if self._reward_eval_delay is None:  # TODO: necessary to calculate reward in eval()?
+                if self._reward_eval_delay is None:
                     self._calc_reward(cache_element)
                     self._post_eval_step(cache_element)
 
@@ -606,7 +606,7 @@ def eval(self, policy_state: Dict[str, Dict[str, Any]] = None, num_episodes: int
                     self._calc_reward(cache_element)
                     self._post_eval_step(cache_element)
 
-            info_list.append(self._info)
+            info_list.append(deepcopy(self._info))
 
         return {"info": info_list}