fix: correctly close gymnasium environments (#340)

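This commit ensures that gymnasium environments are closed properly once they are no longer needed: before the algorithms return, at the end of the example and sandbox scripts, and during teardown of the test fixtures.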
rickstaa · Aug 25, 2023 · a179176 · a179176
1 parent 90e16e9
commit a179176
Show file tree

Hide file tree

Showing 13 changed files with 30 additions and 5 deletions.
diff --git a/examples/manual_env_policy_inference.py b/examples/manual_env_policy_inference.py
@@ -12,7 +12,7 @@
 )
 
 AGENT_TYPE = "torch"  # The type of agent that was trained. Options: 'tf2' and 'torch'.
-AGENT_FOLDER = "/home/ricks/Development/stable-learning-control/data/2022-02-24_staa_lac_panda_reach/2022-02-24_09-12-19-staa_lac_panda_reach_s25"  # noqa: E501
+AGENT_FOLDER = "/home/ricks/development/work/stable-learning-control/data/cmd_lac_pytorch/cmd_lac_pytorch_s0"  # noqa: E501
 
 if __name__ == "__main__":
     # NOTE: STEP 1a: Try to load the policy and environment
@@ -45,3 +45,5 @@
             "'AGENT_FOLDER' and try again. If the problem persists please open a issue "
             "on https://github.com/rickstaa/stable-learning-control/issues."
         )
+
+    env.close()
diff --git a/sandbox/test_finite_horizon_replay_buffer.py b/sandbox/test_finite_horizon_replay_buffer.py
@@ -48,3 +48,6 @@
                 print("Environment terminated or truncated. Resetting.")
                 o, _ = env.reset()
                 ep_ret, ep_len, t = 0, 0, 0
+
+    print("Done")
+    env.close()
diff --git a/sandbox/test_gym_env.py b/sandbox/test_gym_env.py
@@ -48,4 +48,6 @@
     handles, labels = ax.get_legend_handles_labels()
     ax.legend(handles, labels, loc=2, fancybox=False, shadow=False)
     plt.show()
+
     print("Done")
+    env.close()
diff --git a/sandbox/test_replay_buffer.py b/sandbox/test_replay_buffer.py
@@ -44,3 +44,6 @@
                 print("Environment terminated or truncated. Resetting.")
                 o, _ = env.reset()
                 ep_ret, ep_len = 0, 0
+
+    print("Done")
+    env.close()
diff --git a/sandbox/test_traj_buffer.py b/sandbox/test_traj_buffer.py
@@ -53,3 +53,6 @@
 
         # Print data.
         print(f"Epoch {epoch}:")
+
+    print("Done")
+    env.close()
diff --git a/stable_learning_control/algos/pytorch/lac/lac.py b/stable_learning_control/algos/pytorch/lac/lac.py
@@ -1449,6 +1449,8 @@ def lac(
         type="info",
     )
 
+    # Close environment and return policy and replay buffer.
+    env.close()
     return policy, replay_buffer
 
 

diff --git a/stable_learning_control/algos/pytorch/sac/sac.py b/stable_learning_control/algos/pytorch/sac/sac.py
@@ -1260,6 +1260,8 @@ def sac(
         type="info",
     )
 
+    # Close environment and return policy and replay buffer.
+    env.close()
     return policy, replay_buffer
 
 

diff --git a/stable_learning_control/algos/tf2/lac/lac.py b/stable_learning_control/algos/tf2/lac/lac.py
@@ -1368,6 +1368,8 @@ def lac(
         type="info",
     )
 
+    # Close environment and return policy and replay buffer.
+    env.close()
     return policy, replay_buffer
 
 

diff --git a/stable_learning_control/algos/tf2/sac/sac.py b/stable_learning_control/algos/tf2/sac/sac.py
@@ -1190,6 +1190,8 @@ def sac(
         type="info",
     )
 
+    # Close environment and return policy and replay buffer.
+    env.close()
     return policy, replay_buffer
 
 

diff --git a/tests/algos/gpu/test_algos_gpu.py b/tests/algos/gpu/test_algos_gpu.py
@@ -31,7 +31,8 @@ def env(self):
         env.action_space.seed(0)
         env.observation_space.seed(0)
 
-        return env
+        yield env
+        env.close()
 
     def test_reproducibility(self, algo, device, snapshot, env):
         """Checks if the algorithm is still working as expected."""

diff --git a/tests/algos/test_algos.py b/tests/algos/test_algos.py
@@ -30,7 +30,8 @@ def env(self):
         env.action_space.seed(0)
         env.observation_space.seed(0)
 
-        return env
+        yield env
+        env.close()
 
     def test_reproducibility(self, algo, snapshot, env):
         """Checks if the algorithm is still working as expected."""

diff --git a/tests/algos/tf2/gpu/test_tf2_algos_gpu.py b/tests/algos/tf2/gpu/test_tf2_algos_gpu.py
@@ -32,7 +32,8 @@ def env(self):
         env.action_space.seed(0)
         env.observation_space.seed(0)
 
-        return env
+        yield env
+        env.close()
 
     def test_reproducibility(self, algo, device, snapshot, env):
         """Checks if the algorithm is still working as expected."""

diff --git a/tests/algos/tf2/test_tf2_algos.py b/tests/algos/tf2/test_tf2_algos.py
@@ -30,7 +30,8 @@ def env(self):
         env.action_space.seed(0)
         env.observation_space.seed(0)
 
-        return env
+        yield env
+        env.close()
 
     def test_reproducibility(self, algo, snapshot, env):
         """Checks if the algorithm is still working as expected."""