In [1]:
#|hide
from fastrl.test_utils import initialize_notebook
initialize_notebook()

In [2]:
#|default_exp envs.continuous_debug_env

In [3]:
#|export
# Python native modules
import os
# Third party libs
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import register
import numpy as np
# Local modules

# Debug Env
> Hyper simple env for debugging continous motion agents.

In [4]:
#|export
class ContinuousDebugEnv(gym.Env):
    metadata = {'render_modes': ['console']}  # Corrected metadata key
    
    def __init__(self, goal_position=None, proximity_threshold=0.5):
        super(ContinuousDebugEnv, self).__init__()

        self.goal_position = goal_position if goal_position is not None else np.random.uniform(-10, 10)

        if goal_position is not None:
            self.observation_space = spaces.Box(low=-goal_position, high=goal_position, shape=(1,), dtype=np.float32)
        else:
            self.observation_space = spaces.Box(low=-10, high=10, shape=(1,), dtype=np.float32)

        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        

        self.proximity_threshold = proximity_threshold
        self.state = None

    def step(self, action):
        self.state[0] += action[0]  # Assuming action is a NumPy array, use the first element
        
        distance_to_goal = np.abs(self.state[0] - self.goal_position)
        reward = -distance_to_goal.item()  # Ensure reward is a float
        
        done = distance_to_goal <= self.proximity_threshold
        done = bool(done.item())  # Ensure done is a boolean
        
        info = {}
        
        return self.state, reward, done,done, info

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)  # Call the superclass reset, which handles the seeding
        if self.goal_position is None:
            self.goal_position = np.random.uniform(-10, 10)
        # The state is {current position, goal position}
        self.state = np.array([0.0, self.goal_position], dtype=np.float32)
        
        return self.state, {}  # Return observation and an empty info dictionary


    def render(self, mode='console'):
        if mode != 'console':
            raise NotImplementedError("Only console mode is supported.")
        print(f"Position: {self.state} Goal: {self.goal_position}")


register(
     id="fastrl/ContinuousDebugEnv-v0",
     entry_point="fastrl.envs.continuous_debug_env:ContinuousDebugEnv",
     max_episode_steps=300,
)


In [5]:
env = gym.make('fastrl/ContinuousDebugEnv-v0')
obs, info = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # Take a random action
    obs, reward, done,terminated, info = env.step(action)
    env.render()

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(


Position: [-0.8365295  4.2245593] Goal: 4.224559422011199
Position: [-0.61684215  4.2245593 ] Goal: 4.224559422011199
Position: [-1.2976267  4.2245593] Goal: 4.224559422011199
Position: [-1.7950554  4.2245593] Goal: 4.224559422011199
Position: [-1.3699143  4.2245593] Goal: 4.224559422011199
Position: [-1.4716704  4.2245593] Goal: 4.224559422011199
Position: [-0.86188215  4.2245593 ] Goal: 4.224559422011199
Position: [-0.7006067  4.2245593] Goal: 4.224559422011199
Position: [-1.3716518  4.2245593] Goal: 4.224559422011199
Position: [-1.7273074  4.2245593] Goal: 4.224559422011199
Position: [-2.1644132  4.2245593] Goal: 4.224559422011199
Position: [-1.1645694  4.2245593] Goal: 4.224559422011199
Position: [-0.5456029  4.2245593] Goal: 4.224559422011199
Position: [-0.56844705  4.2245593 ] Goal: 4.224559422011199
Position: [-0.7515511  4.2245593] Goal: 4.224559422011199
Position: [-0.4227327  4.2245593] Goal: 4.224559422011199
Position: [-1.2697878  4.2245593] Goal: 4.224559422011199
Position

Position: [-13.860599    4.2245593] Goal: 4.224559422011199
Position: [-14.094043    4.2245593] Goal: 4.224559422011199
Position: [-13.13691     4.2245593] Goal: 4.224559422011199
Position: [-13.341913    4.2245593] Goal: 4.224559422011199
Position: [-14.209486    4.2245593] Goal: 4.224559422011199
Position: [-13.946554    4.2245593] Goal: 4.224559422011199
Position: [-14.352644    4.2245593] Goal: 4.224559422011199
Position: [-14.631141    4.2245593] Goal: 4.224559422011199
Position: [-15.308446    4.2245593] Goal: 4.224559422011199
Position: [-15.079626    4.2245593] Goal: 4.224559422011199
Position: [-15.007167    4.2245593] Goal: 4.224559422011199
Position: [-15.767818    4.2245593] Goal: 4.224559422011199
Position: [-15.458918    4.2245593] Goal: 4.224559422011199
Position: [-15.884812    4.2245593] Goal: 4.224559422011199
Position: [-16.540907    4.2245593] Goal: 4.224559422011199
Position: [-17.520212    4.2245593] Goal: 4.224559422011199
Position: [-17.275293    4.2245593] Goal

In [6]:
#|hide
#|eval: false
!nbdev_export