In [7]:
#|hide
# Project helper from fastrl's test utilities; presumably prepares the notebook
# session before the cells below run -- TODO confirm against fastrl.test_utils.
from fastrl.test_utils import initialize_notebook
initialize_notebook()

In [8]:
#|default_exp envs.continuous_debug_env

In [9]:
#|export
# Python native modules
import os
# Third party libs
import gymnasium as gym
from gymnasium import spaces
from gymnasium.envs.registration import register
import numpy as np
# Local modules

# Debug Env
> Hyper-simple env for debugging continuous-motion agents.

In [10]:
#|export
class ContinuousDebugEnv(gym.Env):
    """One-dimensional continuous-action environment for debugging agents.

    The agent starts at position ``0.0`` and, on every step, its action (a
    single value in ``[-1, 1]``) is added to the current position. The reward
    is the negative absolute distance to ``goal_position`` and the episode
    terminates once that distance is at most ``proximity_threshold``.

    Args:
        goal_position: Target position on the line. When ``None`` a goal is
            drawn uniformly from ``[-10, 10]`` using the environment's own
            random generator.
        proximity_threshold: Maximum distance to the goal at which the episode
            counts as solved.
        render_mode: Optional render mode, accepted so that
            ``gym.make(..., render_mode='console')`` works. Must be ``None`` or
            one of ``metadata['render_modes']``.

    Raises:
        ValueError: If ``render_mode`` is not ``None`` and not supported.

    Note:
        The position is not clipped, so observations can leave
        ``observation_space`` when the agent overshoots its bounds.
    """
    # Gymnasium looks up the supported modes under the plural key 'render_modes'.
    metadata = {'render_modes': ['console']}

    # Half-width of the interval used for random goals and the default bounds.
    _GOAL_LIMIT = 10.0

    def __init__(self, goal_position=None, proximity_threshold=0.5, render_mode=None):
        super().__init__()

        if render_mode is not None and render_mode not in self.metadata['render_modes']:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected one of {self.metadata['render_modes']}."
            )
        self.render_mode = render_mode

        # Remember whether the goal is random so a seeded reset can re-draw it.
        self._random_goal = goal_position is None
        if self._random_goal:
            self.goal_position = self._sample_goal()
            bound = self._GOAL_LIMIT
        else:
            self.goal_position = goal_position
            # abs() keeps low <= high when the requested goal is negative.
            bound = abs(goal_position)

        self.observation_space = spaces.Box(low=-bound, high=bound, shape=(1,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

        self.proximity_threshold = proximity_threshold
        self.state = None  # Set by reset(); step() must not be called before it.

    def _sample_goal(self):
        """Draw a goal uniformly from ``[-_GOAL_LIMIT, _GOAL_LIMIT]`` with the env RNG."""
        return float(self.np_random.uniform(-self._GOAL_LIMIT, self._GOAL_LIMIT))

    def step(self, action):
        """Move by ``action`` and report the new position.

        Args:
            action: Array-like whose first element is the displacement to apply.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)`` where
            ``observation`` is the new position as a float32 array of shape
            ``(1,)``, ``reward`` is the negative distance to the goal,
            ``terminated`` is ``True`` once the goal is within
            ``proximity_threshold``, ``truncated`` is always ``False`` and
            ``info`` is an empty dict.
        """
        delta = np.asarray(action, dtype=np.float32).reshape(-1)[0]
        # Build a new array rather than using `+=`: the previous state was
        # handed out as an observation and must not change under the caller.
        self.state = (self.state + delta).astype(np.float32)

        distance_to_goal = np.abs(self.state - self.goal_position).item()
        reward = -distance_to_goal

        terminated = bool(distance_to_goal <= self.proximity_threshold)
        # Time limits are applied by the TimeLimit wrapper (max_episode_steps),
        # so the environment itself never truncates an episode.
        truncated = False

        return self.state, reward, terminated, truncated, {}

    def reset(self, seed=None, options=None):
        """Start a new episode at position ``0.0``.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``. When given and
                the goal was not fixed at construction, the goal is re-drawn
                from the freshly seeded generator so runs are reproducible.
            options: Unused; accepted for API compatibility.

        Returns:
            Tuple ``(observation, info)`` with the initial position as a
            float32 array of shape ``(1,)`` and an empty info dict.
        """
        super().reset(seed=seed)  # Seeds self.np_random when a seed is given.

        if self._random_goal and seed is not None:
            self.goal_position = self._sample_goal()

        self.state = np.zeros(1, dtype=np.float32)
        return self.state, {}

    def render(self, mode='console'):
        """Print the current position and the goal to stdout.

        Args:
            mode: Only ``'console'`` is supported.

        Raises:
            NotImplementedError: If ``mode`` is anything other than ``'console'``.
        """
        if mode != 'console':
            raise NotImplementedError("Only console mode is supported.")
        print(f"Position: {self.state} Goal: {self.goal_position}")


# Register only once. This statement runs both when the exported module is
# imported and when this cell is executed, and registering an id that already
# exists makes gymnasium log an "Overriding environment ... already in
# registry" warning.
if "fastrl/ContinuousDebugEnv-v0" not in gym.registry:
    register(
         id="fastrl/ContinuousDebugEnv-v0",
         entry_point="fastrl.envs.continuous_debug_env:ContinuousDebugEnv",
         # The TimeLimit wrapper truncates episodes after this many steps.
         max_episode_steps=300,
    )


  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [11]:
# Roll out one episode with random actions as a smoke test of the environment.
env = gym.make('fastrl/ContinuousDebugEnv-v0')
obs, info = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # Take a random action
    # step() returns (obs, reward, terminated, truncated, info); the episode is
    # over when either flag is set, so the 300-step time limit also ends the loop.
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    env.render()
env.close()

Position: [-0.0546182] Goal: -4.276741291625905
Position: [0.5164447] Goal: -4.276741291625905
Position: [0.5772327] Goal: -4.276741291625905
Position: [1.4852788] Goal: -4.276741291625905
Position: [2.0711896] Goal: -4.276741291625905
Position: [1.2937609] Goal: -4.276741291625905
Position: [0.96672064] Goal: -4.276741291625905
Position: [1.5753617] Goal: -4.276741291625905
Position: [1.1796584] Goal: -4.276741291625905
Position: [1.6677196] Goal: -4.276741291625905
Position: [1.0250015] Goal: -4.276741291625905
Position: [0.23744571] Goal: -4.276741291625905
Position: [0.94255] Goal: -4.276741291625905
Position: [0.6150867] Goal: -4.276741291625905
Position: [1.5755142] Goal: -4.276741291625905
Position: [2.0553827] Goal: -4.276741291625905
Position: [2.2224948] Goal: -4.276741291625905
Position: [2.528675] Goal: -4.276741291625905
Position: [3.3200028] Goal: -4.276741291625905
Position: [4.2360926] Goal: -4.276741291625905
Position: [3.4168172] Goal: -4.276741291625905
Position: [4.

  logger.warn(


In [13]:
#|hide
#|eval: false
!nbdev_export