In [1]:
import gymnasium as gym
from gymnasium.spaces import Dict, Box
import numpy as np

from examples.agents.policies import MPSTD3Policy
from src.spaces.interval_union import IntervalUnion
from ray.rllib.policy.policy import PolicySpec

from envs.rllib import DummyEnvironment, NavigationEnvironment
from examples.restrictors.navigation_restrictor import NavigationRestrictor
from src.wrappers.rllib import UniformlyRestrictedEnvironment
from src.utils import test_environment

In [3]:
# Smoke-test the plain (unwrapped) DummyEnvironment class with the project's
# test_environment helper imported from src.utils. The recorded output below
# shows one reset followed by alternating "Actions" / "Step" printouts.
test_environment(DummyEnvironment)

Testing <class 'envs.rllib.DummyEnvironment'>...
Reset: {'0': array([0.7862554 , 0.09598847], dtype=float32), '1': array([0.39947695, 0.65278894], dtype=float32)}
Actions: {'0': 1, '1': 2}
Step:  {'0': array([0.1364285, 0.8953858], dtype=float32), '1': array([0.38328525, 0.0073029 ], dtype=float32)}, {'0': 0.11057595029742373, '1': 0.9079943688748312}, {'__all__': False}
Actions: {'0': 2, '1': 2}
Step:  {'0': array([0.9688025, 0.7681059], dtype=float32), '1': array([0.8119259 , 0.13966975], dtype=float32)}, {'0': 0.7931401435961336, '1': 0.026237086691505707}, {'__all__': False}
Actions: {'0': 0, '1': 1}
Step:  {'0': array([0.3413117, 0.1446504], dtype=float32), '1': array([0.60589707, 0.5302609 ], dtype=float32)}, {'0': 0.9410268625036521, '1': 0.6303095628330038}, {'__all__': False}
Actions: {'0': 0, '1': 2}
Step:  {'0': array([0.13044316, 0.24048765], dtype=float32), '1': array([0.61571777, 0.2582697 ], dtype=float32)}, {'0': 0.0462611125367014, '1': 0.8177573684699918}, {'__all__':

In [2]:
# Settings handed to DummyEnvironment: a Box action space on [0, 1].
env_config = dict(action_space=Box(0.0, 1.0))

# Exercise DummyEnvironment through the uniform-restriction wrapper, giving
# the governance agent its own Box action space on [0, 1].
test_environment(
    UniformlyRestrictedEnvironment({
        'env': DummyEnvironment,
        'env_config': env_config,
        'governance_action_space': Box(0.0, 1.0),
    })
)

Testing <class 'src.wrappers.rllib.UniformlyRestrictedEnvironment'>...
Reset: {'gov': {'0': array([0.14592128, 0.42996204], dtype=float32), '1': array([0.19030397, 0.38722435], dtype=float32)}}
Actions: {'gov': array([0.28150117], dtype=float32)}
Step:  {'0': {'observation': array([0.14592128, 0.42996204], dtype=float32), 'allowed_actions': array([0.28150117], dtype=float32)}, '1': {'observation': array([0.19030397, 0.38722435], dtype=float32), 'allowed_actions': array([0.28150117], dtype=float32)}}, {}, {'__all__': False}
Actions: {'0': array([0.29532138], dtype=float32), '1': array([0.16975395], dtype=float32)}
Step:  {'gov': {'0': array([0.8136685 , 0.98700196], dtype=float32), '1': array([0.42833635, 0.03936921], dtype=float32)}}, {'gov': 0.7428823935768555}, {'__all__': False}
Actions: {'gov': array([0.02379974], dtype=float32)}
Step:  {'0': {'observation': array([0.8136685 , 0.98700196], dtype=float32), 'allowed_actions': array([0.02379974], dtype=float32)}, '1': {'observation': 

In [2]:
# --- Environment settings -----------------------------------------------------
# Passed as 'env_config' wherever NavigationEnvironment is configured in this
# cell (see run_config and the test_environment call).
env_config = {
    'STEPS_PER_EPISODE': 4,
    # Presumably the total width of the agent's action interval: the agent
    # action space declared in run_config is [-110, 110].
    'ACTION_RANGE': 220,
    'DT': 1.0,
    'REWARD': {
        'TIMESTEP_PENALTY_COEFFICIENT': 0.05,
        'REWARD_COEFFICIENT': 5.0,
        'GOAL': 50,
        'COLLISION': -20.0
    },
    'MAP': {
        'HEIGHT': 15.0,
        'WIDTH': 15.0,
        'AGENT': {'x': 1.0, 'y': 1.0, 'angle': 90.0, 'step_size': 1.0, 'radius': 0.4},
        'GOAL': {'x': 12.0, 'y': 12.0, 'radius': 0.5}
    }
}

# --- Restrictor settings ------------------------------------------------------
# Used as the config of the 'gov' policy and as the constructor argument of
# NavigationRestrictor; the meaning of each key is defined by that class.
governance_config = {
    'COUNT': 3,
    'POSITION_COVARIANCE': [[8.0, 0.0],
                            [0.0, 8.0]],
    'MEAN_SIZE': 1.0,
    'VARIANCE_SIZE': 0.25,
    'RANGE_SIZE': 0.75,
    'START_SEED': 50,
    'SAFETY_ANGLE': 6
}

# Observation space of the governance agent. The keys match the dictionary
# built by governance_observation_fn.
governance_observation_space = Dict({
    'location': Box(0.0, 20.0, shape=(2,)),
    'perspective': Box(0.0, 360.0, shape=(1,)),
    'map': Box(0.0, 20.0, shape=(2,)),
    'step': Box(0.0, 40.0, shape=(1,)),
    'step_radius': Box(0.0, 5.0, shape=(1,)),
    'step_size': Box(0.0, 5.0, shape=(1,)),
    'action_range': Box(0.0, 360.0, shape=(1,)),
    'dt': Box(0.0, 5.0, shape=(1,))
})

# No overrides for the learning agent's policy yet.
model_config = {}

# --- RLlib run configuration --------------------------------------------------
run_config = {
    'env': NavigationEnvironment,
    'env_config': env_config,
    'multiagent': {
        'policies':
            {
                'agent': PolicySpec(
                    policy_class=MPSTD3Policy,
                    observation_space=Dict({
                        # Nested observations must themselves be a Dict space:
                        # gymnasium's Dict rejects plain Python dicts as values.
                        'observation': Dict({
                            'location': Box(low=0.0, high=15.0, shape=(2,), dtype=np.float32),
                            'perspective': Box(low=0.0, high=360.0, shape=(1,), dtype=np.float32),
                            'target_angle': Box(low=0.0, high=360.0, shape=(1,), dtype=np.float32),
                            'target_distance': Box(low=0.0, high=50.0, shape=(1,), dtype=np.float32),
                            'current_step': Box(low=0.0, high=40.0, shape=(1,), dtype=np.float32)
                        }),
                        'allowed_actions': IntervalUnion(-110.0, 110.0)
                    }),
                    action_space=Box(low=-110.0, high=110.0, shape=(1,), dtype=np.float32),
                    config=model_config
                ),
                'gov': PolicySpec(
                    policy_class=NavigationRestrictor,
                    observation_space=governance_observation_space,
                    action_space=IntervalUnion(-110.0, 110.0),
                    config=governance_config
                )
            },
        # Agent ids double as policy ids ('agent' -> 'agent', 'gov' -> 'gov').
        # RLlib passes extra arguments (episode, worker, ...) to this callback,
        # so they are accepted and ignored instead of raising a TypeError.
        'policy_mapping_fn': lambda agent_id, *args, **kwargs: agent_id,
        # Only the navigating agent is trained; the restrictor is not.
        'policies_to_train': ['agent']
    }
}

def governance_observation_fn(wrapper):
    """Build the governance agent's observation from the wrapped environment.

    Args:
        wrapper: Object exposing the wrapped environment as ``wrapper.env``.
            The environment must provide ``agent`` (with ``x``, ``y``,
            ``perspective``, ``radius`` and ``step_size``), ``HEIGHT``,
            ``WIDTH``, ``current_step``, ``ACTION_RANGE`` and ``DT``.

    Returns:
        A dict of float32 numpy arrays keyed by 'location', 'perspective',
        'map', 'step', 'step_radius', 'step_size', 'action_range' and 'dt'.
    """
    def _as_float32(*values):
        # Pack scalar readings into a 1-D float32 array.
        return np.array(list(values), dtype=np.float32)

    return {
        'location': _as_float32(wrapper.env.agent.x, wrapper.env.agent.y),
        'perspective': _as_float32(wrapper.env.agent.perspective),
        'map': _as_float32(wrapper.env.HEIGHT, wrapper.env.WIDTH),
        'step': _as_float32(wrapper.env.current_step),
        'step_radius': _as_float32(wrapper.env.agent.radius),
        'step_size': _as_float32(wrapper.env.agent.step_size),
        'action_range': _as_float32(wrapper.env.ACTION_RANGE),
        'dt': _as_float32(wrapper.env.DT),
    }

def governance_reward_fn(wrapper):
    """Return the governance reward: the negated sum of ``wrapper.rewards``.

    Args:
        wrapper: Object whose ``rewards`` attribute is a mapping with numeric
            values.

    Returns:
        The negative of the total of all values in ``wrapper.rewards``.
    """
    total_reward = sum(wrapper.rewards.values())
    return -total_reward

# Run NavigationEnvironment inside the uniform-restriction wrapper, with a
# NavigationRestrictor instance passed as the second argument of
# test_environment.
# NOTE(review): 'governance_action_space' is Box(0, 1) here, whereas the
# recorded governance actions are interval lists and run_config declares
# IntervalUnion(-110.0, 110.0) for the 'gov' policy - confirm which is intended.
test_environment(
    UniformlyRestrictedEnvironment({
        'env': NavigationEnvironment,
        'env_config': env_config,
        'governance_action_space': Box(0.0, 1.0),
        'governance_observation_space': governance_observation_space,
        'governance_observation_fn': governance_observation_fn,
        'governance_reward_fn': governance_reward_fn,
    }),
    NavigationRestrictor(governance_config),
)

Testing <class 'src.wrappers.rllib.UniformlyRestrictedEnvironment'>...
Reset: {'gov': {'location': array([1., 1.], dtype=float32), 'perspective': array([90.], dtype=float32), 'map': array([15., 15.], dtype=float32), 'step': array([0.], dtype=float32), 'step_radius': array([0.4], dtype=float32), 'step_size': array([1.], dtype=float32), 'action_range': array([220.], dtype=float32), 'dt': array([1.], dtype=float32)}}
Actions: {'gov': [[-110.0, 30.92]]}
Step:  {'agent': {'observation': {'location': array([1., 1.], dtype=float32), 'perspective': array([90.], dtype=float32), 'target_angle': array([45.], dtype=float32), 'target_distance': array([0.], dtype=float32), 'current_step': array([0.], dtype=float32)}, 'allowed_actions': [[-110.0, 30.92]]}}, {}, {'__all__': False}
Actions: {'agent': array([42.65785], dtype=float32)}
Step:  {'gov': {'location': array([0.32235, 1.7354 ], dtype=float32), 'perspective': array([132.66], dtype=float32), 'map': array([15., 15.], dtype=float32), 'step': array

In [None]:
# Ray is imported in this cell rather than with the other imports in the
# notebook's first cell.
import ray

# NOTE(review): tune.run() is called with no arguments. Ray Tune documents a
# trainable / experiment as its first argument, so this cell presumably fails
# as written and looks unfinished - likely meant to receive a trainable plus
# config=run_config. Confirm the intended trainable before running.
# NOTE(review): `ray.tune` is reached through attribute access on `ray`;
# presumably the submodule is already loaded via the ray.rllib import - verify.
analysis = ray.tune.run()