<a href="https://colab.research.google.com/github/lcipolina/Ray_tutorials/blob/main/TorchMultiCategorical.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Example of issues in the TorchMuliCategoricalDistribution

In [1]:
!pip install ray[rllib] --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.6/101.6 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m836.9/836.9 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [1]:
import torch
import numpy as np
import gymnasium as gym

Let's create an environment with action space MultiDiscrete

In [5]:

class MyCustomEnv_gymnasium(gym.Env):


    def __init__(self, config_dict):
        super(MyCustomEnv_gymnasium, self).__init__()

        self.num_agents = config_dict.get('num_agents',2)
        self.n          = config_dict.get('num_samples', 100)
        self.priors     = config_dict.get('priors',[0.5, 0.5])
        self.reservation_prices = config_dict.get('reservation_prices', [50]* self.num_agents)

        # Define action and observation space
        self.action_space      = gym.spaces.MultiDiscrete([self.n + 1]*self.num_agents)           # MultiDiscrete is mapped to Categorical distribution.
        self.observation_space = gym.spaces.Box(low=0, high=self.n, shape=(2 * self.num_agents,))

        self.state = None
        self.offer = None
        self.acceptance = None
        self.reward = None

    def calculate_reward(self):
        '''Returns an array stating who has accepted/rejected the offer'''
        return torch.tensor([1 if accept else -1 for accept in self.acceptance]).sum().item()


    def reset(self,*, seed=None, options=None):

        self.state = np.zeros(2 * self.num_agents)
        self.offer = None
        self.acceptance = None
        self.reward = None
        return self.state, {} #observation, info_dict


    def step(self, actions):
        self.offer = actions

        self.acceptance = [action >= res_price for action, res_price in zip(actions, self.reservation_prices)]


        self.reward = self.calculate_reward()

        self.state = np.array([*actions, *self.acceptance])

        done = all(self.acceptance)
        terminateds = truncateds = done
        return self.state, self.reward, terminateds, truncateds, {}



 Register and run

In [3]:
def test_env_gymnasium_rllib():
    import ray
    from ray import tune
    from ray.tune.registry import register_env

    if ray.is_initialized(): ray.shutdown()
    ray.init(local_mode=True, include_dashboard=False, ignore_reinit_error=True)


    # Custom env config
    env_config = {
        'num_agents': 2,
        'num_samples': 100,
        'priors'     : [0.5, 0.5],
        'reservation_prices': [30, 30]
    }
    def env_creator(env_config):
        return MyCustomEnv_gymnasium(env_config)
    register_env("my_custom_env", env_creator)

    # Trial config
    model_config = {
        "env": "my_custom_env",
        "framework": "torch",
        "num_workers": 0,
        }

    stop = {
        "timesteps_total": 1
    }

    results = tune.run("PPO", config=model_config, stop=stop)

In [4]:
test_env_gymnasium_rllib()

2023-08-04 08:11:07,602	INFO worker.py:1621 -- Started a local Ray instance.
2023-08-04 08:11:09,748	INFO tune.py:666 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949
  if (distutils.version.LooseVersion(tf.__version__) <
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  pkg_resources.declare_namespace(__name__)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(parent)
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  

+--------------------------------------------------------+
| Configuration for experiment     PPO                   |
+--------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator |
| Scheduler                        FIFOScheduler         |
| Number of trials                 1                     |
+--------------------------------------------------------+

View detailed results here: /root/ray_results/PPO
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/PPO`

:job_id:01000000
:task_name:bundle_reservation_check_func


:actor_name:PPO


:actor_name:PPO


2023-08-04 08:11:16,913	ERROR actor.py:970 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::PPO.__init__()[39m (pid=4986, ip=172.28.0.12, actor_id=abaeb2ab5bf99ae50b0282b301000000, repr=PPO)
  File "/usr/local/lib/python3.10/dist-packages/ray/rllib/algorithms/algorithm.py", line 517, in __init__
    super().__init__(
  File "/usr/local/lib/python3.10/dist-packages/ray/tune/trainable/trainable.py", line 169, in __init__
    self.setup(copy.deepcopy(self.config))
  File "/usr/local/lib/python3.10/dist-packages/ray/rllib/algorithms/algorithm.py", line 639, in setup
    self.workers = WorkerSet(
  File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/worker_set.py", line 157, in __init__
    self._setup(
  File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/worker_set.py", line 247, in _setup
    self._local_worker = self._make_worker(
  File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evalua

Trial status: 1 PENDING
Current time: 2023-08-04 08:11:17. Total running time: 0s
Logical resource usage: 1.0/2 CPUs, 0/0 GPUs
+------------------------------------------+
| Trial name                      status   |
+------------------------------------------+
| PPO_my_custom_env_7c146_00000   PENDING  |
+------------------------------------------+





Trial status: 1 PENDING
Current time: 2023-08-04 08:11:23. Total running time: 7s
Logical resource usage: 1.0/2 CPUs, 0/0 GPUs
+------------------------------------------+
| Trial name                      status   |
+------------------------------------------+
| PPO_my_custom_env_7c146_00000   PENDING  |
+------------------------------------------+



KeyboardInterrupt: ignored