[RLlib] DQN Rainbow on new API stack: RLModule and Catalog together with TorchNoisyMLP. #43199

Merged Feb 28, 2024 (25 commits). The diff below shows changes from 14 of the 25 commits.

Commits
760a33f
Started programming DQN Rainbow in new stack.
simonsays1980 Feb 6, 2024
d670b5a
Merge branch 'master' into dqn-rainbow-rl-module
simonsays1980 Feb 7, 2024
9b368b5
Added 'DQNRainbowTorchRLModule'.
simonsays1980 Feb 7, 2024
2956906
Added functionality for target network updates.
simonsays1980 Feb 9, 2024
8af30fb
LINTER
simonsays1980 Feb 9, 2024
293662a
Added training step for the new API stack with EnvRunner.
simonsays1980 Feb 9, 2024
99ef649
Fixed some bugs in 'DQNRainbowTorchModule' and added exploration with…
simonsays1980 Feb 12, 2024
8a9b7ef
Merge branch 'master' into dqn-rainbow-rl-module
simonsays1980 Feb 12, 2024
e602e23
Changed multinomial sampling weights.
simonsays1980 Feb 12, 2024
c64c51f
Merge branch 'master' into dqn-rainbow-rl-module
simonsays1980 Feb 12, 2024
e56e9e3
Implemented most parts of the DQN Rainbow algorithm with new stack.
simonsays1980 Feb 14, 2024
ea3b5d2
Implemented distributional Q-learning and dueling networks as well as…
simonsays1980 Feb 15, 2024
a138915
Moved changes in 'dqn.py', 'simple_q.py' and 'prioritized_episode_rep…
simonsays1980 Feb 15, 2024
b52b58d
Added docs and renamed functions. Furthermore, added typing.
simonsays1980 Feb 16, 2024
d9a06ed
Merge branch 'master' into dqn-rainbow-rl-module
simonsays1980 Feb 22, 2024
724d0cc
Implemented some parts of @sven1977's review. Furthermore, added all …
simonsays1980 Feb 22, 2024
a4f9b68
Implemented review from @sven1977. Furthermore, made a cleanup and ad…
simonsays1980 Feb 23, 2024
a3a27eb
Merge branch 'master' into dqn-rainbow-rl-module
simonsays1980 Feb 23, 2024
dc6adad
Changed torch devices.
simonsays1980 Feb 23, 2024
790a034
Added the output specs to the module.
simonsays1980 Feb 23, 2024
b968f5f
Added epsilon scheduler to 'DQNRLModule' for epsilon-greedy sampling.…
simonsays1980 Feb 26, 2024
e47de0b
Merged Master.
simonsays1980 Feb 27, 2024
72fb694
Fixed specs bug. It needed 'kwargs' in the 'forward_exploration' def…
simonsays1980 Feb 27, 2024
31f76cf
Changed back torch device -> devices
simonsays1980 Feb 27, 2024
e129c50
Added a sync functionality for global environment steps to 'WorkerSet…
simonsays1980 Feb 27, 2024
rllib/algorithms/dqn/dqn_rainbow_catalog.py (new file: 191 additions, 0 deletions)
@@ -0,0 +1,191 @@
# __sphinx_doc_begin__
import gymnasium as gym

from ray.rllib.algorithms.dqn.dqn_rainbow_noisy_net_configs import (
    NoisyMLPEncoderConfig,
    NoisyMLPHeadConfig,
)
from ray.rllib.core.models.catalog import Catalog
from ray.rllib.core.models.base import Model
from ray.rllib.core.models.configs import MLPHeadConfig
from ray.rllib.models.torch.torch_distributions import TorchCategorical
from ray.rllib.utils.annotations import (
    ExperimentalAPI,
    override,
    OverrideToImplementCustomLogic,
)


@ExperimentalAPI
class DQNRainbowCatalog(Catalog):
    """The catalog class used to build models for DQN Rainbow.

    `DQNRainbowCatalog` provides the following models:
        - Encoder: The encoder used to encode the observations.
        - Target Encoder: The encoder used to encode the observations
          for the target network.
        - Af Head: The head of the advantage stream if a dueling
          architecture is used, otherwise the head of the Q-function.
        - Vf Head (optional): The head of the value function in case a
          dueling architecture is chosen.

    All networks can include noisy layers if `noisy` is `True`.

    Any custom head can be built by overriding `build_af_head()` and
    `build_vf_head()`. Alternatively, the `AfHeadConfig` or `VfHeadConfig`
    can be overridden to build custom logic during `RLModule` runtime.

    All heads can optionally use distributional learning. In this case the
    number of output neurons corresponds to the number of actions times the
    number of support atoms of the discrete distribution.
    """

    @override(Catalog)
    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        model_config_dict: dict,
        view_requirements: dict = None,
    ):
"""Initializes the DQNRainbowCatalog.

Args:
observation_space: The observation space of the Encoder.
action_space: The action space for the Af Head.
model_config_dict: The model config to use.
"""
super().__init__(
observation_space=observation_space,
action_space=action_space,
model_config_dict=model_config_dict,
)

# Is a noisy net used.
self.uses_noisy = self._model_config_dict["noisy"]
# If a noisy network should be used.
if self.uses_noisy:
# TODO (simon): Add all other arguments here.
# In this case define the encoder.
if self._model_config_dict["encoder_latent_dim"]:
self.af_and_vf_encoder_hiddens = self._model_config_dict[
"fcnet_hiddens"
]
else:
self.af_and_vf_encoder_hiddens = self._model_config_dict[
"fcnet_hiddens"
][:-1]
self.af_and_vf_encoder_activation = self._model_config_dict[
"fcnet_activation"
]
# TODO (simon): Once the old stack is gone, rename to `std_init`.
self.std_init = self._model_config_dict["sigma0"]

# Define the heads.
self.af_and_vf_head_hiddens = self._model_config_dict["post_fcnet_hiddens"]
self.af_and_vf_head_activation = self._model_config_dict[
"post_fcnet_activation"
]

# Advantage and value streams have MLP heads. Note, the advantage
# stream will has an output dimension that is the product of the
# action space dimension and the number of atoms to approximate the
# return distribution in distributional reinforcement learning.
if self.uses_noisy:
# Note, we are overriding the default behavior of `Catalog`. Like
# this we can use the default method `build_encoder()`.
self._encoder_config = NoisyMLPEncoderConfig(
input_dims=self.observation_space.shape,
hidden_layer_dims=self.af_and_vf_encoder_hiddens,
hidden_layer_activation=self.af_and_vf_encoder_activation,
output_layer_activation=self.af_and_vf_encoder_activation,
output_layer_dim=self.latent_dims[0],
std_init=self.std_init,
)
# TODO (simon): Add all other arguments to the Heads.
if self.uses_noisy:
# In case of noisy networks we need to provide the intial standard
# deviation and use the corresponding `NoisyMLPHeadConfig`.
self.af_head_config = NoisyMLPHeadConfig(
input_dims=self.latent_dims,
hidden_layer_dims=self.af_and_vf_head_hiddens,
hidden_layer_activation=self.af_and_vf_head_activation,
output_layer_activation="linear",
output_layer_dim=int(
action_space.n * self._model_config_dict["num_atoms"]
),
std_init=self.std_init,
)
self.vf_head_config = NoisyMLPHeadConfig(
input_dims=self.latent_dims,
hidden_layer_dims=self.af_and_vf_head_hiddens,
hidden_layer_activation=self.af_and_vf_head_activation,
output_layer_activation="linear",
output_layer_dim=1,
std_init=self.std_init,
)
else:
self.af_head_config = MLPHeadConfig(
input_dims=self.latent_dims,
hidden_layer_dims=self.af_and_vf_head_hiddens,
hidden_layer_activation=self.af_and_vf_head_activation,
output_layer_activation="linear",
output_layer_dim=int(
action_space.n * self._model_config_dict["num_atoms"]
),
)
self.vf_head_config = MLPHeadConfig(
input_dims=self.latent_dims,
hidden_layer_dims=self.af_and_vf_head_hiddens,
hidden_layer_activation=self.af_and_vf_head_activation,
output_layer_activation="linear",
output_layer_dim=1,
)
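        # For illustration only (values are hypothetical, not from this PR):
        # with a `Discrete(6)` action space and `num_atoms=51`, the advantage
        # head outputs 6 * 51 = 306 logits (one return distribution per
        # action), while the value head always outputs a single scalar.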

    @OverrideToImplementCustomLogic
    def build_af_head(self, framework: str) -> Model:
        """Build the A/Q-function head.

        Note, if no dueling architecture is chosen, this will
        be the Q-function head.

        The default behavior is to build the head from the `af_head_config`.
        This can be overridden to build a custom head as a means to
        configure the behavior of a `DQNRainbowRLModule` implementation.

        Args:
            framework: The framework to use. Either "torch" or "tf2".

        Returns:
            The advantage head in case a dueling architecture is chosen,
            otherwise the Q-function head.
        """
        return self.af_head_config.build(framework=framework)

    @OverrideToImplementCustomLogic
    def build_vf_head(self, framework: str) -> Model:
        """Build the value function head.

        Note, this function is only called in case of a dueling architecture.

        The default behavior is to build the head from the `vf_head_config`.
        This can be overridden to build a custom head as a means to
        configure the behavior of a `DQNRainbowRLModule` implementation.

        Args:
            framework: The framework to use. Either "torch" or "tf2".

        Returns:
            The value function head.
        """
        return self.vf_head_config.build(framework=framework)

    @override(Catalog)
    def get_action_dist_cls(self, framework: str) -> "TorchCategorical":
        # We only implement DQN Rainbow for Torch.
        assert framework == "torch"
        return TorchCategorical


# __sphinx_doc_end__
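For orientation, here is a minimal usage sketch of the catalog above. It is not part of this PR's diff: the spaces and config values are made up, and it assumes the base `Catalog` merges the passed dict with RLlib's `MODEL_DEFAULTS` so that unspecified keys (e.g. `encoder_latent_dim`) fall back to their defaults.

import gymnasium as gym

from ray.rllib.algorithms.dqn.dqn_rainbow_catalog import DQNRainbowCatalog

# Hypothetical CartPole-like spaces and model config.
catalog = DQNRainbowCatalog(
    observation_space=gym.spaces.Box(-1.0, 1.0, (4,)),
    action_space=gym.spaces.Discrete(2),
    model_config_dict={
        "noisy": True,            # Use noisy MLP encoder and heads.
        "sigma0": 0.5,            # Initial noise std (becomes `std_init`).
        "num_atoms": 51,          # Support size for distributional Q-learning.
        "fcnet_hiddens": [256, 256],
        "fcnet_activation": "relu",
        "post_fcnet_hiddens": [256],
        "post_fcnet_activation": "relu",
    },
)

encoder = catalog.build_encoder(framework="torch")
af_head = catalog.build_af_head(framework="torch")  # 2 actions * 51 atoms = 102 outputs.
vf_head = catalog.build_vf_head(framework="torch")  # Single value output.

The `DQNRainbowRLModule` mentioned in the docstring is the intended consumer of these build methods; the sketch only shows that the catalog alone suffices to construct the networks.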
rllib/algorithms/dqn/dqn_rainbow_noisy_net_configs.py (new file: 60 additions, 0 deletions)
@@ -0,0 +1,60 @@
from dataclasses import dataclass

from ray.rllib.core.models.base import Encoder, Model
from ray.rllib.core.models.configs import _framework_implemented, _MLPConfig
from ray.rllib.utils.annotations import ExperimentalAPI, override


@ExperimentalAPI
@dataclass
class NoisyMLPConfig(_MLPConfig):
    std_init: float = 0.1

    @override(_MLPConfig)
    def _validate(self, framework: str = "torch"):
        """Makes sure that the initial standard deviation is non-negative."""
        super()._validate(framework=framework)

        if self.std_init < 0.0:
            raise ValueError(
                f"`std_init` ({self.std_init}) of `NoisyMLPConfig` must be "
                "non-negative."
            )


@ExperimentalAPI
@dataclass
class NoisyMLPEncoderConfig(NoisyMLPConfig):
    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Encoder":
        self._validate(framework)

        if framework == "torch":
            from ray.rllib.algorithms.dqn.torch.dqn_rainbow_torch_noisy_net import (
                TorchNoisyMLPEncoder,
            )

            return TorchNoisyMLPEncoder(self)
        else:
            raise ValueError(
                f"`NoisyMLPEncoder` is not implemented for framework {framework}."
            )


@ExperimentalAPI
@dataclass
class NoisyMLPHeadConfig(NoisyMLPConfig):
    @_framework_implemented()
    def build(self, framework: str = "torch") -> "Model":
        self._validate(framework)

        if framework == "torch":
            from ray.rllib.algorithms.dqn.torch.dqn_rainbow_torch_noisy_net import (
                TorchNoisyMLPHead,
            )

            return TorchNoisyMLPHead(self)
        else:
            raise ValueError(
                f"`NoisyMLPHead` is not implemented for framework {framework}."
            )