diff --git a/neodroidagent/agents/agent.py b/neodroidagent/agents/agent.py
index de2971b3..dc7e78f2 100644
--- a/neodroidagent/agents/agent.py
+++ b/neodroidagent/agents/agent.py
@@ -14,6 +14,7 @@
     ObservationSpace,
     SignalSpace,
 )
+from neodroidagent.utilities import IntrinsicSignalProvider
 
 __author__ = "Christian Heider Nielsen"
 __doc__ = r"""
@@ -22,7 +23,7 @@
 __all__ = ["Agent"]
 
-ClipFeature = namedtuple("ClipFeature", ("enabled", "low", "high"))
+TogglableLowHigh = namedtuple("TogglableLowHigh", ("enabled", "low", "high"))
 
 
 class Agent(ABC):
@@ -37,8 +38,9 @@ def __init__(
         self,
         input_shape: Sequence = None,
         output_shape: Sequence = None,
         divide_by_zero_safety: float = 1e-6,
-        action_clipping: ClipFeature = ClipFeature(False, -1.0, 1.0),
-        signal_clipping: ClipFeature = ClipFeature(False, -1.0, 1.0),
+        action_clipping: TogglableLowHigh = TogglableLowHigh(False, -1.0, 1.0),
+        signal_clipping: TogglableLowHigh = TogglableLowHigh(False, -1.0, 1.0),
+        intrinsic_signal_provider_arch: IntrinsicSignalProvider = None,
         **kwargs,
     ):
         self._sample_i = 0
@@ -54,8 +56,9 @@ def __init__(
         self._action_clipping = action_clipping
         self._signal_clipping = signal_clipping
 
+        self._intrinsic_signal_provider_arch = intrinsic_signal_provider_arch
+
         self._divide_by_zero_safety = divide_by_zero_safety
-        self._intrinsic_signal = lambda *a: 0  # TODO: ICM
 
         self.__set_protected_attr(**kwargs)
@@ -131,6 +134,34 @@ def __infer_io_shapes(
             highlight=True,
         )
 
+    def __build_intrinsic_module(
+        self,
+        observation_space: ObservationSpace,
+        action_space: ActionSpace,
+        signal_space: SignalSpace,
+        **kwargs,
+    ):
+        """
+
+        @param observation_space:
+        @type observation_space:
+        @param action_space:
+        @type action_space:
+        @param signal_space:
+        @type signal_space:
+        @param kwargs:
+        @type kwargs:
+        """
+        if self._intrinsic_signal_provider_arch is None:
+            self._intrinsic_signal_provider = lambda *a: 0
+        else:
+            self._intrinsic_signal_provider = self._intrinsic_signal_provider_arch(
+                observation_space=observation_space,
+                action_space=action_space,
+                signal_space=signal_space,
+                **kwargs,
+            )
+
     # endregion
 
     # region Public
@@ -155,6 +186,12 @@ def build(
         self.action_space = action_space
         self.signal_space = signal_space
         self.__infer_io_shapes(observation_space, action_space, signal_space)
+        self.__build_intrinsic_module(
+            observation_space=observation_space,
+            action_space=action_space,
+            signal_space=signal_space,
+            **kwargs,
+        )
         self.__build__(
             observation_space=observation_space,
             action_space=action_space,
@@ -245,19 +282,19 @@ def extract_action(self, sample: Any) -> numpy.ndarray:
         """
         return numpy.array(sample)
 
-    def extract_signal(self, snapshot: EnvironmentSnapshot, **kwargs) -> numpy.ndarray:
+    def extract_signal(self, snapshot: EnvironmentSnapshot) -> numpy.ndarray:
         """
         Allows for modulation of signal based on, for example, an Intrinsic Curiosity signal
 
-@param signal:
-@param kwargs:
+        @param snapshot:
+        @type snapshot:
         @return:
         """
         signal_out = numpy.array(snapshot.signal)
 
-        if self._intrinsic_signal:
-            signal_out += self._intrinsic_signal()
+        signal_out += self._intrinsic_signal_provider(snapshot)
 
         return signal_out
 
diff --git a/neodroidagent/agents/numpy_agents/model_free/non_contextual/ucb1_policy.py b/neodroidagent/agents/numpy_agents/model_free/non_contextual/ucb1_policy.py
index 7b539302..35bbc8b0 100644
--- a/neodroidagent/agents/numpy_agents/model_free/non_contextual/ucb1_policy.py
+++ b/neodroidagent/agents/numpy_agents/model_free/non_contextual/ucb1_policy.py
@@ -5,7 +5,7 @@ from neodroid.environments.droid_environment
import SingleUnityEnvironment from neodroid.utilities import Displayable from neodroidagent.agents.numpy_agents.numpy_agent import NumpyAgent -from neodroidagent.utilities.exploration.ucb1 import UCB1 +from neodroidagent.utilities.exploration.sampling.ucb1 import UCB1 __author__ = "Christian Heider Nielsen" diff --git a/neodroidagent/agents/random_agent.py b/neodroidagent/agents/random_agent.py index d619ccc2..a482480c 100644 --- a/neodroidagent/agents/random_agent.py +++ b/neodroidagent/agents/random_agent.py @@ -21,7 +21,7 @@ def _sample( *args, deterministic: bool = False, metric_writer: Writer = MockWriter(), - **kwargs, + **kwargs ) -> Any: """ @@ -46,7 +46,7 @@ def __build__( observation_space: ObservationSpace = None, action_space: ActionSpace = None, signal_space: SignalSpace = None, - **kwargs, + **kwargs ) -> None: """ diff --git a/neodroidagent/agents/torch_agents/model_free/off_policy/dqn_agent.py b/neodroidagent/agents/torch_agents/model_free/off_policy/dqn_agent.py index 93c83ff6..8bbbedae 100644 --- a/neodroidagent/agents/torch_agents/model_free/off_policy/dqn_agent.py +++ b/neodroidagent/agents/torch_agents/model_free/off_policy/dqn_agent.py @@ -46,7 +46,7 @@ class DQNAgent(TorchAgent): def __init__( self, value_arch_spec: Architecture = GDKC(DuelingQMLP), - exploration_spec: GDKC = ExplorationSpecification( + exploration_spec: ExplorationSpecification = ExplorationSpecification( start=0.95, end=0.05, decay=3000 ), memory_buffer: Memory = TransitionPointPrioritisedBuffer(int(1e5)), diff --git a/neodroidagent/agents/torch_agents/model_free/off_policy/sac_agent.py b/neodroidagent/agents/torch_agents/model_free/off_policy/sac_agent.py index a8704e2b..b6083cbf 100644 --- a/neodroidagent/agents/torch_agents/model_free/off_policy/sac_agent.py +++ b/neodroidagent/agents/torch_agents/model_free/off_policy/sac_agent.py @@ -3,27 +3,27 @@ import copy -import itertools -from typing import Any, Dict, Sequence, Tuple +import itertools import numpy import torch import torch.nn as nn from torch.nn.functional import mse_loss from tqdm import tqdm +from typing import Any, Dict, Sequence, Tuple -from draugr.writers import MockWriter, Writer from draugr.torch_utilities import freeze_model, frozen_parameters, to_tensor +from draugr.writers import MockWriter, Writer from neodroid.utilities import ActionSpace, ObservationSpace, SignalSpace from neodroidagent.agents.torch_agents.torch_agent import TorchAgent from neodroidagent.common import ( Architecture, ConcatInputMLP, + Memory, SamplePoint, ShallowStdNormalMLP, TransitionPoint, TransitionPointBuffer, - Memory, ) from neodroidagent.utilities import ( ActionSpaceNotSupported, @@ -71,7 +71,7 @@ def __init__( ), critic_arch_spec: GDKC = GDKC(ConcatInputMLP), critic_criterion: callable = mse_loss, - **kwargs, + **kwargs ): """ @@ -110,7 +110,15 @@ def __init__( self.inner_update_i = 0 @drop_unused_kws - def _remember(self, *, signal, terminated, state, successor_state, sample): + def _remember( + self, + *, + signal: Any, + terminated: Any, + state: Any, + successor_state: Any, + sample: Any + ) -> None: """ @param signal: @@ -146,8 +154,8 @@ def _sample( state: Any, *args, deterministic: bool = False, - metric_writer: Writer = MockWriter(), - ) -> Tuple[Sequence, Any]: + metric_writer: Writer = MockWriter() + ) -> Tuple[torch.Tensor, Any]: """ @param state: @@ -177,7 +185,7 @@ def __build__( action_space: ActionSpace, signal_space: SignalSpace, metric_writer: Writer = MockWriter(), - print_model_repr=True, + print_model_repr: bool = True, ) -> 
None: """ @@ -186,12 +194,6 @@ def __build__( @param signal_space: @param metric_writer: @param print_model_repr: -@param critic_1: -@param critic_1_optimizer: -@param critic_2: -@param critic_2_optimizer: -@param actor: -@param actor_optimiser: @return: """ if action_space.is_discrete: @@ -301,7 +303,9 @@ def update_critics( return out_loss - def update_actor(self, tensorised, metric_writer: Writer = None) -> float: + def update_actor( + self, tensorised: torch.Tensor, metric_writer: Writer = None + ) -> float: """ @param tensorised: @@ -344,9 +348,13 @@ def update_actor(self, tensorised, metric_writer: Writer = None) -> float: return out_loss - def update_alpha(self, log_prob, metric_writer: Writer = None) -> float: + def update_alpha( + self, log_prob: torch.Tensor, metric_writer: Writer = None + ) -> float: """ + @param log_prob: + @type log_prob: @param tensorised: @param metric_writer: @return: @@ -405,7 +413,7 @@ def _update(self, *args, metric_writer: Writer = MockWriter(), **kwargs) -> floa if metric_writer: metric_writer.scalar("Accum_loss", accum_loss) - metric_writer.scalar("_num_inner_updates", i) + metric_writer.scalar("num_inner_updates_i", i) return accum_loss @@ -422,6 +430,8 @@ def update_targets( where \rho is polyak. (Always between 0 and 1, usually close to 1.) + @param metric_writer: + @type metric_writer: @param copy_percentage: @return: """ diff --git a/neodroidagent/agents/torch_agents/model_free/on_policy/ddpg_agent.py b/neodroidagent/agents/torch_agents/model_free/on_policy/ddpg_agent.py index a54a5744..7e1e7819 100644 --- a/neodroidagent/agents/torch_agents/model_free/on_policy/ddpg_agent.py +++ b/neodroidagent/agents/torch_agents/model_free/on_policy/ddpg_agent.py @@ -72,7 +72,7 @@ def __init__( copy_percentage: float = 0.005, actor_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=1e-4), critic_optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=1e-2), - **kwargs, + **kwargs ): """ @@ -168,7 +168,7 @@ def models(self) -> Dict[str, Architecture]: return {"_actor": self._actor, "_critic": self._critic} def update_targets( - self, update_percentage, *, metric_writer: Writer = None + self, update_percentage: float, *, metric_writer: Writer = None ) -> None: """ diff --git a/neodroidagent/agents/torch_agents/model_free/on_policy/pg_agent.py b/neodroidagent/agents/torch_agents/model_free/on_policy/pg_agent.py index ceeb99c5..369cef10 100644 --- a/neodroidagent/agents/torch_agents/model_free/on_policy/pg_agent.py +++ b/neodroidagent/agents/torch_agents/model_free/on_policy/pg_agent.py @@ -19,6 +19,7 @@ from neodroidagent.agents.torch_agents.torch_agent import TorchAgent from neodroidagent.common import ( CategoricalMLP, + Memory, MultiDimensionalNormalMLP, SamplePoint, SampleTrajectoryBuffer, @@ -46,12 +47,14 @@ class PGAgent(TorchAgent): def __init__( self, - evaluation_function=torch.nn.CrossEntropyLoss(), - policy_arch_spec=GDKC(CategoricalMLP), - discount_factor=0.95, - optimiser_spec=GDKC(torch.optim.Adam, lr=1e-4), - scheduler_spec=GDKC(torch.optim.lr_scheduler.StepLR, step_size=100, gamma=0.65), - memory_buffer=SampleTrajectoryBuffer(), + evaluation_function: callable = torch.nn.CrossEntropyLoss(), + policy_arch_spec: GDKC = GDKC(CategoricalMLP), + discount_factor: float = 0.95, + optimiser_spec: GDKC = GDKC(torch.optim.Adam, lr=1e-4), + scheduler_spec: GDKC = GDKC( + torch.optim.lr_scheduler.StepLR, step_size=100, gamma=0.65 + ), + memory_buffer: Memory = SampleTrajectoryBuffer(), **kwargs, ) -> None: r""" diff --git 
a/neodroidagent/agents/torch_agents/model_free/on_policy/ppo_agent.py b/neodroidagent/agents/torch_agents/model_free/on_policy/ppo_agent.py index 7fcac129..4353794a 100644 --- a/neodroidagent/agents/torch_agents/model_free/on_policy/ppo_agent.py +++ b/neodroidagent/agents/torch_agents/model_free/on_policy/ppo_agent.py @@ -5,6 +5,7 @@ import numpy import torch +from torch.distributions import Distribution from torch.nn.functional import mse_loss from tqdm import tqdm @@ -13,7 +14,7 @@ from draugr import mean_accumulator, shuffled_batches from neodroid.utilities import ActionSpace, ObservationSpace, SignalSpace -from neodroidagent.agents.agent import ClipFeature +from neodroidagent.agents.agent import TogglableLowHigh from neodroidagent.agents.torch_agents.torch_agent import TorchAgent from neodroidagent.common import ( ActorCriticMLP, @@ -23,7 +24,6 @@ ) from neodroidagent.utilities import ( ActionSpaceNotSupported, - Distribution, is_none_or_zero_or_negative_or_mod_zero, torch_compute_gae, update_target, @@ -65,8 +65,8 @@ def __init__( optimiser_spec: GDKC = GDKC(constructor=torch.optim.Adam, lr=3e-4), continuous_arch_spec: GDKC = GDKC(ActorCriticMLP), discrete_arch_spec: GDKC = GDKC(CategoricalActorCriticMLP), - gradient_norm_clipping: ClipFeature = ClipFeature(True, 0, 0.5), - **kwargs, + gradient_norm_clipping: TogglableLowHigh = TogglableLowHigh(True, 0, 0.5), + **kwargs ) -> None: """ @@ -204,7 +204,7 @@ def _remember( terminated: Any, state: Any, successor_state: Any, - sample: Any, + sample: Any ) -> None: self._memory_buffer.add_transition_point( ValuedTransitionPoint( @@ -330,7 +330,7 @@ def _policy_loss( log_prob_batch_old, adv_batch, *, - metric_writer: Writer = None, + metric_writer: Writer = None ): action_log_probs_new = self.get_log_prob(new_distribution, action_batch) ratio = torch.exp(action_log_probs_new - log_prob_batch_old) diff --git a/neodroidagent/agents/torch_agents/torch_agent.py b/neodroidagent/agents/torch_agents/torch_agent.py index 3b998e1d..569b6a4b 100644 --- a/neodroidagent/agents/torch_agents/torch_agent.py +++ b/neodroidagent/agents/torch_agents/torch_agent.py @@ -18,8 +18,12 @@ ) from draugr import sprint from neodroid.utilities import ActionSpace, ObservationSpace, SignalSpace -from neodroidagent.agents.agent import Agent, ClipFeature +from neodroidagent.agents.agent import Agent, TogglableLowHigh from neodroidagent.common.architectures.architecture import Architecture +from neodroidagent.utilities import IntrinsicSignalProvider, MLPICM +from neodroidagent.utilities.exploration.intrinsic_signals.braindead import ( + BraindeadIntrinsicSignalProvider, +) from warg import drop_unused_kws, passes_kws_to, super_init_pass_on_kws __author__ = "Christian Heider Nielsen" @@ -40,8 +44,9 @@ def __init__( self, *, device: str = global_torch_device(True), - gradient_clipping: ClipFeature = ClipFeature(False, -1.0, 1.0), - gradient_norm_clipping: ClipFeature = ClipFeature(False, -1.0, 1.0), + gradient_clipping: TogglableLowHigh = TogglableLowHigh(False, -1.0, 1.0), + gradient_norm_clipping: TogglableLowHigh = TogglableLowHigh(False, -1.0, 1.0), + intrinsic_signal_provider_arch: IntrinsicSignalProvider = BraindeadIntrinsicSignalProvider, **kwargs, ): """ @@ -52,7 +57,9 @@ def __init__( @param grad_clip_high: @param kwargs: """ - super().__init__(**kwargs) + super().__init__( + intrinsic_signal_provider_arch=intrinsic_signal_provider_arch, **kwargs + ) self._gradient_clipping = gradient_clipping self._gradient_norm_clipping = gradient_norm_clipping self._device = 
torch.device( diff --git a/neodroidagent/common/memory/memory.py b/neodroidagent/common/memory/memory.py index 1c81dcef..9a8767d0 100644 --- a/neodroidagent/common/memory/memory.py +++ b/neodroidagent/common/memory/memory.py @@ -56,6 +56,7 @@ def add(self, value: Any) -> None: def __len__(self) -> int: raise NotImplementedError + @property @abstractmethod def capacity(self) -> int: raise NotImplementedError diff --git a/neodroidagent/configs/base_config.py b/neodroidagent/configs/base_config.py index 86ebd264..80d038fb 100644 --- a/neodroidagent/configs/base_config.py +++ b/neodroidagent/configs/base_config.py @@ -9,7 +9,7 @@ from draugr.torch_utilities import global_torch_device from neodroidagent import PROJECT_NAME -from neodroidagent.agents.agent import ClipFeature +from neodroidagent.agents.agent import TogglableLowHigh from warg.gdkc import GDKC __author__ = "Christian Heider Nielsen" @@ -38,10 +38,10 @@ LOAD_PREVIOUS_MODEL_IF_AVAILABLE = False # Clipping -SIGNAL_CLIPPING = ClipFeature(False, -1.0, 1.0) -ACTION_CLIPPING = ClipFeature(False, -1.0, 1.0) -GRADIENT_CLIPPING = ClipFeature(False, -1.0, 1.0) -GRADIENT_NORM_CLIPPING = ClipFeature(False, 0, 1.0) +SIGNAL_CLIPPING = TogglableLowHigh(False, -1.0, 1.0) +ACTION_CLIPPING = TogglableLowHigh(False, -1.0, 1.0) +GRADIENT_CLIPPING = TogglableLowHigh(False, -1.0, 1.0) +GRADIENT_NORM_CLIPPING = TogglableLowHigh(False, 0, 1.0) DISCOUNT_FACTOR = 0.99 RENDER_FREQUENCY = 50 diff --git a/neodroidagent/entry_points/agent_tests/torch_agent_tests/ppo_test.py b/neodroidagent/entry_points/agent_tests/torch_agent_tests/ppo_test.py index 4e51b5ef..13ea9e21 100644 --- a/neodroidagent/entry_points/agent_tests/torch_agent_tests/ppo_test.py +++ b/neodroidagent/entry_points/agent_tests/torch_agent_tests/ppo_test.py @@ -30,12 +30,17 @@ BATCH_SIZE = 256 -GRADIENT_NORM_CLIPPING = ClipFeature(True, 0, 0.1) +GRADIENT_NORM_CLIPPING = TogglableLowHigh(True, 0, 0.1) ppo_config = globals() def ppo_test(config=ppo_config): + """ + + @param config: + @type config: + """ ppo_run(environment_type="gym", config=config) @@ -44,6 +49,15 @@ def ppo_run( environment_type: Union[bool, str] = True, config=ppo_config, ): + """ + + @param skip_confirmation: + @type skip_confirmation: + @param environment_type: + @type environment_type: + @param config: + @type config: + """ session_factory( PPOAgent, config, diff --git a/neodroidagent/utilities/exploration/README.md b/neodroidagent/utilities/exploration/README.md index f0b3e113..aec5dd13 100644 --- a/neodroidagent/utilities/exploration/README.md +++ b/neodroidagent/utilities/exploration/README.md @@ -1 +1,4 @@ # Exploration + +## Random Sampling Processes +## Intrinsic Signal Providers diff --git a/neodroidagent/utilities/exploration/__init__.py b/neodroidagent/utilities/exploration/__init__.py index 418ce9f9..aaed3d01 100644 --- a/neodroidagent/utilities/exploration/__init__.py +++ b/neodroidagent/utilities/exploration/__init__.py @@ -4,10 +4,6 @@ __author__ = "Christian Heider Nielsen" __doc__ = "" -from .curiosity_module import * -from .epsilon import * +from .intrinsic_signals import * from .exploration_specification import * -from .icm import * -from .regularisation import * from .sampling import * -from .ucb1 import * diff --git a/neodroidagent/utilities/exploration/curiosity_module.py b/neodroidagent/utilities/exploration/curiosity_module.py deleted file mode 100644 index 46bb31fa..00000000 --- a/neodroidagent/utilities/exploration/curiosity_module.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: 
utf-8 -*-
-from abc import ABCMeta
-
-from torch import nn
-
-__author__ = "Christian Heider Nielsen"
-__doc__ = r"""
-"""
-
-__all__ = ["CuriosityMeta", "CuriosityModule"]
-
-
-class CuriosityMeta(metaclass=ABCMeta):
-    pass
-
-
-class CuriosityModule(CuriosityMeta, nn.Module):
-    pass
diff --git a/neodroidagent/utilities/exploration/epsilon.py b/neodroidagent/utilities/exploration/epsilon.py
deleted file mode 100644
index 1852b3c4..00000000
--- a/neodroidagent/utilities/exploration/epsilon.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import numpy
-
-__author__ = "Christian Heider Nielsen"
-__doc__ = r"""
-
- Created on 09/10/2019
- """
-
-__all__ = ["exploration_action"]
-
-
-def exploration_action(agent, state):
-    """
-choose an action based on state with random noise added for exploration in training
-
-:param agent:
-:param state:
-:return:
-"""
-
-    softmax_action = agent._sample_model(state)
-    epsilon = agent.epsilon_end + (agent.epsilon_start - agent.epsilon_end) * numpy.exp(
-        -1.0 * agent._step_i / agent.epsilon_decay
-    )
-    if numpy.random.rand() < epsilon:
-        action = numpy.random.choice(agent.action_dim)
-    else:
-        action = numpy.argmax(softmax_action)
-    return action
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/__init__.py b/neodroidagent/utilities/exploration/intrinsic_signals/__init__.py
new file mode 100644
index 00000000..b0a8e246
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/__init__.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+
+ Created on 25/04/2020
+ """
+
+from .intrinsic_signal_provider import *
+from .torch_isp import *
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/braindead.py b/neodroidagent/utilities/exploration/intrinsic_signals/braindead.py
new file mode 100644
index 00000000..7adce198
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/braindead.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+
+ Created on 25/04/2020
+ """
+
+__all__ = ["BraindeadIntrinsicSignalProvider"]
+
+from typing import Sequence
+
+from draugr.writers import Writer
+from neodroid.utilities import EnvironmentSnapshot
+from neodroidagent.utilities import IntrinsicSignalProvider
+
+
+class BraindeadIntrinsicSignalProvider(IntrinsicSignalProvider):
+    def sample(
+        self,
+        environment_snapshot: EnvironmentSnapshot,
+        *,
+        writer: Writer = None,
+        **kwargs
+    ) -> Sequence:
+        """
+
+        @param environment_snapshot:
+        @type environment_snapshot:
+        @param writer:
+        @type writer:
+        @param kwargs:
+        @type kwargs:
+        @return:
+        @rtype:
+        """
+        return self._signal_space.n * [0]
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/intrinsic_signal_provider.py b/neodroidagent/utilities/exploration/intrinsic_signals/intrinsic_signal_provider.py
new file mode 100644
index 00000000..c2fa166d
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/intrinsic_signal_provider.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+
+ Created on 25/04/2020
+ """
+
+__all__ = ["IntrinsicSignalProvider"]
+
+from abc import ABC, abstractmethod
+from typing import Sequence
+
+from draugr.writers import Writer
+from neodroid.utilities import (
+    ActionSpace,
+    EnvironmentSnapshot,
+    ObservationSpace,
+    SignalSpace,
+)
+from warg import drop_unused_kws
+
+
+class IntrinsicSignalProvider(ABC):
+    """
+    A callable module that ingests observations and provides augmented signals, in addition to the
+    objective signals provided by the environment/MDP, for the learning/control model
+    """
+
+    @drop_unused_kws
+    def __init__(
+        self,
+        observation_space: ObservationSpace,
+        action_space: ActionSpace,
+        signal_space: SignalSpace,
+    ):
+        """
+
+        @param observation_space:
+        @type observation_space:
+        @param action_space:
+        @type action_space:
+        @param signal_space:
+        @type signal_space:
+        """
+        self._observation_space = observation_space
+        self._action_space = action_space
+        self._signal_space = signal_space
+
+    def __call__(self, environment_snapshot: EnvironmentSnapshot) -> Sequence:
+        """
+
+        @param environment_snapshot:
+        @type environment_snapshot:
+        @return:
+        @rtype:
+        """
+        return self.sample(environment_snapshot)
+
+    @abstractmethod
+    def sample(
+        self,
+        environment_snapshot: EnvironmentSnapshot,
+        *,
+        writer: Writer = None,
+        **kwargs
+    ) -> Sequence:
+        """
+
+        @param environment_snapshot:
+        @type environment_snapshot:
+        @param writer:
+        @type writer:
+        @param kwargs:
+        @type kwargs:
+        """
+        raise NotImplementedError
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/__init__.py b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/__init__.py
new file mode 100644
index 00000000..c3be0f63
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/__init__.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+
+ Created on 25/04/2020
+ """
+
+from .bored_module import *
+from .torch_isp_module import *
+from .dopamine_module import *
+from .curiosity import *
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/bored_module.py b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/bored_module.py
new file mode 100644
index 00000000..85247829
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/bored_module.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+"""
+
+__all__ = ["BoredISP"]
+
+from draugr.writers import Writer
+from neodroid.utilities import EnvironmentSnapshot
+from neodroidagent.utilities.exploration.intrinsic_signals.intrinsic_signal_provider import (
+    IntrinsicSignalProvider,
+)
+from neodroidagent.utilities.exploration.intrinsic_signals.torch_isp.torch_isp_module import (
+    TorchISPMeta,
+)
+
+
+class BoredISP(IntrinsicSignalProvider, TorchISPMeta):
+    def sample(
+        self,
+        environment_snapshot: EnvironmentSnapshot,
+        *,
+        writer: Writer = None,
+        **kwargs
+    ):
+        return 0
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/curiosity/__init__.py b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/curiosity/__init__.py
new file mode 100644
index 00000000..faa113de
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/curiosity/__init__.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+
+ Created on 25/04/2020
+ """
+
+from .icm import *
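# ---------------------------------------------------------------------------
# Reviewer note (not part of the patch): a minimal usage sketch of the new
# intrinsic-signal hook. `NoveltyISP` is hypothetical; the wiring mirrors
# Agent.__build_intrinsic_module / Agent.extract_signal in the agent.py hunks
# above, which instantiate the provider class with the environment's spaces
# and add its output to the extrinsic signal.
from typing import Sequence

from neodroid.utilities import EnvironmentSnapshot
from neodroidagent.utilities.exploration.intrinsic_signals.intrinsic_signal_provider import (
    IntrinsicSignalProvider,
)


class NoveltyISP(IntrinsicSignalProvider):
    """Hypothetical provider: a constant exploration bonus per signal dimension."""

    def sample(
        self, environment_snapshot: EnvironmentSnapshot, *, writer=None, **kwargs
    ) -> Sequence:
        # self._signal_space is set by the base __init__; the `.n` usage
        # follows BraindeadIntrinsicSignalProvider above.
        return [0.01] * self._signal_space.n


# Agents receive the class (not an instance); Agent.build supplies the spaces:
#   agent = SomeTorchAgent(intrinsic_signal_provider_arch=NoveltyISP)
#   agent.build(observation_space, action_space, signal_space)
# ---------------------------------------------------------------------------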
diff --git a/neodroidagent/utilities/exploration/icm.py b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/curiosity/icm.py
similarity index 65%
rename from neodroidagent/utilities/exploration/icm.py
rename to neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/curiosity/icm.py
index 2ace8f1b..fda778ab 100644
--- a/neodroidagent/utilities/exploration/icm.py
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/curiosity/icm.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+from typing import Tuple
 
 import numpy
 import torch
@@ -8,14 +9,18 @@
 from torch.nn import CrossEntropyLoss, MSELoss
 
 from draugr.torch_utilities.tensors.to_tensor import to_tensor
-from neodroid.utilities import ActionSpace, ObservationSpace
-from neodroidagent.utilities.exploration.curiosity_module import CuriosityModule
+from draugr.writers import Writer
+from neodroid.utilities import ActionSpace, ObservationSpace, SignalSpace
 
 __author__ = "Christian Heider Nielsen"
 __doc__ = r"""
 """
 
 __all__ = ["ForwardModel", "InverseModel", "MLPICM"]
 
+from neodroidagent.utilities.exploration.intrinsic_signals.torch_isp.torch_isp_module import (
+    TorchISPModule,
+)
+
 
 class ForwardModel(nn.Module):
     """
@@ -47,7 +52,7 @@ def __init__(self, action_converter: ActionSpace, state_latent_features: int):
             nn.Linear(128, state_latent_features),
         )
 
-    def forward(self, state_latent: torch.Tensor, action: torch.Tensor):
+    def forward(self, state_latent: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
         """
 
         @param state_latent:
@@ -68,6 +73,13 @@ class InverseModel(nn.Module):
     """
 
     def __init__(self, action_space: ActionSpace, state_latent_features: int):
+        """
+
+        @param action_space:
+        @type action_space:
+        @param state_latent_features:
+        @type state_latent_features:
+        """
         super().__init__()
         self.input = nn.Sequential(
             nn.Linear(state_latent_features * 2, 128),
@@ -77,11 +89,22 @@ def __init__(self, action_space: ActionSpace, state_latent_features: int):
             nn.Linear(128, action_space.n),
         )
 
-    def forward(self, state_latent: torch.Tensor, next_state_latent: torch.Tensor):
+    def forward(
+        self, state_latent: torch.Tensor, next_state_latent: torch.Tensor
+    ) -> torch.Tensor:
+        """
+
+        @param state_latent:
+        @type state_latent:
+        @param next_state_latent:
+        @type next_state_latent:
+        @return:
+        @rtype:
+        """
         return self.input(torch.cat((state_latent, next_state_latent), dim=-1))
 
 
-class MLPICM(CuriosityModule):
+class MLPICM(TorchISPModule):
     """
     Implements the Intrinsic Curiosity Module described in paper:
     https://arxiv.org/pdf/1705.05363.pdf
@@ -98,22 +121,20 @@ def __init__(
         self,
         observation_space: ObservationSpace,
         action_space: ActionSpace,
+        signal_space: SignalSpace,
         policy_weight: float,
-        reward_scale: float,
         weight: float,
         intrinsic_reward_integration: float,
+        hidden_dim: int = 128,
     ):
         """
 
:param policy_weight: weight to be applied to the ``policy_loss`` in the ``loss`` method. Allows to
control how important optimizing the policy is relative to optimizing the curiosity module
-:param reward_scale: scales the intrinsic reward returned by this module. Can be used to control how
-big the
-intrinsic reward is
+:param signal_space: used for scaling the intrinsic signal returned by this module; its span can be
+used to control the fluctuation scale of the intrinsic signal.
:param weight: balances the importance between forward and inverse model
-:param intrinsic_reward_integration: balances the importance between extrinsic and intrinsic reward.
-Used when -incorporating intrinsic into extrinsic in the ``reward`` method +:param intrinsic_reward_integration: balances the importance between extrinsic and intrinsic signal. """ assert ( @@ -122,41 +143,68 @@ def __init__( assert ( len(action_space.shape) == 1 ), "Only flat action spaces supported by MLP model" - super().__init__() + super().__init__(observation_space, action_space, signal_space) - self.input_state_shape = observation_space - self.input_action_shape = action_space self.policy_weight = policy_weight - self.reward_scale = reward_scale + self.reward_scale = signal_space.span self.weight = weight - self.intrinsic_reward_integration = intrinsic_reward_integration + self.intrinsic_signal_integration = intrinsic_reward_integration self.encoder = nn.Sequential( - nn.Linear(observation_space.shape[0], 128), + nn.Linear(observation_space.shape[0], hidden_dim), nn.ReLU(inplace=True), - nn.Linear(128, 128), + nn.Linear(hidden_dim, hidden_dim), nn.ReLU(inplace=True), - nn.Linear(128, 128), + nn.Linear(hidden_dim, hidden_dim), ) - self.forward_model = ForwardModel(action_space, 128) - self.inverse_model = InverseModel(action_space, 128) + self.forward_model = ForwardModel(action_space, hidden_dim) + self.inverse_model = InverseModel(action_space, hidden_dim) self.a_loss = CrossEntropyLoss() self.a_loss = MSELoss() def forward( self, state: torch.Tensor, next_state: torch.Tensor, action: torch.Tensor - ): + ) -> Tuple: + """ + + @param state: + @type state: + @param next_state: + @type next_state: + @param action: + @type action: + @return: + @rtype: + """ state = self.encoder(state) next_state = self.encoder(next_state) next_state_hat = self.forward_model(state, action) action_hat = self.inverse_model(state, next_state) return next_state, next_state_hat, action_hat - def reward( - self, rewards: numpy.ndarray, states: numpy.ndarray, actions: numpy.ndarray + def sample( + self, + signals: numpy.ndarray, + states: numpy.ndarray, + actions: numpy.ndarray, + *, + writer: Writer = None ) -> numpy.ndarray: + """ + + @param signals: + @type signals: + @param states: + @type states: + @param actions: + @type actions: + @param writer: + @type writer: + @return: + @rtype: + """ n, t = actions.shape[0], actions.shape[1] states, next_states = states[:, :-1], states[:, 1:] states = to_tensor( @@ -169,21 +217,24 @@ def reward( next_states_latent, next_states_hat, _ = self.forward( states, next_states, actions ) - intrinsic_reward = ( - self.reward_scale - / 2 - * (next_states_hat - next_states_latent).norm(2, dim=-1).pow(2) + intrinsic_signal = ( + ( + self.reward_scale + / 2 + * (next_states_hat - next_states_latent).norm(2, dim=-1).pow(2) + ) + .cpu() + .detach() + .numpy() + .reshape(n, t) ) - intrinsic_reward = intrinsic_reward.cpu().detach().numpy().reshape(n, t) - return ( - 1.0 - self.intrinsic_reward_integration - ) * rewards + self.intrinsic_reward_integration * intrinsic_reward + if writer is not None: + writer.scalar("icm/signal", intrinsic_signal.mean().item()) - # self.reporter.scalar('icm/reward', - # intrinsic_reward.mean().item() - # if self.reporter.will_report('icm/reward') - # else 0) + return ( + 1.0 - self.intrinsic_signal_integration + ) * signals + self.intrinsic_signal_integration * intrinsic_signal def loss( self, @@ -191,7 +242,24 @@ def loss( states: torch.Tensor, next_states: torch.Tensor, actions: torch.Tensor, + *, + writer: Writer = None ) -> torch.Tensor: + """ + + @param policy_loss: + @type policy_loss: + @param states: + @type states: + @param next_states: + 
@type next_states:
+        @param actions:
+        @type actions:
+        @param writer:
+        @type writer:
+        @return:
+        @rtype:
+        """
         next_states_latent, next_states_hat, actions_hat = self.forward(
             states, next_states, actions
         )
@@ -206,6 +274,7 @@
         inverse_loss = self.a_loss(ca, actions)
         curiosity_loss = self.weight * forward_loss + (1 - self.weight) * inverse_loss
 
-        return self.policy_weight * policy_loss + curiosity_loss
+        if writer is not None:
+            writer.scalar("icm/loss", curiosity_loss.item())
 
-        # self.reporter.scalar('icm/loss', curiosity_loss.item())
+        return self.policy_weight * policy_loss + curiosity_loss
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/dopamine_module.py b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/dopamine_module.py
new file mode 100644
index 00000000..c006698b
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/dopamine_module.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+"""
+
+__all__ = ["DopamineISP"]
+
+from draugr.writers import Writer
+from neodroid.utilities import EnvironmentSnapshot
+from neodroidagent.utilities.exploration.intrinsic_signals.intrinsic_signal_provider import (
+    IntrinsicSignalProvider,
+)
+from neodroidagent.utilities.exploration.intrinsic_signals.torch_isp.torch_isp_module import (
+    TorchISPMeta,
+)
+
+
+class DopamineISP(IntrinsicSignalProvider, TorchISPMeta):
+    def sample(
+        self,
+        environment_snapshot: EnvironmentSnapshot,
+        *,
+        writer: Writer = None,
+        **kwargs
+    ):
+        return 0
diff --git a/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/torch_isp_module.py b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/torch_isp_module.py
new file mode 100644
index 00000000..f0ab950f
--- /dev/null
+++ b/neodroidagent/utilities/exploration/intrinsic_signals/torch_isp/torch_isp_module.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from abc import ABCMeta
+
+__author__ = "Christian Heider Nielsen"
+__doc__ = r"""
+"""
+
+__all__ = ["TorchISPMeta", "TorchISPModule"]
+
+from draugr.writers import Writer
+from neodroid.utilities import EnvironmentSnapshot
+from neodroidagent.utilities.exploration.intrinsic_signals.intrinsic_signal_provider import (
+    IntrinsicSignalProvider,
+)
+
+
+class TorchISPMeta(metaclass=ABCMeta):
+    pass
+
+
+class TorchISPModule(IntrinsicSignalProvider, TorchISPMeta):
+    def sample(
+        self,
+        environment_snapshot: EnvironmentSnapshot,
+        *,
+        writer: Writer = None,
+        **kwargs
+    ):
+        raise NotImplementedError
diff --git a/neodroidagent/utilities/exploration/sampling/__init__.py b/neodroidagent/utilities/exploration/sampling/__init__.py
index 4b1ad8cc..bf20f8c6 100644
--- a/neodroidagent/utilities/exploration/sampling/__init__.py
+++ b/neodroidagent/utilities/exploration/sampling/__init__.py
@@ -8,3 +8,4 @@
 from .random_process import *
 from .set_sampling import *
 from .snake_space_filling import *
+from .ucb1 import *
diff --git a/neodroidagent/utilities/exploration/sampling/random_process/annealed_guassian.py b/neodroidagent/utilities/exploration/sampling/random_process/annealed_guassian.py
index a0ad6a4c..63513d12 100644
--- a/neodroidagent/utilities/exploration/sampling/random_process/annealed_guassian.py
+++ b/neodroidagent/utilities/exploration/sampling/random_process/annealed_guassian.py
@@ -4,9 +4,20 @@
 from .random_process import RandomProcess
 
+__all__ = ["AnnealedGaussianProcess"]
+
 
 class 
AnnealedGaussianProcess(RandomProcess): + """ + + """ + def sample(self, size): + """ + + @param size: + @type size: + """ pass def __init__(self, mean, sigma, sigma_min, n_steps_annealing, **kwargs): @@ -26,10 +37,18 @@ def __init__(self, mean, sigma, sigma_min, n_steps_annealing, **kwargs): self.sigma_min = sigma def reset(self): + """ + + """ self.n_steps = 0 @property def current_sigma(self): + """ + + @return: + @rtype: + """ sigma = max(self.sigma_min, self.m * float(self.n_steps) + self.c) return sigma diff --git a/neodroidagent/utilities/exploration/sampling/random_process/bounded_triangle_sample.py b/neodroidagent/utilities/exploration/sampling/random_process/bounded_triangle_sample.py index 56898c03..a2cd5715 100644 --- a/neodroidagent/utilities/exploration/sampling/random_process/bounded_triangle_sample.py +++ b/neodroidagent/utilities/exploration/sampling/random_process/bounded_triangle_sample.py @@ -5,6 +5,8 @@ import numpy +__all__ = ["bounded_triangle_sample"] + def bounded_triangle_sample(a_set, mean=0.5, number=1): l = len(a_set) diff --git a/neodroidagent/utilities/exploration/sampling/random_process/ornstein_uhlenbeck.py b/neodroidagent/utilities/exploration/sampling/random_process/ornstein_uhlenbeck.py index 5acbf586..f71dbeb6 100644 --- a/neodroidagent/utilities/exploration/sampling/random_process/ornstein_uhlenbeck.py +++ b/neodroidagent/utilities/exploration/sampling/random_process/ornstein_uhlenbeck.py @@ -7,6 +7,8 @@ # Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab import numpy +__all__ = ["OrnsteinUhlenbeckProcess"] + class OrnsteinUhlenbeckProcess(AnnealedGaussianProcess): def __init__( diff --git a/neodroidagent/utilities/exploration/sampling/random_process/random_process.py b/neodroidagent/utilities/exploration/sampling/random_process/random_process.py index e4dd198b..605ca9a5 100644 --- a/neodroidagent/utilities/exploration/sampling/random_process/random_process.py +++ b/neodroidagent/utilities/exploration/sampling/random_process/random_process.py @@ -3,6 +3,7 @@ from abc import ABC __author__ = "Christian Heider Nielsen" +__all__ = ["RandomProcess"] class RandomProcess(ABC): diff --git a/neodroidagent/utilities/exploration/sampling/random_process/random_walk.py b/neodroidagent/utilities/exploration/sampling/random_process/random_walk.py index 3ba8c69e..e8a38245 100644 --- a/neodroidagent/utilities/exploration/sampling/random_process/random_walk.py +++ b/neodroidagent/utilities/exploration/sampling/random_process/random_walk.py @@ -6,6 +6,8 @@ import random +__all__ = ["RandomWalk"] + class RandomWalk(RandomProcess): def reset(self): @@ -18,12 +20,12 @@ def sample(self, size=1): return random.choice(self.options) -def main(): - random_process = RandomWalk() +if __name__ == "__main__": - for i in range(1000): - print(random_process.sample()) + def main(): + random_process = RandomWalk() + for i in range(1000): + print(random_process.sample()) -if __name__ == "__main__": main() diff --git a/neodroidagent/utilities/exploration/sampling/random_process/self_avoiding.py b/neodroidagent/utilities/exploration/sampling/random_process/self_avoiding.py index aa3e5de2..16602945 100644 --- a/neodroidagent/utilities/exploration/sampling/random_process/self_avoiding.py +++ b/neodroidagent/utilities/exploration/sampling/random_process/self_avoiding.py @@ -9,6 +9,9 @@ import numpy +__all__ = ["SelfAvoiding"] + + class SelfAvoiding(RandomProcess): def __init__(self, num_of_options=4, n=10): self.num_of_options = num_of_options 
@@ -52,12 +55,12 @@ def reset(self): self.y = self.n // 2 -def main(n=5, trials=3): - r = SelfAvoiding() +if __name__ == "__main__": - for t in range(trials): - print(r.sample()) + def main(n=5, trials=3): + r = SelfAvoiding() + for t in range(trials): + print(r.sample()) -if __name__ == "__main__": main() diff --git a/neodroidagent/utilities/exploration/sampling/random_process/wiener.py b/neodroidagent/utilities/exploration/sampling/random_process/wiener.py index 09dc894e..e6686f21 100644 --- a/neodroidagent/utilities/exploration/sampling/random_process/wiener.py +++ b/neodroidagent/utilities/exploration/sampling/random_process/wiener.py @@ -10,6 +10,8 @@ import numpy from scipy.stats import norm +__all__ = ["WienerProcess", "wiener"] + class WienerProcess(RandomProcess): def __init__(self, delta, dt, initial, size=1): @@ -87,124 +89,118 @@ def wiener(x0, n, dt, delta, out=None): return out -def main_1d(): - # The Wiener process parameter. - delta = 0.1 - # Total time. - T = 1 - # Number of steps. - N = 50 - # Time step size - dt = T / N - # Number of realizations to generate. - m = 5 - # Create an empty array to store the realizations. - x = numpy.empty((m, N + 1)) - # Initial values of x. - x[:, 0] = 0 - - wiener(x[:, 0], N, dt, delta, out=x[:, 1:]) - - t = numpy.linspace(0.0, N * dt, N + 1) - plot_1d_trajectory(x, t, m) - - -def main_2d(): - # The Wiener process parameter. - delta = 0.25 - # Total time. - T = 1.0 - # Number of steps. - N = 50 - # Time step size - dt = T / N - # Initial values of x. - x = numpy.empty((2, N + 1)) - x[:, 0] = 0.0 - - wiener(x[:, 0], N, dt, delta, out=x[:, 1:]) - - plot_2d_trajectory(x) - - -def main_3d(): - # The Wiener process parameter. - delta = 0.25 - # Total time. - T = 1.0 - # Number of steps. - N = 1000 - # Time step size - dt = T / N - # Initial values of x. - x = numpy.zeros((3, N + 1)) - - wiener(x[:, 0], N, dt, delta, out=x[:, 1:]) - - plot_3d_trajectory(x) - - -def plot_3d_trajectory(x): - fig = pyplot.figure() - ax = fig.add_subplot(111, projection="3d") - ax.plot(x[0], x[1], x[2]) - - ax.scatter(x[0, 0], x[1, 0], x[2, 0], "go") - ax.scatter(x[0, -1], x[1, -1], x[2, -1], "ro") - - ax.set_xlabel("X Label") - ax.set_ylabel("Y Label") - ax.set_zlabel("Z Label") - - pyplot.title("3D Brownian Motion") - pyplot.axis("equal") - pyplot.show() - - -def plot_1d_trajectory(x, t, m): - for k in range(m): - pyplot.plot(t, x[k]) - pyplot.xlabel("t", fontsize=16) - pyplot.ylabel("x", fontsize=16) - pyplot.grid(True) - pyplot.show() - - -def plot_2d_trajectory(x): - # Plot the 2D trajectory. - pyplot.plot(x[0], x[1]) - - # Mark the start and end points. - pyplot.plot(x[0, 0], x[1, 0], "go") - pyplot.plot(x[0, -1], x[1, -1], "ro") - - # More plot decorations. - pyplot.title("2D Brownian Motion") - pyplot.xlabel("x", fontsize=16) - pyplot.ylabel("y", fontsize=16) - pyplot.axis("equal") - pyplot.grid(True) - pyplot.show() - - -def main_class(): - # The Wiener process parameter. - delta = 1 - # Total time. - T = 1.0 - # Number of steps. - N = 50 - # Time step size - dt = T / N - # Initial values of x. - x = numpy.empty((2, N + 1)) - x[:, 0] = 0.0 - - brownian = WienerProcess(delta, dt, 0) - - for i in range(N): - print(brownian.sample()) +if __name__ == "__main__": + def main_1d(): + # The Wiener process parameter. + delta = 0.1 + # Total time. + T = 1 + # Number of steps. + N = 50 + # Time step size + dt = T / N + # Number of realizations to generate. + m = 5 + # Create an empty array to store the realizations. 
+ x = numpy.empty((m, N + 1)) + # Initial values of x. + x[:, 0] = 0 + + wiener(x[:, 0], N, dt, delta, out=x[:, 1:]) + + t = numpy.linspace(0.0, N * dt, N + 1) + plot_1d_trajectory(x, t, m) + + def main_2d(): + # The Wiener process parameter. + delta = 0.25 + # Total time. + T = 1.0 + # Number of steps. + N = 50 + # Time step size + dt = T / N + # Initial values of x. + x = numpy.empty((2, N + 1)) + x[:, 0] = 0.0 + + wiener(x[:, 0], N, dt, delta, out=x[:, 1:]) + + plot_2d_trajectory(x) + + def main_3d(): + # The Wiener process parameter. + delta = 0.25 + # Total time. + T = 1.0 + # Number of steps. + N = 1000 + # Time step size + dt = T / N + # Initial values of x. + x = numpy.zeros((3, N + 1)) + + wiener(x[:, 0], N, dt, delta, out=x[:, 1:]) + + plot_3d_trajectory(x) + + def plot_3d_trajectory(x): + fig = pyplot.figure() + ax = fig.add_subplot(111, projection="3d") + ax.plot(x[0], x[1], x[2]) + + ax.scatter(x[0, 0], x[1, 0], x[2, 0], "go") + ax.scatter(x[0, -1], x[1, -1], x[2, -1], "ro") + + ax.set_xlabel("X Label") + ax.set_ylabel("Y Label") + ax.set_zlabel("Z Label") + + pyplot.title("3D Brownian Motion") + pyplot.axis("equal") + pyplot.show() + + def plot_1d_trajectory(x, t, m): + for k in range(m): + pyplot.plot(t, x[k]) + pyplot.xlabel("t", fontsize=16) + pyplot.ylabel("x", fontsize=16) + pyplot.grid(True) + pyplot.show() + + def plot_2d_trajectory(x): + # Plot the 2D trajectory. + pyplot.plot(x[0], x[1]) + + # Mark the start and end points. + pyplot.plot(x[0, 0], x[1, 0], "go") + pyplot.plot(x[0, -1], x[1, -1], "ro") + + # More plot decorations. + pyplot.title("2D Brownian Motion") + pyplot.xlabel("x", fontsize=16) + pyplot.ylabel("y", fontsize=16) + pyplot.axis("equal") + pyplot.grid(True) + pyplot.show() + + def main_class(): + # The Wiener process parameter. + delta = 1 + # Total time. + T = 1.0 + # Number of steps. + N = 50 + # Time step size + dt = T / N + # Initial values of x. 
+ x = numpy.empty((2, N + 1)) + x[:, 0] = 0.0 + + brownian = WienerProcess(delta, dt, 0) + + for i in range(N): + print(brownian.sample()) -if __name__ == "__main__": main_3d() diff --git a/neodroidagent/utilities/exploration/sampling/set_sampling.py b/neodroidagent/utilities/exploration/sampling/set_sampling.py index e4068b93..1b7e2c4c 100644 --- a/neodroidagent/utilities/exploration/sampling/set_sampling.py +++ b/neodroidagent/utilities/exploration/sampling/set_sampling.py @@ -6,8 +6,17 @@ import numpy +__all__ = ["sample"] + def sample(iter_set: iter) -> Any: + """ + + @param iter_set: + @type iter_set: + @return: + @rtype: + """ a = list(iter_set) if len(a): idx = numpy.random.randint(0, len(a)) diff --git a/neodroidagent/utilities/exploration/sampling/snake_space_filling.py b/neodroidagent/utilities/exploration/sampling/snake_space_filling.py index d1328795..727da2ad 100644 --- a/neodroidagent/utilities/exploration/sampling/snake_space_filling.py +++ b/neodroidagent/utilities/exploration/sampling/snake_space_filling.py @@ -25,6 +25,9 @@ class States(Enum): def snake_space_filling_generator(): + """ + + """ x = 0 y = 0 state = States.expand_x diff --git a/neodroidagent/utilities/exploration/ucb1.py b/neodroidagent/utilities/exploration/sampling/ucb1.py similarity index 80% rename from neodroidagent/utilities/exploration/ucb1.py rename to neodroidagent/utilities/exploration/sampling/ucb1.py index 0c2c093e..f93e6a8b 100644 --- a/neodroidagent/utilities/exploration/ucb1.py +++ b/neodroidagent/utilities/exploration/sampling/ucb1.py @@ -8,7 +8,7 @@ class UCB1: """ - + upper confidence bound """ @staticmethod @@ -108,25 +108,29 @@ def train(self, arms, rollouts: int = 1000) -> NOD: if __name__ == "__main__": - import random - class NormalDistributionArm: - def __init__(self, mu, sigma): - self.mu = mu - self.sigma = sigma + def main(): + import random + + class NormalDistributionArm: + def __init__(self, mu, sigma): + self.mu = mu + self.sigma = sigma + + def draw(self): + return random.gauss(self.mu, self.sigma) - def draw(self): - return random.gauss(self.mu, self.sigma) + arms = [ + NormalDistributionArm(4.01, 2.0), + NormalDistributionArm(4, 2.0), + NormalDistributionArm(3.99, 2.0), + ] - arms = [ - NormalDistributionArm(4.01, 2.0), - NormalDistributionArm(4, 2.0), - NormalDistributionArm(3.99, 2.0), - ] + ucb1 = UCB1(len(arms)) - ucb1 = UCB1(len(arms)) + ucb1.train(arms) - ucb1.train(arms) + print(ucb1.counts) + print(ucb1.values) - print(ucb1.counts) - print(ucb1.values) + main() diff --git a/neodroidagent/utilities/misc/checks.py b/neodroidagent/utilities/misc/checks.py index d74c5657..9544d366 100644 --- a/neodroidagent/utilities/misc/checks.py +++ b/neodroidagent/utilities/misc/checks.py @@ -7,10 +7,13 @@ Created on 11/02/2020 """ +from typing import Any, Sequence from warnings import warn +__all__ = ["check_tensorised_shapes"] -def check_tensorised_shapes(tensorised) -> None: + +def check_tensorised_shapes(tensorised: Sequence) -> None: aa = iter(tensorised) a = next(aa).shape[:-1] try: diff --git a/neodroidagent/utilities/misc/environment_model/environment_model.py b/neodroidagent/utilities/misc/environment_model/environment_model.py index dc047bdc..ccf66a4f 100644 --- a/neodroidagent/utilities/misc/environment_model/environment_model.py +++ b/neodroidagent/utilities/misc/environment_model/environment_model.py @@ -11,6 +11,8 @@ import numpy +__all__ = ["EnvModel"] + class EnvModel(object): """ diff --git a/neodroidagent/utilities/misc/environment_model/environment_utilities.py 
b/neodroidagent/utilities/misc/environment_model/environment_utilities.py index 55e78d53..185d4048 100644 --- a/neodroidagent/utilities/misc/environment_model/environment_utilities.py +++ b/neodroidagent/utilities/misc/environment_model/environment_utilities.py @@ -3,6 +3,17 @@ import gym import numpy +__all__ = [ + "get_gym_environs", + "get_gym_stats", + "is_tuple", + "is_continuous", + "is_multidimensional", + "obs_stats", + "action_stats", + "env_stats", +] + def get_gym_environs(): """ List all valid OpenAI ``gym`` environment ids. """ diff --git a/neodroidagent/utilities/misc/environment_model/tiles.py b/neodroidagent/utilities/misc/environment_model/tiles.py index 6a8ddd31..258b4956 100644 --- a/neodroidagent/utilities/misc/environment_model/tiles.py +++ b/neodroidagent/utilities/misc/environment_model/tiles.py @@ -28,10 +28,14 @@ from itertools import zip_longest from math import floor +from typing import Tuple + import numpy basehash = hash +__all__ = ["tile_state_space", "IHT", "hashcoords", "tiles", "tileswrap"] + def tile_state_space( env, @@ -41,7 +45,7 @@ def tile_state_space( obs_min=None, state_action=False, grid_size=(4, 4), -): +) -> Tuple: """ Return a function to encode the continous observations generated by `env` in terms of a collection of `n_tilings` overlapping tilings (each with diff --git a/neodroidagent/utilities/exploration/regularisation/README.md b/neodroidagent/utilities/misc/regularisation/README.md similarity index 100% rename from neodroidagent/utilities/exploration/regularisation/README.md rename to neodroidagent/utilities/misc/regularisation/README.md diff --git a/neodroidagent/utilities/exploration/regularisation/__init__.py b/neodroidagent/utilities/misc/regularisation/__init__.py similarity index 100% rename from neodroidagent/utilities/exploration/regularisation/__init__.py rename to neodroidagent/utilities/misc/regularisation/__init__.py diff --git a/neodroidagent/utilities/misc/sampling.py b/neodroidagent/utilities/misc/sampling.py index 17f5470c..c9a914ed 100644 --- a/neodroidagent/utilities/misc/sampling.py +++ b/neodroidagent/utilities/misc/sampling.py @@ -12,6 +12,8 @@ import torch from torch.distributions import Distribution, Normal +__all__ = ["normal_tanh_reparameterised_sample"] + def normal_tanh_reparameterised_sample( dis: Normal, epsilon=1e-6 diff --git a/neodroidagent/utilities/misc/tanh_normal.py b/neodroidagent/utilities/misc/tanh_normal.py index 441e5228..0e4edb23 100644 --- a/neodroidagent/utilities/misc/tanh_normal.py +++ b/neodroidagent/utilities/misc/tanh_normal.py @@ -12,6 +12,8 @@ import torch from torch.distributions import Distribution, Normal +__all__ = ["TanhNormal"] + class TanhNormal(Distribution): """ diff --git a/neodroidagent/utilities/signal/experimental/discounting.py b/neodroidagent/utilities/signal/experimental/discounting.py index 6a569a07..fe36d518 100644 --- a/neodroidagent/utilities/signal/experimental/discounting.py +++ b/neodroidagent/utilities/signal/experimental/discounting.py @@ -7,6 +7,8 @@ __author__ = "Christian Heider Nielsen" __doc__ = "" +__all__ = ["valued_discount"] + @jit(nopython=True, nogil=True) def valued_discount( diff --git a/neodroidagent/utilities/signal/experimental/generalised_advantage.py b/neodroidagent/utilities/signal/experimental/generalised_advantage.py index b6323ebe..2c8f9a95 100644 --- a/neodroidagent/utilities/signal/experimental/generalised_advantage.py +++ b/neodroidagent/utilities/signal/experimental/generalised_advantage.py @@ -7,6 +7,8 @@ __author__ = "Christian Heider 
Nielsen" __doc__ = "" +__all__ = ["discounted_ge", "discounted_gae"] + def discounted_ge( signals: numpy.ndarray, diff --git a/neodroidagent/utilities/signal/experimental/nstep.py b/neodroidagent/utilities/signal/experimental/nstep.py index 7bc9225e..0456af04 100644 --- a/neodroidagent/utilities/signal/experimental/nstep.py +++ b/neodroidagent/utilities/signal/experimental/nstep.py @@ -8,6 +8,8 @@ from neodroidagent.utilities.signal.experimental.discounting import valued_discount +__all__ = ["discounted_nstep", "discounted_nstep_adv"] + def discounted_nstep( signals: numpy.ndarray, diff --git a/neodroidagent/utilities/signal/objective_regressor.py b/neodroidagent/utilities/signal/objective_regressor.py index 7323b6f4..7e8b482b 100644 --- a/neodroidagent/utilities/signal/objective_regressor.py +++ b/neodroidagent/utilities/signal/objective_regressor.py @@ -6,5 +6,8 @@ __doc__ = r"""Estimation of objective and provider of signals""" +__all__ = ["ObjectiveRegressor"] + + class ObjectiveRegressor(nn.Module): pass