Merge pull request #81 from sony/feature/20220507-add-lqr
Add iLQR and LQR
TakayoshiTakayanagi committed Jul 12, 2022
2 parents 800b81c + 08ad73d commit ecf91ae
Showing 16 changed files with 2,048 additions and 2 deletions.
20 changes: 20 additions & 0 deletions docs/source/nnablarl_api/algorithms.rst
@@ -140,6 +140,16 @@ HER
:members:
:show-inheritance:

iLQR
====
.. autoclass:: nnabla_rl.algorithms.ilqr.iLQRConfig
:members:
:show-inheritance:

.. autoclass:: nnabla_rl.algorithms.ilqr.iLQR
:members:
:show-inheritance:

IQN
====
.. autoclass:: nnabla_rl.algorithms.iqn.IQNConfig
@@ -150,6 +160,16 @@ IQN
:members:
:show-inheritance:

LQR
====
.. autoclass:: nnabla_rl.algorithms.lqr.LQRConfig
:members:
:show-inheritance:

.. autoclass:: nnabla_rl.algorithms.lqr.LQR
:members:
:show-inheritance:

MMESAC
===============
.. autoclass:: nnabla_rl.algorithms.mme_sac.MMESACConfig
21 changes: 20 additions & 1 deletion nnabla_rl/algorithm.py
@@ -16,7 +16,7 @@
import sys
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Sequence, Union, cast
from typing import Any, Dict, Optional, Sequence, Tuple, Union, cast

import gym
import numpy as np
@@ -213,6 +213,25 @@ def compute_eval_action(self, state, *, begin_of_episode=False) -> np.ndarray:
'''
raise NotImplementedError

def compute_trajectory(self,
initial_trajectory: Sequence[Tuple[np.ndarray, Optional[np.ndarray]]]) \
-> Tuple[Sequence[Tuple[np.ndarray, Optional[np.ndarray]]], Sequence[Dict[str, Any]]]:
'''
Compute a trajectory (sequence of state and action tuples) from the given initial trajectory using the current policy.
Most reinforcement learning algorithms do not implement this method;
only the optimal control algorithms implement it.
Args:
    initial_trajectory (Sequence[Tuple[np.ndarray, Optional[np.ndarray]]]): initial trajectory.
Returns:
    Tuple[Sequence[Tuple[np.ndarray, Optional[np.ndarray]]], Sequence[Dict[str, Any]]]:
        Sequence of state and action tuples and extra information (if any) at each timestep,
        computed with the current best policy. The extra information depends on the algorithm.
        The sequence length is the same as that of the initial trajectory.
'''
raise NotImplementedError

def _before_training_start(self, env_or_buffer):
pass

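The `compute_trajectory` interface added above accepts an initial trajectory (a sequence of (state, action) tuples) and returns a refined trajectory of the same length together with per-timestep extra information. The sketch below shows one way it might be called; the helper name, the zero-action initialization, the trailing `None` action, and the column-vector shapes are illustrative assumptions, and `algo` is assumed to be an already constructed optimal control algorithm (e.g. `LQR` or `iLQR`), whose constructor arguments are documented per algorithm.

```python
import numpy as np
from typing import Any, Dict, Optional, Sequence, Tuple


def plan_trajectory(algo,
                    initial_state: np.ndarray,
                    action_dim: int,
                    horizon: int) -> Tuple[Sequence[Tuple[np.ndarray, Optional[np.ndarray]]],
                                           Sequence[Dict[str, Any]]]:
    # Naive initialization: repeat the initial state and pair it with zero actions.
    # The final tuple carries None as its action (the action type is Optional),
    # since no action follows the last state.
    initial_trajectory = [(initial_state, np.zeros((action_dim, 1)))
                          for _ in range(horizon - 1)]
    initial_trajectory.append((initial_state, None))

    # Refine the trajectory with the algorithm's current policy.
    trajectory, infos = algo.compute_trajectory(initial_trajectory)

    # The returned sequence has the same length as the initial trajectory;
    # infos holds algorithm-dependent extra information for each timestep.
    assert len(trajectory) == len(initial_trajectory)
    return trajectory, infos
```

How the initial trajectory should be seeded (repeated initial state, zero-action rollout, or something task-specific) depends on the algorithm; the placeholder above is only meant to show the shape of the data.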
16 changes: 15 additions & 1 deletion nnabla_rl/algorithms/README.md
@@ -1,5 +1,9 @@
# Algorithm catalog

nnabla-rl offers various (deep) reinforcement learning and optimal control algorithms. See the list below for the implemented algorithms! <br/>

## Reinforcement learning algorithms

- Online training: Training which is performed by interacting with the environment. You'll need to prepare an environment which is compatible with the [OpenAI gym's environment interface](https://gym.openai.com/docs/#environments).
- Offline(Batch) training: Training which is performed solely from provided data. You'll need to prepare a dataset wrapped in a [ReplayBuffer](../replay_buffer.py).
- Continuous/Discrete action: If you are familiar with training deep neural nets, the difference between the action types is similar to the difference between regression and classification. A continuous action consists of real value(s) (e.g. a robot's motor torque). In contrast, a discrete action is one that can be labeled (e.g. UP, DOWN, RIGHT, LEFT). The action type is determined by the environment (problem), and the applicable algorithms change depending on it.
@@ -37,4 +41,14 @@
|[TRPO](https://arxiv.org/abs/1502.05477)|:heavy_check_mark:|:x:|:heavy_check_mark:|(We will support discrete action in the future)|:x:|
|[TRPO (ICML 2015 version)](https://arxiv.org/abs/1502.05477)|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|:x:|

<sup>*</sup>May require special treatment to train with RNN layers.

## Optimal control algorithms

- Need training: Most optimal control algorithms do NOT require training to run the controller. Instead, you will need the dynamics model of the system and the cost function of the task prior to executing the algorithm. See the documentation of each algorithm for details. (A standalone sketch of the LQR computation follows the table below.)
- Continuous/Discrete action: Same as reinforcement learning. However, most optimal control algorithms do not support discrete actions.

|Algorithm|Need training|Continuous action|Discrete action|
|:---|:---:|:---:|:---:|
|[iLQR](https://homes.cs.washington.edu/~todorov/papers/TassaIROS12.pdf)|not required|:heavy_check_mark:|:x:|
|[LQR](https://en.wikipedia.org/wiki/Linear%E2%80%93quadratic_regulator)|not required|:heavy_check_mark:|:x:|
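For reference, the standalone NumPy sketch below shows the computation behind a finite-horizon, discrete-time LQR controller: a backward Riccati recursion that produces time-varying feedback gains. It illustrates the technique itself rather than the nnabla-rl implementation; the double-integrator dynamics, the cost matrices, and the horizon are made-up example values.

```python
import numpy as np


def finite_horizon_lqr_gains(A, B, Q, R, Q_final, horizon):
    """Backward Riccati recursion for x_{t+1} = A x_t + B u_t with cost
    sum_t (x_t^T Q x_t + u_t^T R u_t) + x_T^T Q_final x_T.
    Returns feedback gains K_t such that u_t = -K_t x_t."""
    P = Q_final
    gains = []
    for _ in range(horizon):
        K = np.linalg.solve(R + B.T @ P @ B, B.T @ P @ A)
        P = Q + A.T @ P @ (A - B @ K)
        gains.append(K)
    return list(reversed(gains))


# Made-up double-integrator dynamics and quadratic costs.
dt = 0.1
A = np.array([[1.0, dt], [0.0, 1.0]])
B = np.array([[0.0], [dt]])
Q = np.eye(2)
R = np.array([[0.1]])

gains = finite_horizon_lqr_gains(A, B, Q, R, Q_final=10.0 * np.eye(2), horizon=50)

# Roll the controller out from an initial state.
x = np.array([[1.0], [0.0]])
for K in gains:
    u = -K @ x           # linear state feedback
    x = A @ x + B @ u    # propagate the (known) dynamics
```

iLQR extends the same idea to nonlinear systems by repeatedly linearizing the dynamics and quadratizing the cost around the current trajectory, then re-running a recursion of this form.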
4 changes: 4 additions & 0 deletions nnabla_rl/algorithms/__init__.py
@@ -31,7 +31,9 @@
from nnabla_rl.algorithms.icml2015_trpo import ICML2015TRPO, ICML2015TRPOConfig
from nnabla_rl.algorithms.icml2018_sac import ICML2018SAC, ICML2018SACConfig
from nnabla_rl.algorithms.icra2018_qtopt import ICRA2018QtOpt, ICRA2018QtOptConfig
from nnabla_rl.algorithms.ilqr import iLQR, iLQRConfig
from nnabla_rl.algorithms.iqn import IQN, IQNConfig
from nnabla_rl.algorithms.lqr import LQR, LQRConfig
from nnabla_rl.algorithms.mme_sac import MMESAC, MMESACConfig
from nnabla_rl.algorithms.munchausen_dqn import MunchausenDQN, MunchausenDQNConfig
from nnabla_rl.algorithms.munchausen_iqn import MunchausenIQN, MunchausenIQNConfig
@@ -86,7 +88,9 @@ def get_class_of(name):
register_algorithm(Dummy, DummyConfig)
register_algorithm(HER, HERConfig)
register_algorithm(ICML2018SAC, ICML2018SACConfig)
register_algorithm(iLQR, iLQRConfig)
register_algorithm(IQN, IQNConfig)
register_algorithm(LQR, LQRConfig)
register_algorithm(MMESAC, MMESACConfig)
register_algorithm(MunchausenDQN, MunchausenDQNConfig)
register_algorithm(MunchausenIQN, MunchausenIQNConfig)
