Add docformatter
takuseno committed Jun 20, 2023
1 parent 427637a commit c210d41
Showing 28 changed files with 13 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/format_check.yml
@@ -24,7 +24,7 @@ jobs:
python -m pip install --upgrade pip
pip install Cython numpy matplotlib onnxruntime onnx pytest tensorboardX
pip install -e .
-pip install black mypy pylint==2.13.5 isort
+pip install black mypy pylint==2.13.5 isort docformatter
- name: Check format
run: |
./scripts/format
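The contents of `./scripts/format` are not part of this diff, so the exact docformatter invocation it uses is not shown. As a rough, hypothetical sketch of a check-mode run over the package (flags per docformatter's standard CLI):
```
$ docformatter --check --recursive d3rlpy  # exits non-zero if any docstring would be rewritten
```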
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -21,15 +21,15 @@ Before making your nice PR, please run the following commands to inspect code qua
### testing
```
$ pip install pytest-cov onnxruntime stable-baselines3 # dependencies used in unit tests
$ pip install git+https://github.com/takuseno/d4rl-pybullet
$ ./scripts/test
```

### coding style
This repository is styled with [black](https://github.com/psf/black) formatter.
Also, [isort](https://github.com/PyCQA/isort) is used to format package imports.
+[docformatter](https://github.com/PyCQA/docformatter) is additionally used to format docstrings.
```
-$ pip install black # formatters
+$ pip install black isort docformatter # formatters
$ ./scripts/format
```
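Most of the hunks below are mechanical docformatter fixes: it deletes the blank line left before a docstring's closing quotes and re-wraps over-long summary lines. A minimal before/after sketch of the blank-line case, modeled on the `d3rlpy.seed` hunk further down (the summary text and exact spacing here are assumptions, since the removed line is not rendered in this view):
```
# Before: a blank line sits just before the closing quotes
def seed(n: int) -> None:
    """Set the global random seed.

    Args:
        n (int): seed value.

    """

# After running ./scripts/format with docformatter: the blank line is removed
def seed(n: int) -> None:
    """Set the global random seed.

    Args:
        n (int): seed value.
    """
```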

1 change: 0 additions & 1 deletion d3rlpy/__init__.py
@@ -23,7 +23,6 @@ def seed(n: int) -> None:
Args:
n (int): seed value.
"""
random.seed(n)
np.random.seed(n)
1 change: 0 additions & 1 deletion d3rlpy/algos/qlearning/awac.py
@@ -69,7 +69,6 @@ class AWACConfig(LearnableConfig):
:math:`A^\pi(s_t, a_t)`.
n_critics (int): the number of Q functions for ensemble.
update_actor_interval (int): interval to update policy function.
"""
actor_learning_rate: float = 3e-4
critic_learning_rate: float = 3e-4
17 changes: 1 addition & 16 deletions d3rlpy/algos/qlearning/base.py
@@ -182,7 +182,6 @@ def save_policy(self, fname: str) -> None:
Args:
fname: destination file path.
"""
assert self._impl is not None, IMPL_NOT_INITIALIZED_ERROR

@@ -254,7 +253,6 @@ def predict(self, x: Observation) -> np.ndarray:
Returns:
greedy actions
"""
assert self._impl is not None, IMPL_NOT_INITIALIZED_ERROR
assert check_non_1d_array(x), "Input must have batch dimension."
@@ -300,7 +298,6 @@ def predict_value(self, x: Observation, action: np.ndarray) -> np.ndarray:
Returns:
predicted action-values
"""
assert self._impl is not None, IMPL_NOT_INITIALIZED_ERROR
assert check_non_1d_array(x), "Input must have batch dimension."
@@ -341,7 +338,6 @@ def sample_action(self, x: Observation) -> np.ndarray:
Returns:
sampled actions.
"""
assert self._impl is not None, IMPL_NOT_INITIALIZED_ERROR
assert check_non_1d_array(x), "Input must have batch dimension."
@@ -400,7 +396,6 @@ def fit(
Returns:
list of result tuples (epoch, metrics) per epoch.
"""
results = list(
self.fitter(
@@ -432,7 +427,7 @@ def fitter(
callback: Optional[Callable[[Self, int, int], None]] = None,
) -> Generator[Tuple[int, Dict[str, float]], None, None]:
"""Iterate over epochs steps to train with the given dataset. At each
-iteration algo methods and properties can be changed or queried.
+iteration algo methods and properties can be changed or queried.
.. code-block:: python
@@ -458,7 +453,6 @@ def fitter(
Returns:
iterator yielding current epoch and metrics dict.
"""
dataset_info = DatasetInfo.from_episodes(dataset.episodes)
LOG.info("dataset info", dataset_info=dataset_info)
@@ -593,7 +587,6 @@ def fit_online(
show_progress: flag to show progress bar for iterations.
callback: callable function that takes ``(algo, epoch, total_step)``
, which is called at the end of epochs.
"""

# create default replay buffer
@@ -740,7 +733,6 @@ def collect(
Returns:
replay buffer with the collected data.
"""
# create default replay buffer
if buffer is None:
@@ -806,7 +798,6 @@ def update(self, batch: TransitionMiniBatch) -> Dict[str, float]:
Returns:
dictionary of metrics.
"""
torch_batch = TorchMiniBatch.from_batch(
batch=batch,
@@ -828,7 +819,6 @@ def inner_update(self, batch: TorchMiniBatch) -> Dict[str, float]:
Returns:
dictionary of metrics.
"""
raise NotImplementedError

@@ -850,7 +840,6 @@ def copy_policy_from(
Args:
algo: algorithm object.
"""
assert self._impl, IMPL_NOT_INITIALIZED_ERROR
assert isinstance(algo.impl, QLearningAlgoImplBase)
@@ -874,7 +863,6 @@ def copy_policy_optim_from(
Args:
algo: algorithm object.
"""
assert self._impl, IMPL_NOT_INITIALIZED_ERROR
assert isinstance(algo.impl, QLearningAlgoImplBase)
@@ -898,7 +886,6 @@ def copy_q_function_from(
Args:
algo: algorithm object.
"""
assert self._impl, IMPL_NOT_INITIALIZED_ERROR
assert isinstance(algo.impl, QLearningAlgoImplBase)
@@ -922,7 +909,6 @@ def copy_q_function_optim_from(
Args:
algo: algorithm object.
"""
assert self._impl, IMPL_NOT_INITIALIZED_ERROR
assert isinstance(algo.impl, QLearningAlgoImplBase)
@@ -933,7 +919,6 @@ def reset_optimizer_states(self) -> None:
This is especially useful when fine-tuning policies with setting initial
optimizer states.
"""
assert self._impl, IMPL_NOT_INITIALIZED_ERROR
self._impl.reset_optimizer_states()
4 changes: 0 additions & 4 deletions d3rlpy/algos/qlearning/explorers.py
@@ -93,7 +93,6 @@ def sample(
Returns:
:math:`\\epsilon`-greedy action.
"""
greedy_actions = algo.predict(x)
random_actions = np.random.randint(algo.action_size, size=x.shape[0])
@@ -105,7 +104,6 @@ def compute_epsilon(self, step: int) -> float:
Returns:
:math:`\\epsilon`.
"""
if step >= self._duration:
return self._end_epsilon
@@ -119,7 +117,6 @@ class NormalNoise(Explorer):
Args:
mean (float): mean.
std (float): standard deviation.
"""

_mean: float
@@ -140,7 +137,6 @@ def sample(
Returns:
action with noise injection.
"""
action = algo.predict(x)
noise = np.random.normal(self._mean, self._std, size=action.shape)
3 changes: 2 additions & 1 deletion d3rlpy/algos/qlearning/plas.py
@@ -181,7 +181,8 @@ def get_action_type(self) -> ActionSpace:

@dataclasses.dataclass()
class PLASWithPerturbationConfig(PLASConfig):
r"""Config of Policy in Latent Action Space algorithm with perturbation layer.
r"""Config of Policy in Latent Action Space algorithm with perturbation
layer.
PLAS with perturbation layer enables PLAS to output out-of-distribution
action.
2 changes: 0 additions & 2 deletions d3rlpy/algos/qlearning/random_policy.py
@@ -30,7 +30,6 @@ class RandomPolicyConfig(LearnableConfig):
``['uniform', 'normal']``.
normal_std (float): standard deviation of the normal distribution. This
is only used when ``distribution='normal'``.
"""
distribution: str = "uniform"
normal_std: float = 1.0
@@ -96,7 +95,6 @@ class DiscreteRandomPolicyConfig(LearnableConfig):
This is designed for data collection and lightweight interaction tests.
``fit`` and ``fit_online`` methods will raise exceptions.
"""

def create(self, device: DeviceArg = False) -> "DiscreteRandomPolicy": # type: ignore
9 changes: 1 addition & 8 deletions d3rlpy/algos/transformer/base.py
@@ -82,7 +82,6 @@ class StatefulTransformerWrapper(Generic[TTransformerImpl, TTransformerConfig]):
Args:
algo (TransformerAlgoBase): Transformer-based algorithm.
target_return (float): target return to achieve.
"""
_algo: "TransformerAlgoBase[TTransformerImpl, TTransformerConfig]"
_target_return: float
@@ -121,7 +120,6 @@ def predict(self, x: Observation, reward: float) -> Union[np.ndarray, int]:
Returns:
action.
"""
self._observations.append(x)
self._rewards.append(reward)
@@ -182,7 +180,6 @@ def predict(self, inpt: TransformerInput) -> np.ndarray:
Returns:
action.
"""
assert self._impl is not None, IMPL_NOT_INITIALIZED_ERROR
with torch.no_grad():
@@ -216,7 +213,7 @@ def fit(
callback: Optional[Callable[[Self, int, int], None]] = None,
) -> None:
"""Iterate over epochs steps to train with the given dataset. At each
-iteration algo methods and properties can be changed or queried.
+iteration algo methods and properties can be changed or queried.
.. code-block:: python
@@ -240,7 +237,6 @@ def fit(
save_interval: interval to save parameters.
callback: callable function that takes ``(algo, epoch, total_step)``
, which is called every step.
"""
dataset_info = DatasetInfo.from_episodes(dataset.episodes)
LOG.info("dataset info", dataset_info=dataset_info)
@@ -344,7 +340,6 @@ def update(self, batch: TrajectoryMiniBatch) -> Dict[str, float]:
Returns:
dictionary of metrics.
"""
torch_batch = TorchTrajectoryMiniBatch.from_batch(
batch=batch,
@@ -366,7 +361,6 @@ def inner_update(self, batch: TorchTrajectoryMiniBatch) -> Dict[str, float]:
Returns:
dictionary of metrics.
"""
raise NotImplementedError

@@ -380,6 +374,5 @@ def as_stateful_wrapper(
Returns:
StatefulTransformerWrapper object.
"""
return StatefulTransformerWrapper(self, target_return)
1 change: 0 additions & 1 deletion d3rlpy/algos/transformer/decision_transformer.py
@@ -54,7 +54,6 @@ class DecisionTransformerConfig(TransformerConfig):
(``simple`` or ``global``).
warmup_steps (int): warmup steps for learning rate scheduler.
clip_grad_norm (float): norm of gradient clipping.
"""

batch_size: int = 64
(remaining changed files not shown)
