Commit: Code check
buptchan committed Aug 5, 2021
1 parent 887b90b commit 8402253
Showing 7 changed files with 13 additions and 14 deletions.
2 changes: 1 addition & 1 deletion maro/rl/algorithms/__init__.py
@@ -5,8 +5,8 @@
 from .ac import ActorCritic
 from .ddpg import DDPG
 from .dqn import DQN
-from .pg import PolicyGradient
 from .index import get_algorithm_cls, get_algorithm_model_cls
+from .pg import PolicyGradient

 __all__ = [
     "AbsAlgorithm", "ActorCritic", "DDPG", "DQN", "PolicyGradient", "get_algorithm_cls", "get_algorithm_model_cls"
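These exports are typically consumed by name through the lookup helpers re-exported here. A minimal usage sketch, assuming a string-keyed registry (the key "dqn" is a guess, not confirmed by this diff):

from maro.rl.algorithms import DQN, get_algorithm_cls

algorithm_cls = get_algorithm_cls("dqn")  # hypothetical key; presumably returns a class such as DQN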
3 changes: 1 addition & 2 deletions maro/rl/algorithms/abs_algorithm.py
@@ -2,7 +2,6 @@
 # Licensed under the MIT license.

 from abc import ABC, abstractmethod
-from typing import Union

 from maro.rl.experience import ExperienceSet
 from maro.rl.exploration import AbsExploration
@@ -50,7 +49,7 @@ def get_state(self, inference: bool = True):
         Args:
             inference (bool): If True, the returned state is for inference purposes only. This parameter
-                may be ignored for some algorithms.
+                may be ignored for some algorithms.
         """
         pass

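To make the `inference` flag concrete, here is a minimal sketch of a concrete subclass; it assumes `get_state` and `learn` are the abstract methods (the real base class may require more overrides than shown):

from maro.rl.algorithms import AbsAlgorithm

class MyAlgorithm(AbsAlgorithm):
    def get_state(self, inference: bool = True):
        # When only inference is needed, a lighter-weight state (e.g., policy
        # weights without optimizer state) could be returned here.
        ...

    def learn(self, data):
        ...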
6 changes: 3 additions & 3 deletions maro/rl/algorithms/ac.py
@@ -105,12 +105,12 @@ def _get_loss(self, batch: ExperienceSet):

     def learn(self, data: Union[ExperienceSet, dict]):
         assert self.ac_net.trainable, "ac_net needs to have at least one optimizer registered."
-        # If data is an ExperienceSet, compute the loss from the batch and backprop it through the network.
+        # If data is an ExperienceSet, compute the loss from the batch and backprop it through the network.
         if isinstance(data, ExperienceSet):
-            self.ac_net.train()
+            self.ac_net.train()
             loss = self._get_loss(data)
             self.ac_net.step(loss)
-        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
+        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
         else:
             self.ac_net.apply(data)

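The same ExperienceSet-or-gradient-dict dispatch recurs in the DDPG, DQN and PolicyGradient hunks below. A usage sketch of the two call paths (experience_memory and remote_worker are hypothetical stand-ins, not names from this commit):

# Local learning: a batch of experiences, so the loss is computed and back-propagated.
batch = experience_memory.get()            # experience_memory: hypothetical ExperienceSet source
algorithm.learn(batch)

# Distributed learning: a dict of gradients (e.g., computed on a worker), applied directly.
grad_dict = remote_worker.get_gradients()  # remote_worker: hypothetical gradient producer
algorithm.learn(grad_dict)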
4 changes: 2 additions & 2 deletions maro/rl/algorithms/ddpg.py
@@ -93,12 +93,12 @@ def _get_loss(self, batch: ExperienceSet):

     def learn(self, data: Union[ExperienceSet, dict]):
         assert self.ac_net.trainable, "ac_net needs to have at least one optimizer registered."
-        # If data is an ExperienceSet, compute the loss from the batch and backprop it through the network.
+        # If data is an ExperienceSet, compute the loss from the batch and backprop it through the network.
         if isinstance(data, ExperienceSet):
             self.ac_net.train()
             loss = self._get_loss(data)
             self.ac_net.step(loss)
-        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
+        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
         else:
             self.ac_net.apply(data)

6 changes: 3 additions & 3 deletions maro/rl/algorithms/dqn.py
@@ -93,14 +93,14 @@ def _get_loss(self, experience_batch: ExperienceSet):

     def learn(self, data: Union[ExperienceSet, dict]):
         assert self.q_net.trainable, "q_net needs to have at least one optimizer registered."
-        # If data is an ExperienceSet, compute the DQN loss from the batch and backprop it through the network.
+        # If data is an ExperienceSet, compute the DQN loss from the batch and backprop it through the network.
         if isinstance(data, ExperienceSet):
             self.q_net.train()
             loss = self._get_loss(data)
             self.q_net.step(loss)
-        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
+        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
         else:
-            self.q_net.apply(data)
+            self.q_net.apply(data)

     def post_update(self, update_index: int):
         # soft-update target network
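`post_update` mentions a soft update of the target network. For illustration, the generic form of such an update is sketched below; this is the textbook Polyak-averaging rule with a hypothetical coefficient `tau`, not the exact MARO implementation:

import torch.nn as nn

def soft_update(target: nn.Module, online: nn.Module, tau: float) -> None:
    # Move each target parameter a fraction tau toward the corresponding online parameter.
    for t, s in zip(target.parameters(), online.parameters()):
        t.data.copy_((1.0 - tau) * t.data + tau * s.data)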
4 changes: 2 additions & 2 deletions maro/rl/algorithms/pg.py
@@ -57,12 +57,12 @@ def learn(self, data: Union[ExperienceSet, dict]):
             which they are generated during the simulation. Otherwise, the return values may be meaningless.
         """
         assert self.policy_net.trainable, "policy_net needs to have at least one optimizer registered."
-        # If data is an ExperienceSet, compute the loss from the batch and backprop it through the network.
+        # If data is an ExperienceSet, compute the loss from the batch and backprop it through the network.
         if isinstance(data, ExperienceSet):
             self.policy_net.train()
             loss = self._get_loss(data)
             self.policy_net.step(loss)
-        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
+        # Otherwise treat the data as a dict of gradients that can be applied directly to the network.
         else:
             self.policy_net.apply(data)

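The docstring warns that experiences must arrive in the order they were generated because returns are accumulated backward over the trajectory. A generic sketch of that computation (gamma is a hypothetical discount factor; this is illustrative, not the module's own `_get_loss`):

def discounted_returns(rewards: list, gamma: float = 0.99) -> list:
    # Walk the trajectory backward so each step's return includes all later rewards.
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.append(g)
    return list(reversed(returns))

# discounted_returns([1, 0, 1]) -> [1 + 0.99**2, 0.99, 1.0]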
2 changes: 1 addition & 1 deletion maro/rl/model/core_model.py
@@ -120,7 +120,7 @@ def apply(self, grad_dict: dict):
     def step(self, loss: torch.Tensor):
         """Use the loss to back-propagate gradients and apply them to the underlying parameters.

-        This is equivalent to a chained ``get_gradients`` and ``apply``.
+        This is equivalent to a chained ``get_gradients`` and ``apply``.

         Args:
             loss: Result of a computation graph that involves the underlying parameters.
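The documented equivalence can be made explicit. A sketch assuming a model built from this class, where `get_gradients` is the companion method the docstring itself references:

# One-shot form: back-propagate the loss and update parameters in a single call.
model.step(loss)

# Equivalent two-step form: extract the gradients, then apply them.
grad_dict = model.get_gradients(loss)
model.apply(grad_dict)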
