Commit

Fix all errors and warnings in sphinx auto reference documentation (make html).
sven1977 committed Oct 1, 2018
1 parent 3d69ece commit 06a3f46
Showing 9 changed files with 73 additions and 43 deletions.
2 changes: 1 addition & 1 deletion docs/reference/components/layers/preprocessing_layers.rst
@@ -18,7 +18,7 @@
Preprocessing Layers
--------------------

- .. automodule:: rlgraph.components.layers.preprocessing.preprocessing_layer
+ .. automodule:: rlgraph.components.layers.preprocessing.preprocess_layer
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/reference/components/neural_networks_reference.rst
@@ -59,6 +59,7 @@ Policy
:undoc-members:
:show-inheritance:


ActorComponent
++++++++++++++

2 changes: 1 addition & 1 deletion docs/reference/components/optimizers_reference.rst
@@ -28,7 +28,7 @@ Optimizer Base Class
Local Optimizer
+++++++++++++++

- .. automodule:: rlgraph.components.optimizers.local_optimizer
+ .. automodule:: rlgraph.components.optimizers.local_optimizers
:members:
:undoc-members:
:show-inheritance:
13 changes: 11 additions & 2 deletions rlgraph/agents/agent.py
@@ -256,10 +256,13 @@ def get_action(self, states, internals=None, use_exploration=True, apply_preproc
Args:
states (Union[dict,np.ndarray]): States dict/tuple or numpy array.
internals (Union[dict,np.ndarray]): Internal states dict/tuple or numpy array.
use_exploration (bool): If False, no exploration or sampling may be applied
when retrieving an action.
apply_preprocessing (bool): If True, apply any state preprocessors configured to the action. Set to
false if all pre-processing is handled externally both for acting and updating.
extra_returns (Optional[Set[str]]): Optional set of Agent-specific strings for additional return
values (besides the actions). All Agents must support "preprocessed_states".
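
For orientation, a minimal usage sketch of `get_action` as documented above (the `agent` instance, state shape, and spaces are illustrative assumptions, not part of this commit):

    # Hypothetical call against an already-built rlgraph Agent.
    import numpy as np

    state = np.zeros((4,), dtype=np.float32)  # one unbatched observation
    action = agent.get_action(
        states=state,
        use_exploration=True,      # False forces greedy retrieval, no sampling
        apply_preprocessing=True,  # False if states were already preprocessed externally
        extra_returns=None         # e.g. {"preprocessed_states"} to get those back as well
    )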
@@ -279,10 +282,13 @@ def observe(self, preprocessed_states, actions, internals, rewards, terminals, e
Args:
preprocessed_states (Union[dict, ndarray]): Preprocessed states dict or array.
actions (Union[dict, ndarray]): Actions dict or array containing actions performed for the given state(s).
internals (Union[list]): Internal state(s) returned by agent for the given states. Must be
empty list if no internals available.
rewards (float): Scalar reward(s) observed.
terminals (bool): Boolean indicating terminal.
env_id (Optional[str]): Environment id to observe for. When using vectorized execution and
buffering, using environment ids is necessary to ensure correct trajectories are inserted.
See `SingleThreadedWorker` for example usage.
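
A sketch of how `observe` pairs with `get_action` in a collection loop (Gym-style `env`, the `agent` instance, and the return unpacking are assumptions for illustration):

    # Hypothetical act/observe loop; buffering details omitted.
    state = env.reset()
    terminal = False
    while not terminal:
        action, preprocessed_state = agent.get_action(
            states=state, extra_returns={"preprocessed_states"}
        )
        state, reward, terminal, _ = env.step(action)
        agent.observe(
            preprocessed_states=preprocessed_state,
            actions=action,
            internals=[],   # must be an empty list when the agent has no internal states
            rewards=reward,
            terminals=terminal,
            env_id=None     # only needed for vectorized/buffered execution
        )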
@@ -343,8 +349,10 @@ def _observe_graph(self, preprocessed_states, actions, internals, rewards, termi
Args:
preprocessed_states (Union[dict,ndarray]): Preprocessed states dict or array.
actions (Union[dict,ndarray]): Actions dict or array containing actions performed for the given state(s).
internals (Union[list]): Internal state(s) returned by agent for the given states. Must be an empty list
if no internals available.
rewards (Union[ndarray,list,float]): Scalar reward(s) observed.
terminals (Union[list,bool]): Boolean indicating terminal.
"""
@@ -396,6 +404,7 @@ def call_api_method(self, op, inputs=None, return_ops=None):
Args:
op (str): Name of the api method.
inputs (Optional[dict,np.array]): Dict specifying the provided api_methods for (key=input space name,
values=the values that should go into this space (e.g. numpy arrays)).
Returns:
@@ -436,9 +445,9 @@ def store_model(self, path=None, add_timestep=True):
Args:
path (str): Path to model directory.
- add_timestep (bool): Indicates if current training step should be appended to
-     exported model. If false, may override previous checkpoints.
+ add_timestep (bool): Indicates if current training step should be appended to exported model.
+     If false, may override previous checkpoints.
"""
self.graph_executor.store_model(path=path, add_timestep=add_timestep)
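
Illustrative checkpoint calls for the two `add_timestep` modes (the path is a placeholder, not from this commit):

    # Hypothetical usage of store_model.
    agent.store_model(path="/tmp/rlgraph-checkpoints", add_timestep=True)   # appends the current step
    agent.store_model(path="/tmp/rlgraph-checkpoints", add_timestep=False)  # may overwrite the last export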

19 changes: 7 additions & 12 deletions rlgraph/agents/impala_agent.py
@@ -190,14 +190,6 @@ def __init__(self, discount=0.99, fifo_queue_spec=None, architecture="large", en
self.fifo_queue_keys = ["terminals", "states", "action_probs", "initial_internal_states"]
self.fifo_record_space = Dict(
{
#"preprocessed_states": self.preprocessor.get_preprocessed_space(
# default_dict(copy.deepcopy(self.state_space), dict(
# previous_action=self.action_space,
# previous_reward=FloatBox()
# ))
#),
#"actions": self.action_space,
#"rewards": float,
"terminals": bool,
"states": default_dict(copy.deepcopy(self.state_space), dict(
previous_action=self.action_space,
@@ -525,6 +517,7 @@ def define_api_methods_actor(self, env_stepper, env_output_splitter, internal_st
Args:
env_stepper (EnvironmentStepper): The EnvironmentStepper Component to step through the Env n steps
in a single op call.
fifo_queue (FIFOQueue): The FIFOQueue Component used to enqueue env sample runs (n-step).
"""
# Perform n-steps in the env and insert the results into our FIFO-queue.
@@ -562,19 +555,21 @@ def reset(self):

self.root_component.define_api_method("reset", reset)

- def define_api_methods_learner(self, fifo_output_splitter, fifo_queue, states_dict_splitter,
-                                transpose_states, transpose_terminals, transpose_action_probs,
-                                staging_area,
-                                preprocessor, policy, loss_function, optimizer):
+ def define_api_methods_learner(
+     self, fifo_output_splitter, fifo_queue, states_dict_splitter, transpose_states, transpose_terminals,
+     transpose_action_probs, staging_area, preprocessor, policy, loss_function, optimizer
+ ):
"""
Defines the API-methods used by an IMPALA learner. Its job is basically: Pull a batch from the
FIFOQueue, split it up into its components and pass these through the loss function and into the optimizer for
a learning update.
Args:
fifo_queue (FIFOQueue): The FIFOQueue Component used to enqueue env sample runs (n-step).
splitter (ContainerSplitter): The DictSplitter Component to split up a batch from the queue along its
items.
policy (Policy): The Policy Component, which to update.
loss_function (IMPALALossFunction): The IMPALALossFunction Component.
optimizer (Optimizer): The optimizer that we use to calculate an update and apply it.
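
The learner data-flow described in this docstring, condensed into a plain-Python sketch; the method names on the components are stand-ins (rlgraph wires these up as graph ops, not direct Python calls):

    # Conceptual sketch only, under assumed component interfaces.
    def learner_update(fifo_queue, splitter, preprocessor, policy, loss_function, optimizer):
        batch = fifo_queue.get_records()  # pull one batch of n-step sample runs
        terminals, states, action_probs, initial_internals = splitter.split(batch)
        preprocessed = preprocessor.preprocess(states)
        loss = loss_function.loss(preprocessed, action_probs, terminals, initial_internals)
        return optimizer.step(policy.variables(), loss)  # one learning update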
22 changes: 10 additions & 12 deletions rlgraph/components/optimizers/optimizer.py
@@ -30,11 +30,12 @@ class Optimizer(Component):
variables (DataOpTuple): The list of variables to optimize.
loss (SingleDataOp): The loss function's output.
grads_and_vars (DataOpTuple): The zipped gradients plus corresponding variables to be fed back into the
-     Optimizer for actually applying the gradients to the variables.
- *api_methods (any): Other necessary api_methods for the specific type of optimizer (e.g. a time-step).
+         Optimizer for actually applying the gradients to the variables.
+ \*api_methods (any): Other necessary api_methods for the specific type of optimizer (e.g. a time-step).
outs:
calc_grads_and_vars (DataOpTuple): The zipped gradients plus corresponding variables to be fed back into the
-     Optimizer for actually applying the gradients to the variables (via in-Socket `grads_and_vars`).
+         Optimizer for actually applying the gradients to the variables (via in-Socket `grads_and_vars`).
step (DataOp): Triggers applying the gradients coming in from `grads_and_vars` to the variables.
"""
def __init__(self, learning_rate=None, **kwargs):
@@ -52,29 +53,26 @@ def __init__(self, learning_rate=None, **kwargs):
self.define_api_method(name="apply_gradients", func=self._graph_fn_apply_gradients, must_be_complete=False)
self.define_api_method(name="step", func=self._graph_fn_step, must_be_complete=False)

- def _graph_fn_step(self, *inputs): #variables, loss, loss_per_item, *inputs):
+ def _graph_fn_step(self, *inputs):
"""
Applies an optimization step to a list of variables via a loss.
Args:
- #variables (SingleDataOp): Variables to optimize.
- #loss (SingleDataOp): Loss value.
- #loss_per_item (SingleDataOp) : Loss per item.
- *inputs (SingleDataOp): Any args to the optimizer to be able to perform gradient calculations from
+ \*inputs (SingleDataOp): Any args to the optimizer to be able to perform gradient calculations from
losses and then apply these gradients to some variables.
Returns:
"""
- raise NotImplementedError # pass
+ raise NotImplementedError

def _graph_fn_calculate_gradients(self, *inputs):
"""
Calculates the gradients for the given variables and the loss function (and maybe other child-class
- specific input parameters).
+     specific input parameters).
Args:
- inputs (SingleDataOp): Custom SingleDataOp parameters, dependent on the optimizer type.
+ \*inputs (SingleDataOp): Custom SingleDataOp parameters, dependent on the optimizer type.
Returns:
DataOpTuple: The list of gradients and variables to be optimized.
@@ -84,7 +82,7 @@ def _graph_fn_apply_gradients(self, grads_and_vars):
def _graph_fn_apply_gradients(self, grads_and_vars):
"""
Changes the given variables based on the previously calculated gradients. `gradients` is the output of
- `self._graph_fn_calculate_gradients`.
+     `self._graph_fn_calculate_gradients`.
Args:
grads_and_vars (DataOpTuple): The list of gradients and variables to be optimized.
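
The two-phase contract above (calculate gradients, then apply them) mirrors the TensorFlow 1.x optimizer pattern; a minimal standalone sketch with stand-in loss and variables (not rlgraph code):

    import tensorflow as tf

    # Stand-in graph pieces; in rlgraph these come from policy/loss components.
    variables = [tf.Variable([1.0, 2.0])]
    loss = tf.reduce_sum(tf.square(variables[0]))

    sgd = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    grads_and_vars = sgd.compute_gradients(loss, var_list=variables)  # ~ calc_grads_and_vars
    step_op = sgd.apply_gradients(grads_and_vars)                     # ~ apply_gradients / step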
7 changes: 5 additions & 2 deletions rlgraph/environments/grid_world.py
@@ -99,10 +99,13 @@ def __init__(self, world="4x4", save_mode=False, reward_function="sparse", state
Args:
world (Union[str,List[str]]): Either a string to map into `MAPS` or a list of strings describing the rows
of the world (e.g. ["S ", " G"] for a two-row/two-column world with start and goal state).
save_mode (bool): Whether to replace holes (H) with walls (W). Default: False.
reward_function (str): One of
sparse: hole=-1, fire=-1, goal=50, all other steps=-1
rich: hole=-100, fire=-10, goal=50
state_representation (str): One of "discr_pos", "xy_pos", "cam"
"""
# Build our map.
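
A hypothetical construction using the arguments documented above (import path inferred from this file's location; the two-row world follows the docstring's own example):

    from rlgraph.environments.grid_world import GridWorld  # import path assumed

    # Start top-left, goal bottom-right; sparse rewards as listed above.
    env = GridWorld(world=["S ", " G"], save_mode=False,
                    reward_function="sparse", state_representation="discr_pos")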
@@ -265,8 +268,8 @@ def get_possible_next_positions(self, discrete_pos, action):
action (int): The action choice.
Returns:
- List[Tuple[int,float]]: A list of tuples (s', p(s'|s,a)). Where s' is the next discrete position and
-     p(s'|s,a) is the probability of ending up in that position when in state s and taking action a.
+ List[Tuple[int,float]]: A list of tuples (s', p(s'\|s,a)). Where s' is the next discrete position and
+     p(s'\|s,a) is the probability of ending up in that position when in state s and taking action a.
"""
x = discrete_pos // self.n_col
y = discrete_pos % self.n_col
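
These two lines decode the flat position row-major over `self.n_col` columns; a tiny worked example with arbitrary values:

    n_col = 4        # a 4-column grid, chosen for illustration
    discrete_pos = 6
    x = discrete_pos // n_col  # -> 1 (row)
    y = discrete_pos % n_col   # -> 2 (column)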
42 changes: 29 additions & 13 deletions rlgraph/spaces/space.py
@@ -152,10 +152,12 @@ def get_shape(self, with_batch_rank=False, with_time_rank=False, time_major=None
position. If `with_batch_rank` is an int (e.g. -1), the possible batch-rank is returned as that number
(instead of None) at the 0th (or 1st if time_major is True) position.
Default: False.
with_time_rank (Union[bool,int]): Whether to include a possible time-rank as `None` at 1st (or 0th)
position. If `with_time_rank` is an int, the possible time-rank is returned as that number
(instead of None) at the 1st (or 0th if time_major is True) position.
Default: False.
time_major (bool): Overwrites `self.time_major` if not None. Default: None (use `self.time_major`).
Returns:
@@ -196,21 +198,27 @@ def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_
Args:
name (str): The name for the variable.
is_input_feed (bool): Whether the returned object should be an input placeholder,
instead of a full variable.
add_batch_rank (Optional[bool,int]): If True, will add a 0th (or 1st) rank (None) to
the created variable. If it is an int, will add that int (-1 means None).
If None, will use the Space's default value: `self.has_batch_rank`.
Default: None.
add_time_rank (Optional[bool,int]): If True, will add a 1st (or 0th) rank (None) to
the created variable. If it is an int, will add that int (-1 means None).
If None, will use the Space's default value: `self.has_time_rank`.
Default: None.
time_major (bool): Only relevant if both `add_batch_rank` and `add_time_rank` are True.
Will make the time-rank the 0th rank and the batch-rank the 1st rank.
Otherwise, batch-rank will be 0th and time-rank will be 1st.
Default: False.
is_python (bool): Whether to create a python-based variable (list) or a backend-specific one.
local (bool): Whether the variable must not be shared across the network.
Default: False.
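
Illustrative calls on some constructed Space, assuming a TensorFlow backend (return types depend on the backend):

    # Hypothetical usage of get_variable.
    states_ph = space.get_variable("states", is_input_feed=True, add_batch_rank=True)
    # -> a placeholder with a leading None (batch) dimension
    buffer_var = space.get_variable("buffer", add_batch_rank=100)
    # -> a variable whose 0th dimension is fixed at 100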
@@ -223,7 +231,7 @@ def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_
raise NotImplementedError

def flatten(self, mapping=None, custom_scope_separator='/', scope_separator_at_start=True,
-             _scope=None, _list=None):
+             scope_=None, list_=None):
"""
A mapping function to flatten this Space into an OrderedDict whose only values are
primitive (non-container) Spaces. The keys are created automatically from Dict keys and
@@ -232,12 +240,16 @@ def flatten(self, mapping=None, custom_scope_separator='/', scope_separator_at_s
Args:
mapping (Optional[callable]): A mapping function that takes a flattened auto-generated key and a primitive
Space and converts the primitive Space to something else. Default is pass through.
custom_scope_separator (str): The separator to use in the returned dict for scopes.
Default: '/'.
scope_separator_at_start (bool): Whether to add the scope-separator also at the beginning.
Default: True.
- _scope (Optional[str]): For recursive calls only. Used for automatic key generation.
- _list (Optional[list]): For recursive calls only. The list so far.
+ scope\_ (Optional[str]): For recursive calls only. Used for automatic key generation.
+ list\_ (Optional[list]): For recursive calls only. The list so far.
Returns:
OrderedDict: The OrderedDict using auto-generated keys and containing only primitive Spaces
@@ -250,32 +262,35 @@ def mapping(key, x):

# Are we in the non-recursive (first) call?
ret = False
- if _list is None:
-     _list = list()
+ if list_ is None:
+     list_ = list()
  ret = True
- _scope = ""
+ scope_ = ""

- self._flatten(mapping, custom_scope_separator, scope_separator_at_start, _scope, _list)
+ self._flatten(mapping, custom_scope_separator, scope_separator_at_start, scope_, list_)

# Non recursive (first) call -> Return the final FlattenedDataOp.
if ret:
-     return OrderedDict(_list)
+     return OrderedDict(list_)
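
A sketch of what `flatten` produces for a nested Dict space (import path and the exact key/repr formats are assumptions):

    from rlgraph.spaces import Dict, FloatBox, IntBox  # import path assumed

    space = Dict(
        position=FloatBox(shape=(2,)),
        camera=Dict(pixels=IntBox(low=0, high=255, shape=(16, 16, 3)))
    )
    flat = space.flatten()
    # Keys are auto-generated from the Dict keys, e.g.:
    # OrderedDict([("/camera/pixels", IntBox(...)), ("/position", FloatBox(...))])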

- def _flatten(self, mapping, custom_scope_separator, scope_separator_at_start, _scope, _list):
+ def _flatten(self, mapping, custom_scope_separator, scope_separator_at_start, scope_, list_):
"""
Base implementation. May be overridden by ContainerSpace classes.
Simply sends `self` through the mapping function.
Args:
mapping (callable): The mapping function to use on a primitive (non-container) Space.
custom_scope_separator (str): The separator to use in the returned dict for scopes.
Default: '/'.
scope_separator_at_start (bool): Whether to add the scope-separator also at the beginning.
Default: True.
- _scope (str): The flat-key to use to store the mapped result in list_.
- _list (list): The list to append the mapped results to (under key=`scope_`).
+ scope\_ (str): The flat-key to use to store the mapped result in list_.
+ list\_ (list): The list to append the mapped results to (under key=`scope_`).
"""
- _list.append(tuple([_scope, mapping(_scope, self)]))
+ list_.append(tuple([scope_, mapping(scope_, self)]))

def __repr__(self):
return "Space(shape=" + str(self.shape) + ")"
@@ -291,9 +306,10 @@ def sample(self, size=None, fill_value=None):
Args:
size (Optional[int]): The number of samples or batch size to sample.
If size is > 1: Returns a batch of size samples with the 0th rank being the batch rank
- (even if `self.has_batch_rank` is False).
+     (even if `self.has_batch_rank` is False).
If size is None or (1 and self.has_batch_rank is False): Returns a single sample w/o batch rank.
If size is 1 and self.has_batch_rank is True: Returns a single sample w/ the batch rank.
fill_value (Optional[any]): The number or initializer specifier to fill the sample. Can be used to create
a (non-random) sample with a certain fill value in all elements.
TODO: support initializer spec-strings like 'normal', 'truncated_normal', etc..
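
Illustrative sampling behavior for a hypothetical FloatBox space (import path assumed):

    from rlgraph.spaces import FloatBox  # import path assumed

    space = FloatBox(shape=(3,))
    space.sample()                # single sample, shape (3,)
    space.sample(size=5)          # batch of 5 samples, shape (5, 3)
    space.sample(fill_value=0.0)  # non-random sample: all elements 0.0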
8 changes: 8 additions & 0 deletions rlgraph/spaces/space_utils.py
@@ -44,9 +44,11 @@ def get_list_registry(from_space, capacity=None, initializer=0, flatten=True, ad
capacity (Optional[int]): Optional capacity to initialize list.
initializer (Optional(any)): Optional initializer for list if capacity is not None.
flatten (bool): Whether to produce a FlattenedDataOp with auto-keys.
add_batch_rank (Optional[bool,int]): If from_space is given and is True, will add a 0th rank (None) to
the created variable. If it is an int, will add that int instead of None.
Default: False.
Returns:
dict: Container dict mapping spaces to empty lists.
"""
@@ -204,17 +206,23 @@ def sanity_check_space(
space (Space): The Space object to check.
allowed_types (Optional[List[type]]): A list of types that this Space must be an instance of.
non_allowed_types (Optional[List[type]]): A list of types that this Space must not be an instance of.
must_have_batch_rank (Optional[bool]): Whether the Space must (True) or must not (False) have the
`has_batch_rank` property set to True. None, if it doesn't matter.
must_have_time_rank (Optional[bool]): Whether the Space must (True) or must not (False) have the
`has_time_rank` property set to True. None, if it doesn't matter.
must_have_batch_or_time_rank (Optional[bool]): Whether the Space must (True) or must not (False) have either
the `has_batch_rank` or the `has_time_rank` property set to True.
must_have_categories (Optional[bool]): For IntBoxes, whether the Space must (True) or must not (False) have
global bounds with `num_categories` > 0. None, if it doesn't matter.
num_categories (Optional[int,tuple]): An int or a tuple (min,max) range within which the Space's
`num_categories` rank must lie. Only valid for IntBoxes.
None if it doesn't matter.
rank (Optional[int,tuple]): An int or a tuple (min,max) range within which the Space's rank must lie.
None if it doesn't matter.
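
A hypothetical check combining several of these constraints (import paths and the IntBox constructor kwargs are assumptions):

    from rlgraph.spaces import IntBox  # import path assumed
    from rlgraph.spaces.space_utils import sanity_check_space

    # Raises if the space is not an IntBox with a batch rank and 2..10 categories.
    sanity_check_space(
        IntBox(low=0, high=3, add_batch_rank=True),
        allowed_types=[IntBox],
        must_have_batch_rank=True,
        must_have_categories=True,
        num_categories=(2, 10),
    )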
