Commit

Fix all errors and warnings in sphinx auto reference documentation (make html).
sven1977 committed Oct 1, 2018
1 parent 3d69ece commit 06a3f46
Showing 9 changed files with 73 additions and 43 deletions.
2 changes: 1 addition & 1 deletion docs/reference/components/layers/preprocessing_layers.rst
@@ -18,7 +18,7 @@
Preprocessing Layers
--------------------

- .. automodule:: rlgraph.components.layers.preprocessing.preprocessing_layer
+ .. automodule:: rlgraph.components.layers.preprocessing.preprocess_layer
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/reference/components/neural_networks_reference.rst
@@ -59,6 +59,7 @@ Policy
:undoc-members:
:show-inheritance:


ActorComponent
++++++++++++++

2 changes: 1 addition & 1 deletion docs/reference/components/optimizers_reference.rst
@@ -28,7 +28,7 @@ Optimizer Base Class
Local Optimizer
+++++++++++++++

- .. automodule:: rlgraph.components.optimizers.local_optimizer
+ .. automodule:: rlgraph.components.optimizers.local_optimizers
:members:
:undoc-members:
:show-inheritance:
13 changes: 11 additions & 2 deletions rlgraph/agents/agent.py
@@ -256,10 +256,13 @@ def get_action(self, states, internals=None, use_exploration=True, apply_preproc
Args:
states (Union[dict,np.ndarray]): States dict/tuple or numpy array.
internals (Union[dict,np.ndarray]): Internal states dict/tuple or numpy array.
use_exploration (bool): If False, no exploration or sampling may be applied
when retrieving an action.
apply_preprocessing (bool): If True, apply any state preprocessors configured to the action. Set to
false if all pre-processing is handled externally both for acting and updating.
extra_returns (Optional[Set[str]]): Optional set of Agent-specific strings for additional return
values (besides the actions). All Agents must support "preprocessed_states".
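
For orientation, a minimal usage sketch of `get_action` as documented above (the `agent` instance, state shape, and spaces are illustrative assumptions, not part of this commit):

    # Hypothetical call against an already-built rlgraph Agent.
    import numpy as np

    state = np.zeros((4,), dtype=np.float32)  # one unbatched observation
    action = agent.get_action(
        states=state,
        use_exploration=True,      # False forces greedy retrieval, no sampling
        apply_preprocessing=True,  # False if states were already preprocessed externally
        extra_returns=None         # e.g. {"preprocessed_states"} to get those back as well
    )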
@@ -279,10 +282,13 @@ def observe(self, preprocessed_states, actions, internals, rewards, terminals, e
Args:
preprocessed_states (Union[dict, ndarray]): Preprocessed states dict or array.
actions (Union[dict, ndarray]): Actions dict or array containing actions performed for the given state(s).
internals (Union[list]): Internal state(s) returned by agent for the given states. Must be
empty list if no internals available.
rewards (float): Scalar reward(s) observed.
terminals (bool): Boolean indicating terminal.
env_id (Optional[str]): Environment id to observe for. When using vectorized execution and
buffering, using environment ids is necessary to ensure correct trajectories are inserted.
See `SingleThreadedWorker` for example usage.
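
A sketch of how `observe` pairs with `get_action` in a collection loop (Gym-style `env`, the `agent` instance, and the return unpacking are assumptions for illustration):

    # Hypothetical act/observe loop; buffering details omitted.
    state = env.reset()
    terminal = False
    while not terminal:
        action, preprocessed_state = agent.get_action(
            states=state, extra_returns={"preprocessed_states"}
        )
        state, reward, terminal, _ = env.step(action)
        agent.observe(
            preprocessed_states=preprocessed_state,
            actions=action,
            internals=[],   # must be an empty list when the agent has no internal states
            rewards=reward,
            terminals=terminal,
            env_id=None     # only needed for vectorized/buffered execution
        )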
@@ -343,8 +349,10 @@ def _observe_graph(self, preprocessed_states, actions, internals, rewards, termi
Args:
preprocessed_states (Union[dict,ndarray]): Preprocessed states dict or array.
actions (Union[dict,ndarray]): Actions dict or array containing actions performed for the given state(s).
internals (Union[list]): Internal state(s) returned by agent for the given states. Must be an empty list
if no internals available.
rewards (Union[ndarray,list,float]): Scalar reward(s) observed.
terminals (Union[list,bool]): Boolean indicating terminal.
"""
@@ -396,6 +404,7 @@ def call_api_method(self, op, inputs=None, return_ops=None):
Args:
op (str): Name of the api method.
inputs (Optional[dict,np.array]): Dict specifying the provided api_methods for (key=input space name,
values=the values that should go into this space (e.g. numpy arrays)).
Returns:
@@ -436,9 +445,9 @@ def store_model(self, path=None, add_timestep=True):
Args:
path (str): Path to model directory.
- add_timestep (bool): Indicates if current training step should be appended to
-     exported model. If false, may override previous checkpoints.
+ add_timestep (bool): Indicates if current training step should be appended to exported model.
+     If false, may override previous checkpoints.
"""
self.graph_executor.store_model(path=path, add_timestep=add_timestep)
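
Illustrative checkpoint calls for the two `add_timestep` modes (the path is a placeholder, not from this commit):

    # Hypothetical usage of store_model.
    agent.store_model(path="/tmp/rlgraph-checkpoints", add_timestep=True)   # appends the current step
    agent.store_model(path="/tmp/rlgraph-checkpoints", add_timestep=False)  # may overwrite the last export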

19 changes: 7 additions & 12 deletions rlgraph/agents/impala_agent.py
@@ -190,14 +190,6 @@ def __init__(self, discount=0.99, fifo_queue_spec=None, architecture="large", en
self.fifo_queue_keys = ["terminals", "states", "action_probs", "initial_internal_states"]
self.fifo_record_space = Dict(
{
#"preprocessed_states": self.preprocessor.get_preprocessed_space(
# default_dict(copy.deepcopy(self.state_space), dict(
# previous_action=self.action_space,
# previous_reward=FloatBox()
# ))
#),
#"actions": self.action_space,
#"rewards": float,
"terminals": bool,
"states": default_dict(copy.deepcopy(self.state_space), dict(
previous_action=self.action_space,
@@ -525,6 +517,7 @@ def define_api_methods_actor(self, env_stepper, env_output_splitter, internal_st
Args:
env_stepper (EnvironmentStepper): The EnvironmentStepper Component to step through the Env n steps
in a single op call.
fifo_queue (FIFOQueue): The FIFOQueue Component used to enqueue env sample runs (n-step).
"""
# Perform n-steps in the env and insert the results into our FIFO-queue.
@@ -562,19 +555,21 @@ def reset(self):

self.root_component.define_api_method("reset", reset)

- def define_api_methods_learner(self, fifo_output_splitter, fifo_queue, states_dict_splitter,
-                                transpose_states, transpose_terminals, transpose_action_probs,
-                                staging_area,
-                                preprocessor, policy, loss_function, optimizer):
+ def define_api_methods_learner(
+     self, fifo_output_splitter, fifo_queue, states_dict_splitter, transpose_states, transpose_terminals,
+     transpose_action_probs, staging_area, preprocessor, policy, loss_function, optimizer
+ ):
"""
Defines the API-methods used by an IMPALA learner. Its job is basically: Pull a batch from the
FIFOQueue, split it up into its components and pass these through the loss function and into the optimizer for
a learning update.
Args:
fifo_queue (FIFOQueue): The FIFOQueue Component used to enqueue env sample runs (n-step).
splitter (ContainerSplitter): The DictSplitter Component to split up a batch from the queue along its
items.
policy (Policy): The Policy Component, which to update.
loss_function (IMPALALossFunction): The IMPALALossFunction Component.
optimizer (Optimizer): The optimizer that we use to calculate an update and apply it.
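
The learner data-flow described in this docstring, condensed into a plain-Python sketch; the method names on the components are stand-ins (rlgraph wires these up as graph ops, not direct Python calls):

    # Conceptual sketch only, under assumed component interfaces.
    def learner_update(fifo_queue, splitter, preprocessor, policy, loss_function, optimizer):
        batch = fifo_queue.get_records()  # pull one batch of n-step sample runs
        terminals, states, action_probs, initial_internals = splitter.split(batch)
        preprocessed = preprocessor.preprocess(states)
        loss = loss_function.loss(preprocessed, action_probs, terminals, initial_internals)
        return optimizer.step(policy.variables(), loss)  # one learning update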
22 changes: 10 additions & 12 deletions rlgraph/components/optimizers/optimizer.py
@@ -30,11 +30,12 @@ class Optimizer(Component):
variables (DataOpTuple): The list of variables to optimize.
loss (SingleDataOp): The loss function's output.
grads_and_vars (DataOpTuple): The zipped gradients plus corresponding variables to be fed back into the
-     Optimizer for actually applying the gradients to the variables.
- *api_methods (any): Other necessary api_methods for the specific type of optimizer (e.g. a time-step).
+         Optimizer for actually applying the gradients to the variables.
+ \*api_methods (any): Other necessary api_methods for the specific type of optimizer (e.g. a time-step).
outs:
calc_grads_and_vars (DataOpTuple): The zipped gradients plus corresponding variables to be fed back into the
-     Optimizer for actually applying the gradients to the variables (via in-Socket `grads_and_vars`).
+         Optimizer for actually applying the gradients to the variables (via in-Socket `grads_and_vars`).
step (DataOp): Triggers applying the gradients coming in from `grads_and_vars` to the variables.
"""
def __init__(self, learning_rate=None, **kwargs):
@@ -52,29 +53,26 @@ def __init__(self, learning_rate=None, **kwargs):
self.define_api_method(name="apply_gradients", func=self._graph_fn_apply_gradients, must_be_complete=False)
self.define_api_method(name="step", func=self._graph_fn_step, must_be_complete=False)

- def _graph_fn_step(self, *inputs): #variables, loss, loss_per_item, *inputs):
+ def _graph_fn_step(self, *inputs):
"""
Applies an optimization step to a list of variables via a loss.
Args:
- #variables (SingleDataOp): Variables to optimize.
- #loss (SingleDataOp): Loss value.
- #loss_per_item (SingleDataOp) : Loss per item.
- *inputs (SingleDataOp): Any args to the optimizer to be able to perform gradient calculations from
+ \*inputs (SingleDataOp): Any args to the optimizer to be able to perform gradient calculations from
losses and then apply these gradients to some variables.
Returns:
"""
- raise NotImplementedError # pass
+ raise NotImplementedError

def _graph_fn_calculate_gradients(self, *inputs):
"""
Calculates the gradients for the given variables and the loss function (and maybe other child-class
- specific input parameters).
+     specific input parameters).
Args:
- inputs (SingleDataOp): Custom SingleDataOp parameters, dependent on the optimizer type.
+ \*inputs (SingleDataOp): Custom SingleDataOp parameters, dependent on the optimizer type.
Returns:
DataOpTuple: The list of gradients and variables to be optimized.
@@ -84,7 +82,7 @@ def _graph_fn_apply_gradients(self, grads_and_vars):
def _graph_fn_apply_gradients(self, grads_and_vars):
"""
Changes the given variables based on the previously calculated gradients. `gradients` is the output of
- `self._graph_fn_calculate_gradients`.
+     `self._graph_fn_calculate_gradients`.
Args:
grads_and_vars (DataOpTuple): The list of gradients and variables to be optimized.
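
The two-phase contract above (calculate gradients, then apply them) mirrors the TensorFlow 1.x optimizer pattern; a minimal standalone sketch with stand-in loss and variables (not rlgraph code):

    import tensorflow as tf

    # Stand-in graph pieces; in rlgraph these come from policy/loss components.
    variables = [tf.Variable([1.0, 2.0])]
    loss = tf.reduce_sum(tf.square(variables[0]))

    sgd = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    grads_and_vars = sgd.compute_gradients(loss, var_list=variables)  # ~ calc_grads_and_vars
    step_op = sgd.apply_gradients(grads_and_vars)                     # ~ apply_gradients / step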
7 changes: 5 additions & 2 deletions rlgraph/environments/grid_world.py
@@ -99,10 +99,13 @@ def __init__(self, world="4x4", save_mode=False, reward_function="sparse", state
Args:
world (Union[str,List[str]]): Either a string to map into `MAPS` or a list of strings describing the rows
of the world (e.g. ["S ", " G"] for a two-row/two-column world with start and goal state).
save_mode (bool): Whether to replace holes (H) with walls (W). Default: False.
reward_function (str): One of
sparse: hole=-1, fire=-1, goal=50, all other steps=-1
rich: hole=-100, fire=-10, goal=50
state_representation (str): One of "discr_pos", "xy_pos", "cam"
"""
# Build our map.
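
A hypothetical construction using the arguments documented above (import path inferred from this file's location; the two-row world follows the docstring's own example):

    from rlgraph.environments.grid_world import GridWorld  # import path assumed

    # Start top-left, goal bottom-right; sparse rewards as listed above.
    env = GridWorld(world=["S ", " G"], save_mode=False,
                    reward_function="sparse", state_representation="discr_pos")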
@@ -265,8 +268,8 @@ def get_possible_next_positions(self, discrete_pos, action):
action (int): The action choice.
Returns:
- List[Tuple[int,float]]: A list of tuples (s', p(s'|s,a)). Where s' is the next discrete position and
-     p(s'|s,a) is the probability of ending up in that position when in state s and taking action a.
+ List[Tuple[int,float]]: A list of tuples (s', p(s'\|s,a)). Where s' is the next discrete position and
+     p(s'\|s,a) is the probability of ending up in that position when in state s and taking action a.
"""
x = discrete_pos // self.n_col
y = discrete_pos % self.n_col
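
These two lines decode the flat position row-major over `self.n_col` columns; a tiny worked example with arbitrary values:

    n_col = 4        # a 4-column grid, chosen for illustration
    discrete_pos = 6
    x = discrete_pos // n_col  # -> 1 (row)
    y = discrete_pos % n_col   # -> 2 (column)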
42 changes: 29 additions & 13 deletions rlgraph/spaces/space.py
@@ -152,10 +152,12 @@ def get_shape(self, with_batch_rank=False, with_time_rank=False, time_major=None
position. If `with_batch_rank` is an int (e.g. -1), the possible batch-rank is returned as that number
(instead of None) at the 0th (or 1st if time_major is True) position.
Default: False.
with_time_rank (Union[bool,int]): Whether to include a possible time-rank as `None` at 1st (or 0th)
position. If `with_time_rank` is an int, the possible time-rank is returned as that number
(instead of None) at the 1st (or 0th if time_major is True) position.
Default: False.
time_major (bool): Overwrites `self.time_major` if not None. Default: None (use `self.time_major`).
Returns:
@@ -196,21 +198,27 @@ def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_
Args:
name (str): The name for the variable.
is_input_feed (bool): Whether the returned object should be an input placeholder,
instead of a full variable.
add_batch_rank (Optional[bool,int]): If True, will add a 0th (or 1st) rank (None) to
the created variable. If it is an int, will add that int (-1 means None).
If None, will use the Space's default value: `self.has_batch_rank`.
Default: None.
add_time_rank (Optional[bool,int]): If True, will add a 1st (or 0th) rank (None) to
the created variable. If it is an int, will add that int (-1 means None).
If None, will use the Space's default value: `self.has_time_rank`.
Default: None.
time_major (bool): Only relevant if both `add_batch_rank` and `add_time_rank` are True.
Will make the time-rank the 0th rank and the batch-rank the 1st rank.
Otherwise, batch-rank will be 0th and time-rank will be 1st.
Default: False.
is_python (bool): Whether to create a python-based variable (list) or a backend-specific one.
local (bool): Whether the variable must not be shared across the network.
Default: False.
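
Illustrative calls on some constructed Space, assuming a TensorFlow backend (return types depend on the backend):

    # Hypothetical usage of get_variable.
    states_ph = space.get_variable("states", is_input_feed=True, add_batch_rank=True)
    # -> a placeholder with a leading None (batch) dimension
    buffer_var = space.get_variable("buffer", add_batch_rank=100)
    # -> a variable whose 0th dimension is fixed at 100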
@@ -223,7 +231,7 @@ def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_
raise NotImplementedError

def flatten(self, mapping=None, custom_scope_separator='/', scope_separator_at_start=True,
-             _scope=None, _list=None):
+             scope_=None, list_=None):
"""
A mapping function to flatten this Space into an OrderedDict whose only values are
primitive (non-container) Spaces. The keys are created automatically from Dict keys and
@@ -232,12 +240,16 @@ def flatten(self, mapping=None, custom_scope_separator='/', scope_separator_at_s
Args:
mapping (Optional[callable]): A mapping function that takes a flattened auto-generated key and a primitive
Space and converts the primitive Space to something else. Default is pass through.
custom_scope_separator (str): The separator to use in the returned dict for scopes.
Default: '/'.
scope_separator_at_start (bool): Whether to add the scope-separator also at the beginning.
Default: True.
- _scope (Optional[str]): For recursive calls only. Used for automatic key generation.
- _list (Optional[list]): For recursive calls only. The list so far.
+ scope\_ (Optional[str]): For recursive calls only. Used for automatic key generation.
+ list\_ (Optional[list]): For recursive calls only. The list so far.
Returns:
OrderedDict: The OrderedDict using auto-generated keys and containing only primitive Spaces
@@ -250,32 +262,35 @@ def mapping(key, x):

# Are we in the non-recursive (first) call?
ret = False
- if _list is None:
-     _list = list()
+ if list_ is None:
+     list_ = list()
  ret = True
- _scope = ""
+ scope_ = ""

- self._flatten(mapping, custom_scope_separator, scope_separator_at_start, _scope, _list)
+ self._flatten(mapping, custom_scope_separator, scope_separator_at_start, scope_, list_)

# Non recursive (first) call -> Return the final FlattenedDataOp.
if ret:
-     return OrderedDict(_list)
+     return OrderedDict(list_)
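
A sketch of what `flatten` produces for a nested Dict space (import path and the exact key/repr formats are assumptions):

    from rlgraph.spaces import Dict, FloatBox, IntBox  # import path assumed

    space = Dict(
        position=FloatBox(shape=(2,)),
        camera=Dict(pixels=IntBox(low=0, high=255, shape=(16, 16, 3)))
    )
    flat = space.flatten()
    # Keys are auto-generated from the Dict keys, e.g.:
    # OrderedDict([("/camera/pixels", IntBox(...)), ("/position", FloatBox(...))])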

- def _flatten(self, mapping, custom_scope_separator, scope_separator_at_start, _scope, _list):
+ def _flatten(self, mapping, custom_scope_separator, scope_separator_at_start, scope_, list_):
"""
Base implementation. May be overridden by ContainerSpace classes.
Simply sends `self` through the mapping function.
Args:
mapping (callable): The mapping function to use on a primitive (non-container) Space.
custom_scope_separator (str): The separator to use in the returned dict for scopes.
Default: '/'.
scope_separator_at_start (bool): Whether to add the scope-separator also at the beginning.
Default: True.
- _scope (str): The flat-key to use to store the mapped result in list_.
- _list (list): The list to append the mapped results to (under key=`scope_`).
+ scope\_ (str): The flat-key to use to store the mapped result in list_.
+ list\_ (list): The list to append the mapped results to (under key=`scope_`).
"""
- _list.append(tuple([_scope, mapping(_scope, self)]))
+ list_.append(tuple([scope_, mapping(scope_, self)]))

def __repr__(self):
return "Space(shape=" + str(self.shape) + ")"
@@ -291,9 +306,10 @@ def sample(self, size=None, fill_value=None):
Args:
size (Optional[int]): The number of samples or batch size to sample.
If size is > 1: Returns a batch of size samples with the 0th rank being the batch rank
- (even if `self.has_batch_rank` is False).
+     (even if `self.has_batch_rank` is False).
If size is None or (1 and self.has_batch_rank is False): Returns a single sample w/o batch rank.
If size is 1 and self.has_batch_rank is True: Returns a single sample w/ the batch rank.
fill_value (Optional[any]): The number or initializer specifier to fill the sample. Can be used to create
a (non-random) sample with a certain fill value in all elements.
TODO: support initializer spec-strings like 'normal', 'truncated_normal', etc..
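
Illustrative sampling behavior for a hypothetical FloatBox space (import path assumed):

    from rlgraph.spaces import FloatBox  # import path assumed

    space = FloatBox(shape=(3,))
    space.sample()                # single sample, shape (3,)
    space.sample(size=5)          # batch of 5 samples, shape (5, 3)
    space.sample(fill_value=0.0)  # non-random sample: all elements 0.0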
8 changes: 8 additions & 0 deletions rlgraph/spaces/space_utils.py
@@ -44,9 +44,11 @@ def get_list_registry(from_space, capacity=None, initializer=0, flatten=True, ad
capacity (Optional[int]): Optional capacity to initialize list.
initializer (Optional(any)): Optional initializer for list if capacity is not None.
flatten (bool): Whether to produce a FlattenedDataOp with auto-keys.
add_batch_rank (Optional[bool,int]): If from_space is given and is True, will add a 0th rank (None) to
the created variable. If it is an int, will add that int instead of None.
Default: False.
Returns:
dict: Container dict mapping spaces to empty lists.
"""
@@ -204,17 +206,23 @@ def sanity_check_space(
space (Space): The Space object to check.
allowed_types (Optional[List[type]]): A list of types that this Space must be an instance of.
non_allowed_types (Optional[List[type]]): A list of types that this Space must not be an instance of.
must_have_batch_rank (Optional[bool]): Whether the Space must (True) or must not (False) have the
`has_batch_rank` property set to True. None, if it doesn't matter.
must_have_time_rank (Optional[bool]): Whether the Space must (True) or must not (False) have the
`has_time_rank` property set to True. None, if it doesn't matter.
must_have_batch_or_time_rank (Optional[bool]): Whether the Space must (True) or must not (False) have either
the `has_batch_rank` or the `has_time_rank` property set to True.
must_have_categories (Optional[bool]): For IntBoxes, whether the Space must (True) or must not (False) have
global bounds with `num_categories` > 0. None, if it doesn't matter.
num_categories (Optional[int,tuple]): An int or a tuple (min,max) range within which the Space's
`num_categories` rank must lie. Only valid for IntBoxes.
None if it doesn't matter.
rank (Optional[int,tuple]): An int or a tuple (min,max) range within which the Space's rank must lie.
None if it doesn't matter.
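
A hypothetical check combining several of these constraints (import paths and the IntBox constructor kwargs are assumptions):

    from rlgraph.spaces import IntBox  # import path assumed
    from rlgraph.spaces.space_utils import sanity_check_space

    # Raises if the space is not an IntBox with a batch rank and 2..10 categories.
    sanity_check_space(
        IntBox(low=0, high=3, add_batch_rank=True),
        allowed_types=[IntBox],
        must_have_batch_rank=True,
        must_have_categories=True,
        num_categories=(2, 10),
    )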
