Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -8,3 +8,4 @@ src/algorithms/__pycache__/
src/policies/__pycache__/
src/apps/__pycache__/
src/.coverage
src/maths/__pycache__/
13 changes: 13 additions & 0 deletions docs/source/API/a2c.rst
@@ -0,0 +1,13 @@
a2c
===
.. automodule:: a2c
.. autoclass:: A2CNetBase
:members: __init__, forward

.. autoclass:: A2CNet
:members: __init__, forward

.. autoclass:: A2CConfig

.. autoclass:: A2C
:members: update_parameters, __init__, share_memory, parameters, on_episode, _do_train
15 changes: 15 additions & 0 deletions docs/source/API/column_type.rst
@@ -0,0 +1,15 @@
column\_type
============

.. automodule:: column_type

.. autoclass:: ColumnType

31 changes: 31 additions & 0 deletions docs/source/API/discrete_state_environment.rst
@@ -0,0 +1,31 @@
discrete\_state\_environment
============================

.. automodule:: discrete_state_environment

.. rubric:: Classes

.. autosummary::

DiscreteEnvConfig
DiscreteStateEnvironment
MultiprocessEnv

30 changes: 30 additions & 0 deletions docs/source/API/exceptions.rst
@@ -0,0 +1,30 @@
exceptions
==========

.. automodule:: exceptions

.. autoclass:: Error
:members: __init__, __str__

.. autoclass:: IncompatibleVectorSizesException
:members: __init__, __str__

.. autoclass:: InvalidDataTypeException
:members: __init__, __str__

.. autoclass:: InvalidFileFormat
:members: __init__, __str__

.. autoclass:: InvalidParamValue
:members: __init__, __str__

.. autoclass:: InvalidSchemaException
:members: __init__, __str__

.. autoclass:: InvalidStateException
:members: __init__, __str__

6 changes: 6 additions & 0 deletions docs/source/API/optimizer_type.rst
@@ -0,0 +1,6 @@
optimizer\_type
===============

.. automodule:: optimizer_type

.. autoclass:: OptimizerType
5 changes: 5 additions & 0 deletions docs/source/API/pytorch_optimizer_builder.rst
@@ -0,0 +1,5 @@
pytorch\_optimizer\_builder
===========================

.. automodule:: pytorch_optimizer_builder
:members: pytorch_optimizer_builder
7 changes: 7 additions & 0 deletions docs/source/API/replay_buffer.rst
@@ -0,0 +1,7 @@
replay\_buffer
==============

.. automodule:: replay_buffer

.. autoclass:: ReplayBuffer
:members: __init__, __len__, __getitem__, add, sample, get_item_as_torch_tensor, reinitialize
44 changes: 44 additions & 0 deletions docs/source/API/tiled_environment.rst
@@ -0,0 +1,44 @@
tiled\_environment
==================

.. automodule:: tiled_environment

.. autoclass:: TiledEnvConfig

.. autoclass:: Tile
:members: __init__, build, search

.. autoclass:: Layer
:members: n_tiles_per_action, __init__, __len__, build_tiles, get_global_tile_index, _do_build_tile, _do_build_three_columns


.. autoclass:: Tiles
:members: __init__, __getitem__, __len__, build


.. autoclass:: TiledEnv
:members: from_options, __init__, action_space, n_actions, n_states, config, step, reset, get_state_action_tile_matrix, get_action, save_current_dataset, create_tiles, get_aggregated_state, initialize_column_counts, all_columns_visited, initialize_distances, apply_action, total_current_distortion, featurize_state_action, featurize_raw_state, _create_column_scales, _validate

.. rubric:: Classes

.. autosummary::

TiledEnv
TiledEnvConfig

23 changes: 23 additions & 0 deletions docs/source/Examples/a2c_three_columns.rst
@@ -0,0 +1,23 @@
A2C algorithm on three columns data set
=======================================


A2C algorithm
-------------

Both the Q-learning algorithm we used in `Q-learning on a three columns dataset <qlearning_three_columns.html>`_ and the SARSA algorithm in
`Semi-gradient SARSA on a three columns data set`_ are value-based methods; that is, they estimate a value function, specifically the state-action function
:math:`Q`. Once :math:`Q` is known, we can derive a policy from it; for example, a greedy policy selects, at state :math:`s_t`, the action that
maximizes the state-action function, i.e. :math:`\arg\max_{\alpha}Q(s_t, \alpha)`.
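As a concrete illustration of acting greedily with respect to :math:`Q` (the tabular :math:`Q` and the integer state/action encoding below are made up for the example and are not the project's actual data structures):

```python
# Hypothetical tabular Q: maps (state, action) -> estimated value.
Q = {(0, 0): 0.1, (0, 1): 0.7,
     (1, 0): 0.4, (1, 1): 0.2}

def greedy_action(Q, state, n_actions):
    """Pick argmax_a Q(state, a), i.e. act greedily with respect to Q."""
    return max(range(n_actions), key=lambda a: Q[(state, a)])

greedy_action(Q, 0, 2)  # -> 1, since Q[(0, 1)] is the largest value in state 0
```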

However, the true objective of reinforcement learning is to learn a policy :math:`\pi` directly.


The main advantage of learning a parametrized policy is that it can be any learnable function, e.g. a linear model or a deep neural network.

The A2C algorithm falls under the umbrella of actor-critic methods [REF]. In these methods, we estimate both a parametrized policy, the actor,
and a parametrized value function, the critic.


Specifically, we will use a weight-sharing model for the actor and the critic. Moreover, the environment is a multi-process class that gathers samples from multiple
environments at once.
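A weight-sharing actor-critic model can be sketched as below. This is only an illustrative sketch assuming PyTorch; the class name, layer sizes, and architecture are hypothetical and do not reflect the actual ``A2CNet`` implementation:

```python
import torch
import torch.nn as nn

class SharedActorCritic(nn.Module):
    """Hypothetical weight-sharing actor-critic network.

    A common body extracts features from the state; the actor head
    outputs action logits (the policy) and the critic head a scalar
    state value. Only the two small heads have separate weights.
    """

    def __init__(self, state_dim: int, n_actions: int, hidden: int = 64):
        super().__init__()
        # shared feature extractor, used by both actor and critic
        self.body = nn.Sequential(nn.Linear(state_dim, hidden), nn.Tanh())
        self.actor = nn.Linear(hidden, n_actions)  # policy logits pi(a|s)
        self.critic = nn.Linear(hidden, 1)         # state value V(s)

    def forward(self, state: torch.Tensor):
        features = self.body(state)
        return self.actor(features), self.critic(features)

net = SharedActorCritic(state_dim=4, n_actions=2)
logits, value = net(torch.zeros(1, 4))  # logits: (1, 2), value: (1, 1)
```

Because the body is shared, gradients from both the policy loss and the value loss flow through the same feature weights, which is what makes this a weight-sharing model.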
2 changes: 2 additions & 0 deletions docs/source/conf.py
@@ -18,6 +18,8 @@
sys.path.append(os.path.abspath("../../src/exceptions/"))
sys.path.append(os.path.abspath("../../src/spaces/"))
sys.path.append(os.path.abspath("../../src/policies/"))
sys.path.append(os.path.abspath("../../src/maths/"))
sys.path.append(os.path.abspath("../../src/utils/"))
print(sys.path)


1 change: 1 addition & 0 deletions docs/source/examples.rst
@@ -8,3 +8,4 @@ Some examples can be found below

Examples/qlearning_three_columns
Examples/semi_gradient_sarsa_three_columns
Examples/a2c_three_columns
15 changes: 8 additions & 7 deletions docs/source/modules.rst
@@ -12,11 +12,12 @@ API
API/epsilon_greedy_q_estimator
API/q_learning
API/trainer
generated/q_estimator
generated/exceptions
generated/action_space
generated/column_type
generated/discrete_state_environment
generated/observation_space
generated/tiled_environment
API/optimizer_type
API/pytorch_optimizer_builder
API/replay_buffer
API/a2c
API/exceptions
API/column_type
API/discrete_state_environment
API/tiled_environment
