Tensorboard entry point

sintefneodroid · Jul 4, 2019 · 6a596e8
1 parent 5b8bc24
commit 6a596e8
Show file tree

Hide file tree

Showing 31 changed files with 259 additions and 220 deletions.
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
@@ -4,7 +4,7 @@ These guidelines are universal for the entire [Neodroid](https://github.com/sint
 ## Pull Requests
 We welcome pull requests.
 
-1. Fork the repo and create your branch from `master`.
+1. Fork the repo and create your branch from `develop`; we are using the git flow branching model.
 2. If you've added code that should be tested, add tests.
 3. If you've changed APIs, update the documentation.
 

diff --git a/NeodroidAgentPackage.py b/NeodroidAgentPackage.py
diff --git a/agent/agents/ddpg_agent.py b/agent/agents/ddpg_agent.py
@@ -71,7 +71,7 @@ def __defaults__(self) -> None:
                                                             'output_shape':None,  # Obtain from environment
                                                             }))
 
-    self._discount_factor = 0.99
+    self._discount_factor = 0.95
 
     self._initial_observation_period = 10000
     self._learning_frequency = 4

diff --git a/agent/agents/dqn_agent.py b/agent/agents/dqn_agent.py
@@ -45,7 +45,7 @@ def __defaults__(self) -> None:
 
     self._batch_size = 128
 
-    self._discount_factor = 0.99
+    self._discount_factor = 0.95
     self._learning_frequency = 1
     self._initial_observation_period = 0
     self._sync_target_model_frequency = 1000
@@ -171,7 +171,7 @@ def update_models(self, *, stat_writer=None, **kwargs):
 
       # self._memory.batch_update(indices, td_error.tolist())  # Cuda trouble
     else:
-      logging.warning('Batch size is larger than current memory size')
+      logging.info('Batch size is larger than current memory size, skipping update')
 
   def rollout(self,
               initial_state,

diff --git a/agent/agents/pg_agent.py b/agent/agents/pg_agent.py
@@ -53,7 +53,7 @@ def __defaults__(self) -> None:
                                                          }))
 
     self._use_cuda = False
-    self._discount_factor = 0.99
+    self._discount_factor = 0.95
     self._use_batched_updates = False
     self._batch_size = 5
     self._policy_entropy_regularisation = 1

diff --git a/agent/agents/ppo_agent.py b/agent/agents/ppo_agent.py
@@ -33,7 +33,7 @@ class PPOAgent(ActorCriticAgent):
   def __defaults__(self) -> None:
     self._steps = 10
 
-    self._discount_factor = 0.99
+    self._discount_factor = 0.95
     self._gae_tau = 0.95
     # self._reached_horizon_penalty = -10.
 

diff --git a/agent/architectures/mlp.py b/agent/architectures/mlp.py
@@ -39,7 +39,7 @@ def __init__(self,
                output_shape: Sequence = (2,),
                use_bias: bool = True,
                auto_build_hidden_layers_if_none=True,
-               input_multiplier=16,
+               input_multiplier=32,
                output_multiplier=16,
                **kwargs
                ):

diff --git a/agent/configs/agent_test_configs/base_test_config.py b/agent/configs/agent_test_configs/base_test_config.py
@@ -6,20 +6,11 @@
 Author: Christian Heider Nielsen
 '''
 
-
-
-
-
 from agent.configs.base_config import *
 
 CONFIG_NAME = __name__
 CONFIG_FILE = __file__
 
-
-ENVIRONMENT_NAME = 'CartPole-v1'
-# ENVIRONMENT_NAME = 'Acrobot-v1'
-# ENVIRONMENT_NAME = 'MountainCar-v0'
-
 # Mujoco
 # ENVIRONMENT_NAME = 'FetchPickAndPlace-v0'
 # ENVIRONMENT_NAME = 'FetchReach-v1'
@@ -68,4 +59,3 @@
                         )
 
 ROLLOUTS = 10000
-
diff --git a/agent/configs/agent_test_configs/ddpg_test_config.py b/agent/configs/agent_test_configs/ddpg_test_config.py
@@ -31,7 +31,7 @@
 OPTIMISER_LEARNING_RATE = 0.00025
 OPTIMISER_WEIGHT_DECAY = 1e-5
 OPTIMISER_ALPHA = 0.95
-DISCOUNT_FACTOR = 0.99
+DISCOUNT_FACTOR = 0.95
 TARGET_UPDATE_TAU = 3e-3
 RENDER_FREQUENCY = 5
 RENDER_ENVIRONMENT = True

diff --git a/agent/configs/agent_test_configs/dqn_test_config.py b/agent/configs/agent_test_configs/dqn_test_config.py
@@ -14,14 +14,19 @@
 CONFIG_NAME = __name__
 CONFIG_FILE = __file__
 
+ENVIRONMENT_NAME = 'CartPole-v1'
+MODEL_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'models'
+CONFIG_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'configs'
+LOG_DIRECTORY = PROJECT_APP_PATH.user_log / ENVIRONMENT_NAME / LOAD_TIME
+
 INITIAL_OBSERVATION_PERIOD = 0
 LEARNING_FREQUENCY = 1
 REPLAY_MEMORY_SIZE = 10000
 MEMORY = ReplayBuffer(REPLAY_MEMORY_SIZE)
 EXPLORATION_SPEC = ExplorationSpecification(0.99, 0.05, 10000)
 
 BATCH_SIZE = 128
-DISCOUNT_FACTOR = 0.999
+DISCOUNT_FACTOR = 0.95
 RENDER_ENVIRONMENT = True
 SIGNAL_CLIPPING = True
 DOUBLE_DQN = True

diff --git a/agent/configs/agent_test_configs/experimental/cnn_dqn_test_config.py b/agent/configs/agent_test_configs/experimental/cnn_dqn_test_config.py
@@ -26,7 +26,7 @@
 MEMORY = ReplayBuffer(REPLAY_MEMORY_SIZE)
 
 BATCH_SIZE = 128
-DISCOUNT_FACTOR = 0.999
+DISCOUNT_FACTOR = 0.95
 RENDER_ENVIRONMENT = False
 SIGNAL_CLIPPING = True
 DOUBLE_DQN = True

diff --git a/agent/configs/agent_test_configs/experimental/cnn_pg_test_config.py b/agent/configs/agent_test_configs/experimental/cnn_pg_test_config.py
@@ -16,7 +16,7 @@
 
 EVALUATION_FUNCTION = torch.nn.CrossEntropyLoss
 
-DISCOUNT_FACTOR = 0.99
+DISCOUNT_FACTOR = 0.95
 OPTIMISER_LEARNING_RATE = 1e-4
 PG_ENTROPY_REG = 1e-4
 

diff --git a/agent/configs/agent_test_configs/pg_test_config.py b/agent/configs/agent_test_configs/pg_test_config.py
@@ -14,13 +14,18 @@
 
 EVALUATION_FUNCTION = torch.nn.CrossEntropyLoss
 
-DISCOUNT_FACTOR = 0.98
+ENVIRONMENT_NAME = 'CartPole-v1'
+MODEL_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'models'
+CONFIG_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'configs'
+LOG_DIRECTORY = PROJECT_APP_PATH.user_log / ENVIRONMENT_NAME / LOAD_TIME
+
+DISCOUNT_FACTOR = 0.95
 PG_ENTROPY_REG = 3e-3
 
 # Architecture
 POLICY_ARCH_SPEC = GDCS(MLP, NOD(**{
   'input_shape':            None,  # Obtain from environment
-  'hidden_layer_activation':torch.tanh,
+  'hidden_layer_activation':torch.relu,
   'hidden_layers':          None,
   'output_shape':           None,  # Obtain from environment
   'use_bias':               True,

diff --git a/agent/configs/agent_test_configs/ppo_test_config.py b/agent/configs/agent_test_configs/ppo_test_config.py
@@ -37,7 +37,7 @@
 MAX_GRADIENT_NORM = None
 
 GAE_TAU = 0.95
-DISCOUNT_FACTOR = 0.99
+DISCOUNT_FACTOR = 0.95
 
 REACHED_HORIZON_PENALTY = -10.
 
@@ -57,14 +57,14 @@
 # Architecture
 ACTOR_ARCH_SPEC = GDCS(ContinuousActorArchitecture, NOD(**{
   'input_shape':            None,  # Obtain from environment
-  'hidden_layers':          [256],
+  'hidden_layers':          None,
   'hidden_layer_activation':torch.relu,
   'output_shape':           None,  # Obtain from environment
   }))
 
 CRITIC_ARCH_SPEC = GDCS(MLP, NOD(**{
   'input_shape':            None,  # Obtain from environment
-  'hidden_layers':          [256],
+  'hidden_layers':          None,
   'hidden_layer_activation':torch.relu,
   'output_shape':           None,  # Obtain from environment
   }))

diff --git a/agent/configs/base_config.py b/agent/configs/base_config.py
@@ -39,7 +39,7 @@
                         )
 
 # Environment Related Parameters
-ENVIRONMENT_NAME = 'grd'
+ENVIRONMENT_NAME = 'ConnectToRunning'
 CONNECT_TO_RUNNING = False
 RENDER_ENVIRONMENT = False
 SOLVED_REWARD = 0.9
@@ -58,7 +58,7 @@
 SYNC_TARGET_MODEL_FREQUENCY = 10000
 REPLAY_MEMORY_SIZE = 1000000
 INITIAL_OBSERVATION_PERIOD = 10000
-DISCOUNT_FACTOR = 0.99
+DISCOUNT_FACTOR = 0.95
 UPDATE_DIFFICULTY_INTERVAL = 1000
 ROLLOUTS = 4000
 STATE_TYPE = torch.float

diff --git a/agent/configs/reference/mnih_2013_config.py b/agent/configs/reference/mnih_2013_config.py
@@ -61,7 +61,7 @@
 SYNC_TARGET_MODEL_FREQUENCY = 10000
 REPLAY_MEMORY_SIZE = 1000000
 INITIAL_OBSERVATION_PERIOD = 50000
-DISCOUNT_FACTOR = 0.99
+DISCOUNT_FACTOR = 0.95
 
 # Optimiser
 LEARNING_RATE = 0.00025

diff --git a/agent/memory/data_structures/expandable_circular_buffer.py b/agent/memory/data_structures/expandable_circular_buffer.py
@@ -38,7 +38,7 @@ def _sample(self, req_num=None):
       num_entries = len(self._memory)
 
       if req_num > num_entries:
-        logging.warning(f'Buffer only has {num_entries},'
+        logging.info(f'Buffer only has {num_entries},'
                         f' returning {num_entries} entries'
                         f' of the requested {req_num}')
         req_num = len(self._memory)