Skip to content

Commit

Permalink
Tensorboard entry point
Browse files Browse the repository at this point in the history
  • Loading branch information
cnheider committed Jul 4, 2019
1 parent 5b8bc24 commit 6a596e8
Show file tree
Hide file tree
Showing 31 changed files with 259 additions and 220 deletions.
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ These guidelines are universal for the entire [Neodroid](https://github.com/sint
## Pull Requests
We welcome pull requests.

1. Fork the repo and create your branch from `master`.
1. Fork the repo and create your branch from `develop`; we use the git flow branching model.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.

Expand Down
172 changes: 0 additions & 172 deletions NeodroidAgentPackage.py

This file was deleted.

2 changes: 1 addition & 1 deletion agent/agents/ddpg_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __defaults__(self) -> None:
'output_shape':None, # Obtain from environment
}))

self._discount_factor = 0.99
self._discount_factor = 0.95

self._initial_observation_period = 10000
self._learning_frequency = 4
Expand Down
4 changes: 2 additions & 2 deletions agent/agents/dqn_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __defaults__(self) -> None:

self._batch_size = 128

self._discount_factor = 0.99
self._discount_factor = 0.95
self._learning_frequency = 1
self._initial_observation_period = 0
self._sync_target_model_frequency = 1000
Expand Down Expand Up @@ -171,7 +171,7 @@ def update_models(self, *, stat_writer=None, **kwargs):

# self._memory.batch_update(indices, td_error.tolist()) # Cuda trouble
else:
logging.warning('Batch size is larger than current memory size')
logging.info('Batch size is larger than current memory size, skipping update')

def rollout(self,
initial_state,
Expand Down
2 changes: 1 addition & 1 deletion agent/agents/pg_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __defaults__(self) -> None:
}))

self._use_cuda = False
self._discount_factor = 0.99
self._discount_factor = 0.95
self._use_batched_updates = False
self._batch_size = 5
self._policy_entropy_regularisation = 1
Expand Down
2 changes: 1 addition & 1 deletion agent/agents/ppo_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class PPOAgent(ActorCriticAgent):
def __defaults__(self) -> None:
self._steps = 10

self._discount_factor = 0.99
self._discount_factor = 0.95
self._gae_tau = 0.95
# self._reached_horizon_penalty = -10.

Expand Down
2 changes: 1 addition & 1 deletion agent/architectures/mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self,
output_shape: Sequence = (2,),
use_bias: bool = True,
auto_build_hidden_layers_if_none=True,
input_multiplier=16,
input_multiplier=32,
output_multiplier=16,
**kwargs
):
Expand Down
10 changes: 0 additions & 10 deletions agent/configs/agent_test_configs/base_test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,11 @@
Author: Christian Heider Nielsen
'''





from agent.configs.base_config import *

CONFIG_NAME = __name__
CONFIG_FILE = __file__


ENVIRONMENT_NAME = 'CartPole-v1'
# ENVIRONMENT_NAME = 'Acrobot-v1'
# ENVIRONMENT_NAME = 'MountainCar-v0'

# Mujoco
# ENVIRONMENT_NAME = 'FetchPickAndPlace-v0'
# ENVIRONMENT_NAME = 'FetchReach-v1'
Expand Down Expand Up @@ -68,4 +59,3 @@
)

ROLLOUTS = 10000

2 changes: 1 addition & 1 deletion agent/configs/agent_test_configs/ddpg_test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
OPTIMISER_LEARNING_RATE = 0.00025
OPTIMISER_WEIGHT_DECAY = 1e-5
OPTIMISER_ALPHA = 0.95
DISCOUNT_FACTOR = 0.99
DISCOUNT_FACTOR = 0.95
TARGET_UPDATE_TAU = 3e-3
RENDER_FREQUENCY = 5
RENDER_ENVIRONMENT = True
Expand Down
7 changes: 6 additions & 1 deletion agent/configs/agent_test_configs/dqn_test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@
CONFIG_NAME = __name__
CONFIG_FILE = __file__

ENVIRONMENT_NAME = 'CartPole-v1'
MODEL_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'models'
CONFIG_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'configs'
LOG_DIRECTORY = PROJECT_APP_PATH.user_log / ENVIRONMENT_NAME / LOAD_TIME

INITIAL_OBSERVATION_PERIOD = 0
LEARNING_FREQUENCY = 1
REPLAY_MEMORY_SIZE = 10000
MEMORY = ReplayBuffer(REPLAY_MEMORY_SIZE)
EXPLORATION_SPEC = ExplorationSpecification(0.99, 0.05, 10000)

BATCH_SIZE = 128
DISCOUNT_FACTOR = 0.999
DISCOUNT_FACTOR = 0.95
RENDER_ENVIRONMENT = True
SIGNAL_CLIPPING = True
DOUBLE_DQN = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
MEMORY = ReplayBuffer(REPLAY_MEMORY_SIZE)

BATCH_SIZE = 128
DISCOUNT_FACTOR = 0.999
DISCOUNT_FACTOR = 0.95
RENDER_ENVIRONMENT = False
SIGNAL_CLIPPING = True
DOUBLE_DQN = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

EVALUATION_FUNCTION = torch.nn.CrossEntropyLoss

DISCOUNT_FACTOR = 0.99
DISCOUNT_FACTOR = 0.95
OPTIMISER_LEARNING_RATE = 1e-4
PG_ENTROPY_REG = 1e-4

Expand Down
9 changes: 7 additions & 2 deletions agent/configs/agent_test_configs/pg_test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,18 @@

EVALUATION_FUNCTION = torch.nn.CrossEntropyLoss

DISCOUNT_FACTOR = 0.98
ENVIRONMENT_NAME = 'CartPole-v1'
MODEL_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'models'
CONFIG_DIRECTORY = PROJECT_APP_PATH.user_data / ENVIRONMENT_NAME / LOAD_TIME / 'configs'
LOG_DIRECTORY = PROJECT_APP_PATH.user_log / ENVIRONMENT_NAME / LOAD_TIME

DISCOUNT_FACTOR = 0.95
PG_ENTROPY_REG = 3e-3

# Architecture
POLICY_ARCH_SPEC = GDCS(MLP, NOD(**{
'input_shape': None, # Obtain from environment
'hidden_layer_activation':torch.tanh,
'hidden_layer_activation':torch.relu,
'hidden_layers': None,
'output_shape': None, # Obtain from environment
'use_bias': True,
Expand Down
6 changes: 3 additions & 3 deletions agent/configs/agent_test_configs/ppo_test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
MAX_GRADIENT_NORM = None

GAE_TAU = 0.95
DISCOUNT_FACTOR = 0.99
DISCOUNT_FACTOR = 0.95

REACHED_HORIZON_PENALTY = -10.

Expand All @@ -57,14 +57,14 @@
# Architecture
ACTOR_ARCH_SPEC = GDCS(ContinuousActorArchitecture, NOD(**{
'input_shape': None, # Obtain from environment
'hidden_layers': [256],
'hidden_layers': None,
'hidden_layer_activation':torch.relu,
'output_shape': None, # Obtain from environment
}))

CRITIC_ARCH_SPEC = GDCS(MLP, NOD(**{
'input_shape': None, # Obtain from environment
'hidden_layers': [256],
'hidden_layers': None,
'hidden_layer_activation':torch.relu,
'output_shape': None, # Obtain from environment
}))
Expand Down
4 changes: 2 additions & 2 deletions agent/configs/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)

# Environment Related Parameters
ENVIRONMENT_NAME = 'grd'
ENVIRONMENT_NAME = 'ConnectToRunning'
CONNECT_TO_RUNNING = False
RENDER_ENVIRONMENT = False
SOLVED_REWARD = 0.9
Expand All @@ -58,7 +58,7 @@
SYNC_TARGET_MODEL_FREQUENCY = 10000
REPLAY_MEMORY_SIZE = 1000000
INITIAL_OBSERVATION_PERIOD = 10000
DISCOUNT_FACTOR = 0.99
DISCOUNT_FACTOR = 0.95
UPDATE_DIFFICULTY_INTERVAL = 1000
ROLLOUTS = 4000
STATE_TYPE = torch.float
Expand Down
2 changes: 1 addition & 1 deletion agent/configs/reference/mnih_2013_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
SYNC_TARGET_MODEL_FREQUENCY = 10000
REPLAY_MEMORY_SIZE = 1000000
INITIAL_OBSERVATION_PERIOD = 50000
DISCOUNT_FACTOR = 0.99
DISCOUNT_FACTOR = 0.95

# Optimiser
LEARNING_RATE = 0.00025
Expand Down
2 changes: 1 addition & 1 deletion agent/memory/data_structures/expandable_circular_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def _sample(self, req_num=None):
num_entries = len(self._memory)

if req_num > num_entries:
logging.warning(f'Buffer only has {num_entries},'
logging.info(f'Buffer only has {num_entries},'
f' returning {num_entries} entries'
f' of the requested {req_num}')
req_num = len(self._memory)
Expand Down
Loading

0 comments on commit 6a596e8

Please sign in to comment.