diff --git a/environment.yml b/environment.yml index e8948bc7c..24509f44d 100644 --- a/environment.yml +++ b/environment.yml @@ -40,7 +40,7 @@ dependencies: - urllib3=1.22=py36_0 - wheel=0.30.0=py36_2 - xlrd=1.1.0=py_2 - - pytorch=1.0.0 + - pytorch-nightly=1.0.0 - torchvision=0.2.1 - pip: - atari-py==0.1.1 diff --git a/slm_lab/lib/util.py b/slm_lab/lib/util.py index 384907bb7..01c9136fd 100644 --- a/slm_lab/lib/util.py +++ b/slm_lab/lib/util.py @@ -616,7 +616,6 @@ def to_json(d, indent=2): return json.dumps(d, indent=indent, cls=LabJsonEncoder) - def to_render(): return get_lab_mode() in ('dev', 'enjoy') and os.environ.get('RENDER', 'true') == 'true' @@ -642,12 +641,12 @@ def try_set_cuda_id(spec, info_space): trial_idx = info_space.get('trial') or 0 session_idx = info_space.get('session') or 0 job_idx = trial_idx * spec['meta']['max_session'] + session_idx + job_idx += int(os.environ.get('CUDA_ID_OFFSET', 0)) device_count = torch.cuda.device_count() if device_count == 0: cuda_id = None else: cuda_id = job_idx % device_count - cuda_id += int(os.environ.get('CUDA_ID_OFFSET', 0)) for agent_spec in spec['agent']: agent_spec['net']['cuda_id'] = cuda_id diff --git a/slm_lab/spec/_fitness_std.json b/slm_lab/spec/_fitness_std.json index 0b9c2230a..50e60c1b1 100644 --- a/slm_lab/spec/_fitness_std.json +++ b/slm_lab/spec/_fitness_std.json @@ -53,5 +53,45 @@ "rand_epi_reward": -250, "std_epi_reward": 200, "std_timestep": 150000 - } + }, + "BeamRiderNoFrameskip-v4": { + "rand_epi_reward": 363.9, + "std_epi_reward": 6846, + "std_timestep": 1000000 + }, + "BreakoutNoFrameskip-v4": { + "rand_epi_reward": 1.7, + "std_epi_reward": 401.2, + "std_timestep": 1000000 + }, + "EnduroNoFrameskip-v4": { + "rand_epi_reward": 0, + "std_epi_reward": 301.8, + "std_timestep": 1000000 + }, + "MsPacmanNoFrameskip-v4": { + "rand_epi_reward": 307.3, + "std_epi_reward": 2311, + "std_timestep": 1000000 + }, + "PongNoFrameskip-v4": { + "rand_epi_reward": -20.7, + "std_epi_reward": 18.9, + 
"std_timestep": 1000000 + }, + "QbertNoFrameskip-v4": { + "rand_epi_reward": 163.9, + "std_epi_reward": 10596, + "std_timestep": 1000000 + }, + "SeaquestNoFrameskip-v4": { + "rand_epi_reward": 68.4, + "std_epi_reward": 5286, + "std_timestep": 1000000 + }, + "SpaceInvadersNoFrameskip-v4": { + "rand_epi_reward": 148, + "std_epi_reward": 1976, + "std_timestep": 1000000 + } } diff --git a/slm_lab/spec/experimental/ddqn_beamrider.json b/slm_lab/spec/experimental/ddqn_beamrider.json index 6abee04b6..37c15432d 100644 --- a/slm_lab/spec/experimental/ddqn_beamrider.json +++ b/slm_lab/spec/experimental/ddqn_beamrider.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/ddqn_breakout.json b/slm_lab/spec/experimental/ddqn_breakout.json index 1045de1a9..15617386e 100644 --- a/slm_lab/spec/experimental/ddqn_breakout.json +++ b/slm_lab/spec/experimental/ddqn_breakout.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/ddqn_enduro.json b/slm_lab/spec/experimental/ddqn_enduro.json index 7047fe702..d967ead20 100644 --- a/slm_lab/spec/experimental/ddqn_enduro.json +++ b/slm_lab/spec/experimental/ddqn_enduro.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/ddqn_mspacman.json b/slm_lab/spec/experimental/ddqn_mspacman.json new file mode 100644 index 000000000..2fe49376a --- /dev/null +++ b/slm_lab/spec/experimental/ddqn_mspacman.json @@ -0,0 +1,80 @@ +{ + "ddqn_mspacman": { + "agent": [{ + "name": "DoubleDQN", + "algorithm": { + "name": "DoubleDQN", + "action_pdtype": "Argmax", + "action_policy": "epsilon_greedy", + "explore_var_spec": { + 
"name": "linear_decay", + "start_val": 1.0, + "end_val": 0.05, + "start_step": 30000, + "end_step": 1000000 + }, + "gamma": 0.99, + "training_batch_epoch": 1, + "training_epoch": 1, + "training_frequency": 4, + "training_start_step": 30000, + "normalize_state": false + }, + "memory": { + "name": "AtariReplay", + "batch_size": 32, + "max_size": 250000, + "stack_len": 4, + "use_cer": false + }, + "net": { + "type": "ConvNet", + "conv_hid_layers": [ + [32, 8, 4, 0, 1], + [64, 4, 2, 0, 1], + [64, 3, 1, 0, 1] + ], + "fc_hid_layers": [512], + "hid_layers_activation": "relu", + "init_fn": null, + "batch_norm": false, + "clip_grad_val": 1.0, + "loss_spec": { + "name": "SmoothL1Loss" + }, + "optim_spec": { + "name": "RMSprop", + "lr": 2.5e-4, + "alpha": 0.95, + "eps": 1e-1, + "momentum": 0.95 + }, + "lr_scheduler_spec": null, + "update_type": "replace", + "update_frequency": 10000, + "gpu": true + } + }], + "env": [{ + "name": "MsPacmanNoFrameskip-v4", + "max_t": null, + "max_tick_unit": "total_t", + "max_tick": 10000000, + "save_frequency": 100000 + }], + "body": { + "product": "outer", + "num": 1 + }, + "meta": { + "distributed": false, + "max_session": 6, + "max_trial": 16, + "training_eval": true, + "search": "RandomSearch", + "resources": { + "num_cpus": 16 + } + } + } +} diff --git a/slm_lab/spec/experimental/ddqn_pong.json b/slm_lab/spec/experimental/ddqn_pong.json index b4097ce89..7be6b52a6 100644 --- a/slm_lab/spec/experimental/ddqn_pong.json +++ b/slm_lab/spec/experimental/ddqn_pong.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/ddqn_qbert.json b/slm_lab/spec/experimental/ddqn_qbert.json index 2a5c2b05a..45a0aaf56 100644 --- a/slm_lab/spec/experimental/ddqn_qbert.json +++ b/slm_lab/spec/experimental/ddqn_qbert.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, 
"max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/ddqn_seaquest.json b/slm_lab/spec/experimental/ddqn_seaquest.json index ce81b7f5a..91ff09e12 100644 --- a/slm_lab/spec/experimental/ddqn_seaquest.json +++ b/slm_lab/spec/experimental/ddqn_seaquest.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/ddqn_spaceinvaders.json b/slm_lab/spec/experimental/ddqn_spaceinvaders.json index ea4c9dd72..2e7b5229e 100644 --- a/slm_lab/spec/experimental/ddqn_spaceinvaders.json +++ b/slm_lab/spec/experimental/ddqn_spaceinvaders.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_beamrider.json b/slm_lab/spec/experimental/dqn_beamrider.json index 53a16ab1b..5d3bd8e09 100644 --- a/slm_lab/spec/experimental/dqn_beamrider.json +++ b/slm_lab/spec/experimental/dqn_beamrider.json @@ -71,7 +71,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 12, "search": "RandomSearch", "resources": { @@ -162,7 +162,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 12, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_breakout.json b/slm_lab/spec/experimental/dqn_breakout.json index 2f3b84b8b..f4fa85ba3 100644 --- a/slm_lab/spec/experimental/dqn_breakout.json +++ b/slm_lab/spec/experimental/dqn_breakout.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_enduro.json b/slm_lab/spec/experimental/dqn_enduro.json index a03ef5170..a4e03576e 100644 --- 
a/slm_lab/spec/experimental/dqn_enduro.json +++ b/slm_lab/spec/experimental/dqn_enduro.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_mspacman.json b/slm_lab/spec/experimental/dqn_mspacman.json new file mode 100644 index 000000000..da4f913fe --- /dev/null +++ b/slm_lab/spec/experimental/dqn_mspacman.json @@ -0,0 +1,80 @@ +{ + "dqn_mspacman": { + "agent": [{ + "name": "DQN", + "algorithm": { + "name": "DQN", + "action_pdtype": "Argmax", + "action_policy": "epsilon_greedy", + "explore_var_spec": { + "name": "linear_decay", + "start_val": 1.0, + "end_val": 0.05, + "start_step": 30000, + "end_step": 1000000 + }, + "gamma": 0.99, + "training_batch_epoch": 1, + "training_epoch": 1, + "training_frequency": 4, + "training_start_step": 30000, + "normalize_state": false + }, + "memory": { + "name": "AtariReplay", + "batch_size": 32, + "max_size": 250000, + "stack_len": 4, + "use_cer": false + }, + "net": { + "type": "ConvNet", + "conv_hid_layers": [ + [32, 8, 4, 0, 1], + [64, 4, 2, 0, 1], + [64, 3, 1, 0, 1] + ], + "fc_hid_layers": [512], + "hid_layers_activation": "relu", + "init_fn": null, + "batch_norm": false, + "clip_grad_val": 1.0, + "loss_spec": { + "name": "SmoothL1Loss" + }, + "optim_spec": { + "name": "RMSprop", + "lr": 2.5e-4, + "alpha": 0.95, + "eps": 1e-1, + "momentum": 0.95 + }, + "lr_scheduler_spec": null, + "update_type": "replace", + "update_frequency": 10000, + "gpu": true + } + }], + "env": [{ + "name": "MsPacmanNoFrameskip-v4", + "max_t": null, + "max_tick_unit": "total_t", + "max_tick": 10000000, + "save_frequency": 100000 + }], + "body": { + "product": "outer", + "num": 1 + }, + "meta": { + "distributed": false, + "max_session": 6, + "max_trial": 16, + "training_eval": true, + "search": "RandomSearch", + "resources": { + "num_cpus": 16 + } + } + } +} diff --git 
a/slm_lab/spec/experimental/dqn_pong.json b/slm_lab/spec/experimental/dqn_pong.json index e6ac47d44..793d18ec9 100644 --- a/slm_lab/spec/experimental/dqn_pong.json +++ b/slm_lab/spec/experimental/dqn_pong.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_qbert.json b/slm_lab/spec/experimental/dqn_qbert.json index 40729bdab..16460893a 100644 --- a/slm_lab/spec/experimental/dqn_qbert.json +++ b/slm_lab/spec/experimental/dqn_qbert.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_seaquest.json b/slm_lab/spec/experimental/dqn_seaquest.json index 7cdd72318..fd5022c82 100644 --- a/slm_lab/spec/experimental/dqn_seaquest.json +++ b/slm_lab/spec/experimental/dqn_seaquest.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch", diff --git a/slm_lab/spec/experimental/dqn_spaceinvaders.json b/slm_lab/spec/experimental/dqn_spaceinvaders.json index e34d3ef90..fd34f07c2 100644 --- a/slm_lab/spec/experimental/dqn_spaceinvaders.json +++ b/slm_lab/spec/experimental/dqn_spaceinvaders.json @@ -68,7 +68,7 @@ }, "meta": { "distributed": false, - "max_session": 1, + "max_session": 6, "max_trial": 16, "training_eval": true, "search": "RandomSearch",