Skip to content

Commit

Permalink
Merge pull request #266 from kengz/fix-cuda-id
Browse files (browse the repository at this point in the history)
Atari fitness, CUDA fix
  • Loading branch information
kengz committed Jan 6, 2019
2 parents 8b54e41 + 890ad02 commit 7b1735a
Show file tree
Hide file tree
Showing 19 changed files with 218 additions and 19 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Expand Up @@ -40,7 +40,7 @@ dependencies:
- urllib3=1.22=py36_0
- wheel=0.30.0=py36_2
- xlrd=1.1.0=py_2
- pytorch=1.0.0
- pytorch-nightly=1.0.0
- torchvision=0.2.1
- pip:
- atari-py==0.1.1
Expand Down
3 changes: 1 addition & 2 deletions slm_lab/lib/util.py
Expand Up @@ -616,7 +616,6 @@ def to_json(d, indent=2):
return json.dumps(d, indent=indent, cls=LabJsonEncoder)



def to_render():
return get_lab_mode() in ('dev', 'enjoy') and os.environ.get('RENDER', 'true') == 'true'

Expand All @@ -642,12 +641,12 @@ def try_set_cuda_id(spec, info_space):
trial_idx = info_space.get('trial') or 0
session_idx = info_space.get('session') or 0
job_idx = trial_idx * spec['meta']['max_session'] + session_idx
+ job_idx += int(os.environ.get('CUDA_ID_OFFSET', 0))
device_count = torch.cuda.device_count()
if device_count == 0:
cuda_id = None
else:
cuda_id = job_idx % device_count
- cuda_id += int(os.environ.get('CUDA_ID_OFFSET', 0))

for agent_spec in spec['agent']:
agent_spec['net']['cuda_id'] = cuda_id
Expand Down
42 changes: 41 additions & 1 deletion slm_lab/spec/_fitness_std.json
Expand Up @@ -53,5 +53,45 @@
"rand_epi_reward": -250,
"std_epi_reward": 200,
"std_timestep": 150000
}
},
"BeamRiderNoFrameskip-v4": {
"rand_epi_reward": 363.9,
"std_epi_reward": 6846,
"std_timestep": 1000000
},
"BreakoutNoFrameskip-v4": {
"rand_epi_reward": 1.7,
"std_epi_reward": 401.2,
"std_timestep": 1000000
},
"EnduroNoFrameskip-v4": {
"rand_epi_reward": 0,
"std_epi_reward": 301.8,
"std_timestep": 1000000
},
"MsPacmanNoFrameskip-v4": {
"rand_epi_reward": 307.3,
"std_epi_reward": 2311,
"std_timestep": 1000000
},
"PongNoFrameskip-v4": {
"rand_epi_reward": -20.7,
"std_epi_reward": 18.9,
"std_timestep": 1000000
},
"QbertNoFrameskip-v4": {
"rand_epi_reward": 163.9,
"std_epi_reward": 10596,
"std_timestep": 1000000
},
"SeaquestNoFrameskip-v4": {
"rand_epi_reward": 68.4,
"std_epi_reward": 5286,
"std_timestep": 1000000
},
"SpaceInvadersNoFrameskip-v4": {
"rand_epi_reward": 148,
"std_epi_reward": 1976,
"std_timestep": 1000000
}
}
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_beamrider.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_breakout.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_enduro.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
80 changes: 80 additions & 0 deletions slm_lab/spec/experimental/ddqn_mspacman.json
@@ -0,0 +1,80 @@
{
"ddqn_mspacman": {
"agent": [{
"name": "DoubleDQN",
"algorithm": {
"name": "DoubleDQN",
"action_pdtype": "Argmax",
"action_policy": "epsilon_greedy",
"explore_var_spec": {
"name": "linear_decay",
"start_val": 1.0,
"end_val": 0.05,
"start_step": 30000,
"end_step": 1000000
},
"gamma": 0.99,
"training_batch_epoch": 1,
"training_epoch": 1,
"training_frequency": 4,
"training_start_step": 30000,
"normalize_state": false
},
"memory": {
"name": "AtariReplay",
"batch_size": 32,
"max_size": 250000,
"stack_len": 4,
"use_cer": false
},
"net": {
"type": "ConvNet",
"conv_hid_layers": [
[32, 8, 4, 0, 1],
[64, 4, 2, 0, 1],
[64, 3, 1, 0, 1]
],
"fc_hid_layers": [512],
"hid_layers_activation": "relu",
"init_fn": null,
"batch_norm": false,
"clip_grad_val": 1.0,
"loss_spec": {
"name": "SmoothL1Loss"
},
"optim_spec": {
"name": "RMSprop",
"lr": 2.5e-4,
"alpha": 0.95,
"eps": 1e-1,
"momentum": 0.95
},
"lr_scheduler_spec": null,
"update_type": "replace",
"update_frequency": 10000,
"gpu": true
}
}],
"env": [{
"name": "MsPacmanNoFrameskip-v4",
"max_t": null,
"max_tick_unit": "total_t",
"max_tick": 10000000,
"save_frequency": 100000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
"resources": {
"num_cpus": 16
}
}
}
}
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_pong.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_qbert.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_seaquest.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/ddqn_spaceinvaders.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
4 changes: 2 additions & 2 deletions slm_lab/spec/experimental/dqn_beamrider.json
Expand Up @@ -71,7 +71,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 12,
"search": "RandomSearch",
"resources": {
Expand Down Expand Up @@ -162,7 +162,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 12,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/dqn_breakout.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/dqn_enduro.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
80 changes: 80 additions & 0 deletions slm_lab/spec/experimental/dqn_mspacman.json
@@ -0,0 +1,80 @@
{
"dqn_mspacman": {
"agent": [{
"name": "DQN",
"algorithm": {
"name": "DQN",
"action_pdtype": "Argmax",
"action_policy": "epsilon_greedy",
"explore_var_spec": {
"name": "linear_decay",
"start_val": 1.0,
"end_val": 0.05,
"start_step": 30000,
"end_step": 1000000
},
"gamma": 0.99,
"training_batch_epoch": 1,
"training_epoch": 1,
"training_frequency": 4,
"training_start_step": 30000,
"normalize_state": false
},
"memory": {
"name": "AtariReplay",
"batch_size": 32,
"max_size": 250000,
"stack_len": 4,
"use_cer": false
},
"net": {
"type": "ConvNet",
"conv_hid_layers": [
[32, 8, 4, 0, 1],
[64, 4, 2, 0, 1],
[64, 3, 1, 0, 1]
],
"fc_hid_layers": [512],
"hid_layers_activation": "relu",
"init_fn": null,
"batch_norm": false,
"clip_grad_val": 1.0,
"loss_spec": {
"name": "SmoothL1Loss"
},
"optim_spec": {
"name": "RMSprop",
"lr": 2.5e-4,
"alpha": 0.95,
"eps": 1e-1,
"momentum": 0.95
},
"lr_scheduler_spec": null,
"update_type": "replace",
"update_frequency": 10000,
"gpu": true
}
}],
"env": [{
"name": "MsPacmanNoFrameskip-v4",
"max_t": null,
"max_tick_unit": "total_t",
"max_tick": 10000000,
"save_frequency": 100000
}],
"body": {
"product": "outer",
"num": 1
},
"meta": {
"distributed": false,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
"resources": {
"num_cpus": 16
}
}
}
}
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/dqn_pong.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/dqn_qbert.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/dqn_seaquest.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down
2 changes: 1 addition & 1 deletion slm_lab/spec/experimental/dqn_spaceinvaders.json
Expand Up @@ -68,7 +68,7 @@
},
"meta": {
"distributed": false,
"max_session": 1,
"max_session": 6,
"max_trial": 16,
"training_eval": true,
"search": "RandomSearch",
Expand Down

0 comments on commit 7b1735a

Please sign in to comment.