This repository has been archived by the owner on Apr 7, 2018. It is now read-only.

Commit

Added option to toggle visualisation of the worker
Bharat Kunwar authored and tlbtlbtlb committed Jan 31, 2017
1 parent 3f85a63 commit a3fdfba
Showing 4 changed files with 26 additions and 9 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -61,6 +61,10 @@ Using 16 workers, the agent should be able to solve `PongDeterministic-v3` (not
Using 32 workers, the agent is able to solve the same environment in 10 minutes on an `m4.16xlarge` instance.
If you run this experiment on a high-end MacBook Pro, the above job will take just under 2 hours to solve Pong.

Add the '--visualise' toggle if you want to visualise the worker using env.render(), as follows:

`python train.py --num-workers 2 --env-id PongDeterministic-v3 --log-dir /tmp/pong --visualise`

![pong](https://github.com/openai/universe-starter-agent/raw/master/imgs/tb_pong.png "Pong")

For best performance, it is recommended that the number of workers not exceed the number of available CPU cores.
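
If you want to pick the worker count programmatically, a snippet along these lines (illustrative only, not part of this repository) caps it at the number of cores Python reports:

```python
import multiprocessing

# Illustrative only: cap the worker count at the machine's CPU core count,
# as recommended above, and print the matching launch command.
num_workers = min(16, multiprocessing.cpu_count())
print("python train.py --num-workers {} --env-id PongDeterministic-v3 "
      "--log-dir /tmp/pong --visualise".format(num_workers))
```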
11 changes: 6 additions & 5 deletions a3c.py
@@ -71,7 +71,7 @@ class RunnerThread(threading.Thread):
is that a universe environment is _real time_. This means that there should be a thread
that would constantly interact with the environment and tell it what to do. This thread is here.
"""
def __init__(self, env, policy, num_local_steps):
def __init__(self, env, policy, num_local_steps, visualise):
threading.Thread.__init__(self)
self.queue = queue.Queue(5)
self.num_local_steps = num_local_steps
@@ -81,6 +81,7 @@ def __init__(self, env, policy, num_local_steps):
self.daemon = True
self.sess = None
self.summary_writer = None
self.visualise = visualise

def start_runner(self, sess, summary_writer):
self.sess = sess
@@ -92,7 +93,7 @@ def run(self):
self._run()

def _run(self):
rollout_provider = env_runner(self.env, self.policy, self.num_local_steps, self.summary_writer)
rollout_provider = env_runner(self.env, self.policy, self.num_local_steps, self.summary_writer, self.visualise)
while True:
# the timeout variable exists because apparently, if one worker dies, the other workers
# won't die with it, unless the timeout is set to some large number. This is an empirical
@@ -102,7 +103,7 @@ def _run(self):



def env_runner(env, policy, num_local_steps, summary_writer, render=True):
def env_runner(env, policy, num_local_steps, summary_writer, render):
"""
The logic of the thread runner. In brief, it constantly keeps on running
the policy, and as long as the rollout exceeds a certain length, the thread
@@ -158,7 +159,7 @@ def env_runner(env, policy, num_local_steps, summary_writer, render=True):
yield rollout

class A3C(object):
def __init__(self, env, task):
def __init__(self, env, task, visualise):
"""
An implementation of the A3C algorithm that is reasonably well-tuned for the VNC environments.
Below, we will have a modest amount of complexity due to the way TensorFlow handles data parallelism.
@@ -205,7 +206,7 @@ def __init__(self, env, task):
# on the one hand; but on the other hand, we get less frequent parameter updates, which
# slows down learning. In this code, we found that making local steps be much
# smaller than 20 makes the algorithm more difficult to tune and to get to work.
self.runner = RunnerThread(env, pi, 20)
self.runner = RunnerThread(env, pi, 20, visualise)


grads = tf.gradients(self.loss, pi.var_list)
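
The hunks above only thread the new `visualise` flag through to `env_runner`'s `render` parameter; the `env.render()` call itself sits in the collapsed body of the function. Below is a minimal sketch of how that loop is expected to consult the flag, using plain gym and a random action as a stand-in for the policy (a paraphrase, not the repository's exact code):

```python
import gym

# Paraphrased sketch of env_runner's use of `render`; the real rollout logic
# (policy interaction, PartialRollout bookkeeping) is collapsed out of this
# diff and omitted here.
def run_episode(env, render):
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()   # stand-in for the policy's action
        state, reward, done, info = env.step(action)
        if render:
            env.render()                      # draws a window only with --visualise

if __name__ == "__main__":
    run_episode(gym.make("PongDeterministic-v3"), render=False)
```

Note the previous default (`render=True`, with the old call site not overriding it) meant rendering was always on; after this change it is opt-in via `--visualise`.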
14 changes: 11 additions & 3 deletions train.py
@@ -18,6 +18,10 @@
parser.add_argument('-m', '--mode', type=str, default='tmux',
help="tmux: run workers in a tmux session. nohup: run workers with nohup. child: run workers as child processes")

# Add visualise tag
parser.add_argument('--visualise', action='store_true',
help="Visualise the gym environment by running env.render() between each timestep")


def new_cmd(session, name, cmd, mode, logdir, shell):
if isinstance(cmd, (list, tuple)):
@@ -30,14 +34,18 @@ def new_cmd(session, name, cmd, mode, logdir, shell):
return name, "nohup {} -c {} >{}/{}.{}.out 2>&1 & echo kill $! >>{}/kill.sh".format(shell, shlex_quote(cmd), logdir, session, name, logdir)


def create_commands(session, num_workers, remotes, env_id, logdir, shell='bash', mode='tmux'):
def create_commands(session, num_workers, remotes, env_id, logdir, shell='bash', mode='tmux', visualise=False):
# for launching the TF workers and for launching tensorboard
base_cmd = [
'CUDA_VISIBLE_DEVICES=',
sys.executable, 'worker.py',
'--log-dir', logdir, '--env-id', env_id,
'--log-dir', logdir,
'--env-id', env_id,
'--num-workers', str(num_workers)]

if visualise:
base_cmd += ['--visualise']

if remotes is None:
remotes = ["1"] * num_workers
else:
@@ -86,7 +94,7 @@ def create_commands(session, num_workers, remotes, env_id, logdir, shell='bash',

def run():
args = parser.parse_args()
cmds, notes = create_commands("a3c", args.num_workers, args.remotes, args.env_id, args.log_dir, mode=args.mode)
cmds, notes = create_commands("a3c", args.num_workers, args.remotes, args.env_id, args.log_dir, mode=args.mode, visualise=args.visualise)
if args.dry_run:
print("Dry-run mode due to -n flag, otherwise the following commands would be executed:")
else:
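
With these changes, `create_commands` appends `--visualise` to every worker command whenever the flag is given to train.py. An illustrative reconstruction of the resulting `base_cmd` for the README's example invocation is shown below (worker-specific arguments, for example the task index, are appended later in the collapsed part of the function):

```python
import sys

# Illustrative reconstruction of base_cmd for
#   python train.py --num-workers 2 --env-id PongDeterministic-v3 \
#                   --log-dir /tmp/pong --visualise
base_cmd = [
    'CUDA_VISIBLE_DEVICES=',
    sys.executable, 'worker.py',
    '--log-dir', '/tmp/pong',
    '--env-id', 'PongDeterministic-v3',
    '--num-workers', '2',
    '--visualise']
```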
6 changes: 5 additions & 1 deletion worker.py
@@ -24,7 +24,7 @@ def save(self, sess, save_path, global_step=None, latest_filename=None,

def run(args, server):
env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes)
trainer = A3C(env, args.task)
trainer = A3C(env, args.task, args.visualise)

# Variable names that start with "local" are not saved in checkpoints.
if use_tf12_api:
@@ -122,6 +122,10 @@ def main(_):
'or the address of pre-existing VNC servers and '
'rewarders to use (e.g. -r vnc://localhost:5900+15900,vnc://localhost:5901+15901)')

# Add visualisation argument
parser.add_argument('--visualise', action='store_true',
help="Visualise the gym environment by running env.render() between each timestep")

args = parser.parse_args()
spec = cluster_spec(args.num_workers, 1)
cluster = tf.train.ClusterSpec(spec).as_cluster_def()
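
Taken together, the commit threads a single boolean from the command line down to the render call. A condensed, commented trace of that path, quoting the call sites from the hunks above (function bodies are paraphrased or collapsed):

```python
# worker.py: the new argparse flag lands on args.visualise and is passed to A3C.
#     trainer = A3C(env, args.task, args.visualise)
#
# a3c.py: A3C.__init__ forwards it to the runner thread.
#     self.runner = RunnerThread(env, pi, 20, visualise)
#
# a3c.py: RunnerThread._run forwards it to the rollout generator.
#     rollout_provider = env_runner(self.env, self.policy, self.num_local_steps,
#                                   self.summary_writer, self.visualise)
#
# a3c.py: env_runner receives it as `render` and, in its collapsed body, calls
# env.render() between timesteps only when it is True.
```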
