## GA3C-CADRL
#### GPU/CPU Asynchronous Advantage Actor-Critic for Collision Avoidance with Deep Reinforcement Learning
Michael Everett, Yu Fan Chen, and Jonathan P. How<br>
Manuscript submitted to 2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)

**Objective:** The goal of this notebook is to explain how to use our code, enabling other researchers to test and compare against the results presented in the paper. After reading this notebook, it should also be clear how our code could be implemented on your own system (i.e., what format you should provide as input, and what information you'll get as output).

## Create an instance of an Agent
The most important class is Agent, which has attributes such as radius and position, and methods such as find_next_action. The environment is made up of several Agents.

In [1]:
import agent
import network
import util
import numpy as np

### Load trained network

In [2]:
# Build the action set and read off how many actions it contains;
# the count is passed to the network constructor below.
possible_actions = network.Actions()
num_actions = possible_actions.num_actions

# Construct the network on the configured device, then restore its
# parameters from the checkpoint file.
checkpoint_path = '../checkpoints/network_01900000'
nn = network.NetworkVP_rnn(
    network.Config.DEVICE,
    'network',
    num_actions,
)
nn.simple_load(checkpoint_path)

INFO:tensorflow:Scale of 0 disables regularizer.
INFO:tensorflow:Restoring parameters from ../checkpoints/network_01900000


### Set current state of host agent

In [3]:
# Host agent state: start position, goal position, size, preferred speed,
# heading, identifier and current velocity components.
start_x, start_y = 2, 5
goal_x, goal_y = 3, 2
radius = 0.5
pref_speed = 1.2
heading_angle = 0
index = 0
v_x, v_y = 0, 0

# Create the host Agent, then overwrite its global-frame velocity with the
# values chosen above.
host_agent = agent.Agent(
    start_x, start_y,
    goal_x, goal_y,
    radius, pref_speed, heading_angle, index,
)
host_agent.vel_global_frame = np.array([v_x, v_y])

### Set current state of other agents

In [4]:
# Sample observation data in a format easily generated from sensors
# (one entry per observed agent: position, radius and velocity components).
other_agents_x = [-1, -2, -3]
other_agents_y = [2, 3, 4]
other_agents_r = [0.5, 0.4, 0.3]
other_agents_vx = [1.0, 0.6, 0.2]
other_agents_vy = [0.0, 0.6, 0.8]
num_other_agents = len(other_agents_x)

# Every per-agent list must describe the same number of agents.
assert all(
    len(values) == num_other_agents
    for values in (other_agents_y, other_agents_r, other_agents_vx, other_agents_vy)
), "other_agents_* lists must all have the same length"

# Create Agent objects for each observed dynamic obstacle.
# Loop variables use an `other_` prefix so they do not overwrite the
# host-agent variables (radius, pref_speed, v_x, ...) defined earlier.
other_agents = []
for other_index, (other_x, other_y, other_radius, other_vx, other_vy) in enumerate(
        zip(other_agents_x, other_agents_y, other_agents_r,
            other_agents_vx, other_agents_vy),
        start=1):  # index 0 is used by the host agent
    other_velocity = np.array([other_vx, other_vy])

    # dummy info - unobservable states not used by NN, just needed to create Agent object
    other_heading_angle = np.arctan2(other_vy, other_vx)
    other_pref_speed = np.linalg.norm(other_velocity)
    other_goal_x = other_x + 5.0
    other_goal_y = other_y + 5.0

    other_agent = agent.Agent(other_x, other_y, other_goal_x, other_goal_y,
                              other_radius, other_pref_speed, other_heading_angle,
                              other_index)
    # Store the observed velocity explicitly, the same way it is done for the
    # host agent, instead of relying on whatever the Agent constructor sets.
    other_agent.vel_global_frame = other_velocity
    other_agents.append(other_agent)

### Convert agent states into observation vector

In [5]:
# Observe the other agents from the host's point of view and drop the first
# element of the result (presumably a field the network does not take as
# input -- TODO confirm against Agent.observe).
host_observation = host_agent.observe(other_agents)[1:]

# Add a leading axis of length 1 before handing the vector to the network.
obs = np.expand_dims(host_observation, axis=0)

### Query the policy based on observation vector

In [6]:
# Query the policy with the same observation three times and print each
# output, so the results of repeated queries can be compared by eye.
NUM_POLICY_QUERIES = 3
for query_index in range(NUM_POLICY_QUERIES):
    predictions = nn.predict_p(obs, None)[0]
    print(predictions)

# Select the action whose prediction is largest (using the last query).
raw_action = possible_actions.actions[np.argmax(predictions)]

# Convert the raw action into a command for the host agent: the first
# component is scaled by the host's preferred speed; the second is added to
# the host's current global heading and passed through util.wrap
# (presumably angle wrapping -- TODO confirm against util).
action = np.array([
    host_agent.pref_speed * raw_action[0],
    util.wrap(raw_action[1] + host_agent.heading_global_frame),
])

# Single-argument print(...) behaves the same under Python 2 and Python 3,
# unlike the `print "action:", action` statement form.
print("action: {}".format(action))

InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float
	 [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=<unknown>, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op u'Placeholder', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 1017, in start
    self._run_callback(self._callbacks.popleft())
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2714, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2818, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2878, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-680eb6d117a0>", line 3, in <module>
    nn = network.NetworkVP_rnn(network.Config.DEVICE, 'network', num_actions)
  File "network.py", line 64, in __init__
    super(self.__class__, self).__init__(device, model_name, num_actions)
  File "network.py", line 27, in __init__
    self._create_graph()
  File "network.py", line 68, in _create_graph
    self._create_graph_inputs()
  File "network.py", line 44, in _create_graph_inputs
    self.keep_prob = tf.placeholder(tf.float32)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 1735, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 4925, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Placeholder' with dtype float
	 [[Node: Placeholder = Placeholder[dtype=DT_FLOAT, shape=<unknown>, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
