In [1]:
import numpy as np
import tensorflow as tf
# An InteractiveSession installs itself as the default session, which is what
# lets the cells below call `.eval()` on tensors without passing a session.
sess = tf.InteractiveSession()

In [2]:
# Length of the one-hot position vector; positions are clamped to 0 .. VECTOR_SIZE-1.
VECTOR_SIZE = 3
# Action codes understood by apply_action.
MOVE_LEFT = 0
MOVE_RIGHT = 1

Demonstrate

- Creating a vector of zeros
- Updating one of those zeros to one.

This is useful for creating one-hot vector representations.

In [3]:
def get_position_vector(position):
    """Build a one-hot float vector of length VECTOR_SIZE with a 1 at `position`.

    A new zero-filled variable is created on every call, so the global
    variable initializer must be run before the returned tensor is evaluated.

    Args:
        position: scalar integer tensor (or variable) giving the index to set.

    Returns:
        The tensor produced by the scatter update, i.e. the updated vector.
    """
    zeros = tf.Variable([0.] * VECTOR_SIZE, dtype=tf.float32)
    return tf.scatter_update(zeros, [position], [1])

# Two scalar positions (0 and 0 + 1) together with their one-hot encodings.
position = tf.Variable(0, dtype=tf.int32)
position_grid = get_position_vector(position)

position2 = position + 1
position_grid2 = get_position_vector(position2)

# get_position_vector created new variables, so initialize everything before
# evaluating any tensor.
sess.run(tf.global_variables_initializer())

# Show each position followed by its one-hot vector.
for tensor in (position, position_grid, position2, position_grid2):
    print(tensor.eval())

0
[ 1.  0.  0.]
1
[ 0.  1.  0.]


Moving the position left or right, and converting to one-hot representation.

In [4]:
def move_left(position):
    """Return `position - 1`, clamped so it never drops below 0.

    Args:
        position: scalar int32 tensor (or variable) holding the current index.

    Returns:
        A scalar int32 tensor with the new position.
    """
    # The lower bound is a constant, not a tf.Variable: it never changes, needs
    # no initialization, and adds no state when this is called inside
    # tf.cond / tf.while_loop bodies.
    return tf.maximum(position - 1, tf.constant(0, dtype=tf.int32))

def move_right(position):
    """Return `position + 1`, clamped so it never exceeds VECTOR_SIZE - 1.

    Args:
        position: scalar int32 tensor (or variable) holding the current index.

    Returns:
        A scalar int32 tensor with the new position.
    """
    # The upper bound is a constant, not a tf.Variable: it never changes, needs
    # no initialization, and adds no state when this is called inside
    # tf.cond / tf.while_loop bodies.
    return tf.minimum(position + 1, tf.constant(VECTOR_SIZE - 1, dtype=tf.int32))

# Step right from position2, then back left again.
position3 = move_right(position2)
position4 = move_left(position3)

# One-hot encodings of the two new positions.
position_vector3 = get_position_vector(position3)
position_vector4 = get_position_vector(position4)

# New variables were created above, so (re)initialize before evaluating.
sess.run(tf.global_variables_initializer())

# Show each position followed by its one-hot vector.
for tensor in (position3, position_vector3, position4, position_vector4):
    print(tensor.eval())

2
[ 0.  0.  1.]
1
[ 0.  1.  0.]


The `apply_action` function moves the position left or right, depending on the action it is given.

In [5]:
def apply_action(position, action):
    """Move `position` one step in the direction selected by `action`.

    Args:
        position: scalar integer tensor holding the current index.
        action: MOVE_LEFT to step left; any other value steps right.

    Returns:
        The tensor produced by move_left or move_right, chosen in-graph.
    """
    is_left = tf.equal(action, MOVE_LEFT)
    return tf.cond(
        is_left,
        lambda: move_left(position),
        lambda: move_right(position),
    )

# Apply a fixed left move followed by a fixed right move.
position5 = apply_action(position4, MOVE_LEFT)
position6 = apply_action(position5, MOVE_RIGHT)

sess.run(tf.global_variables_initializer())

for tensor in (position5, position6):
    print(tensor.eval())

0
1


We don't have anything to intelligently move the agent yet. So let's just create a function to move it randomly.

In [6]:
def choose_random_action():
    """Pick MOVE_LEFT or MOVE_RIGHT from a single uniform random draw.

    Returns:
        The result of a tf.cond that yields MOVE_LEFT when the draw is
        greater than 0.5 and MOVE_RIGHT otherwise.
    """
    sample = tf.random_uniform([1, ])
    # The comparison has shape [1]; index it to get the scalar predicate tf.cond needs.
    above_half = tf.less(0.5, sample)
    return tf.cond(above_half[0], lambda: MOVE_LEFT, lambda: MOVE_RIGHT)

# Build one random-action op.
# NOTE(review): `action` is not referenced again in the visible cells.
action = choose_random_action()

sess.run(tf.global_variables_initializer())

In [7]:
# Chain three random moves, each starting from the result of the previous one.
position7 = apply_action(position6, choose_random_action())
position8 = apply_action(position7, choose_random_action())
position9 = apply_action(position8, choose_random_action())

sess.run(tf.global_variables_initializer())

# Fetch all three positions in ONE run. Each separate `.eval()` re-executes the
# graph and re-samples the random actions, so evaluating the tensors one by one
# would print values taken from three unrelated trajectories.
for value in sess.run([position7, position8, position9]):
    print(value)

0
1
0


Use tf.while_loop to move the agent at random, until its position reaches VECTOR_SIZE-1

In [8]:
# The dtype must be passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, not the dtype.
position = tf.Variable(0, dtype=tf.int32)

# Keep looping while the agent has not reached the rightmost cell.
cond = lambda position: tf.less(position, VECTOR_SIZE-1)
# One loop step: move the agent in a randomly chosen direction.
op = lambda position: apply_action(position, choose_random_action())

r = tf.while_loop(cond, op, [position])

sess.run(tf.global_variables_initializer())
print(r.eval())

2


Expand the prior example so that:

 - We record all positions inside a TensorArray.
 - We return the recorded positions using TensorArray.gather.

In [9]:
# Upper bound on the number of steps recorded for one episode.
MAX_EPISODE_LENGTH = 10

# The dtype must be passed by keyword: the second positional parameter of
# tf.Variable is `trainable`, not the dtype.
position = tf.Variable(0, dtype=tf.int32)
count = tf.Variable(0, dtype=tf.int32)
positions = tf.TensorArray(tf.int32, size=MAX_EPISODE_LENGTH)

# The loop can terminate if either:
# - the position reaches VECTOR_SIZE-1, which is the rightmost position in the one-hot vector representation
# - we reach MAX_EPISODE_LENGTH iterations
# All three callables take the loop variables in the order (count, position, positions).
cond_not_terminal = lambda c, p, ps: tf.less(p, VECTOR_SIZE-1)
cond_not_max_length = lambda c, p, ps: tf.less(c, MAX_EPISODE_LENGTH)
cond = lambda c, p, ps: cond_not_terminal(c, p, ps) & cond_not_max_length(c, p, ps)

def body(c, p, positions):
    """Take one random step and record the resulting position at index `c`.

    Args:
        c: scalar int32 step counter (also the TensorArray write index).
        p: scalar int32 current position.
        positions: TensorArray collecting the position after each step.

    Returns:
        The updated (count, position, positions) triple.
    """
    next_c = c+1
    next_p = apply_action(p, choose_random_action())
    positions = positions.write(c, next_p)
    return next_c, next_p, positions

# loop_vars must be in the same (count, position, positions) order that cond
# and body expect and that the result is unpacked into. The previous
# (position, count, positions) order only worked because both start at 0.
count, position, positions = tf.while_loop(cond, body, (count, position, positions))

sess.run(tf.global_variables_initializer())

# Only the first `count` slots of the TensorArray were written; gather just those.
rng = tf.range(count)
tensor_positions = positions.gather(rng)

In [10]:
# Fetch both tensors in a single run so the step count and the recorded
# positions come from the same execution of the random loop.
cnt, ps = sess.run([count, tensor_positions])
print(cnt)
print(ps)

2
[1 2]
