In [1]:
from rl.lib.timer import Timer

import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()

NUM_THREADS = 5
sess2 = tf.Session(config=tf.ConfigProto(
    intra_op_parallelism_threads=NUM_THREADS))

sess.run(tf.global_variables_initializer())

In [2]:
VECTOR_SIZE = 3
MOVE_LEFT = 0
MOVE_RIGHT = 1

Demonstrate

- Creating a vector of zeros
- Updating one of those zeros to one.

This is useful for creating one-hot vector representations.

In [3]:
def get_position_vector(position):
    x = tf.Variable([0.] * VECTOR_SIZE, dtype=tf.float32)
    b = tf.scatter_update(x, [position], [1])
    return b

position = tf.Variable(0, dtype=tf.int32)
position_grid = get_position_vector(position)

position2 = position + 1
position_grid2 = get_position_vector(position2)

sess.run(tf.global_variables_initializer())  

print(position.eval())
print(position_grid.eval())
print(position2.eval())
print(position_grid2.eval())

0
[ 1.  0.  0.]
1
[ 0.  1.  0.]


Moving the position left or right, and converting to one-hot representation.

In [4]:
def move_left(position):
    return tf.maximum(position-1, tf.Variable(0, dtype=tf.int32))

def move_right(position):
    return tf.minimum(position+1, tf.Variable(VECTOR_SIZE-1, dtype=tf.int32))

position3 = move_right(position2)
position4 = move_left(position3)

position_vector3 = get_position_vector(position3)
position_vector4 = get_position_vector(position4)

sess.run(tf.global_variables_initializer()) 

print(position3.eval())
print(position_vector3.eval())

print(position4.eval())
print(position_vector4.eval())

2
[ 0.  0.  1.]
1
[ 0.  1.  0.]


The apply_action can move the position left or right, depending on what action it is given.

In [5]:
def apply_action(position, action):
    cond = tf.equal(action, MOVE_LEFT)
    return tf.cond(cond, lambda: move_left(position), lambda: move_right(position))

position5 = apply_action(position4, MOVE_LEFT)
position6 = apply_action(position5, MOVE_RIGHT)

sess.run(tf.global_variables_initializer())

print(position5.eval())
print(position6.eval())

0
1


We don't have anything to intelligently move the agent yet. So let's just create a function to move it randomly.

In [6]:
def choose_random_action():
    cond = tf.less(0.5, tf.random_uniform([1, ]))
    return tf.cond(cond[0], lambda: MOVE_LEFT, lambda: MOVE_RIGHT)

action = choose_random_action()

sess.run(tf.global_variables_initializer())

In [7]:
position7 = apply_action(position6, choose_random_action())
position8 = apply_action(position7, choose_random_action())
position9 = apply_action(position8, choose_random_action())

sess.run(tf.global_variables_initializer())

print(position7.eval())
print(position8.eval())
print(position9.eval())

0
2
2


Use tf.while_loop to move the agent at random, until it's position reaches VECTOR_SIZE-1

In [15]:
position = tf.Variable(0, tf.int32)

cond = lambda position: tf.less(position, VECTOR_SIZE-1)
op = lambda position: apply_action(position, choose_random_action())

r=tf.while_loop(cond, op, [position])

sess.run(tf.global_variables_initializer())
print(r.eval())

2


Expand the prior example so that:

 - We record all positions inside a TensorArray.
 - We return the position using TensorArray.gather.

In [23]:
MAX_EPISODE_LENGTH = 10

position = tf.Variable(0, tf.int32)
count = tf.Variable(0, tf.int32)
positions = tf.TensorArray(tf.int32, size=MAX_EPISODE_LENGTH)

# The loop can terminate if either:
# - the position reaches VECTOR_SIZE-1, which is the rightmost position in the one-hot vector representation
# - we reach MAX_EPISODE_LENGTH iterations
cond_not_terminal = lambda c, p, ps: tf.less(p, VECTOR_SIZE-1)
cond_not_max_length = lambda c, p, ps: tf.less(c, MAX_EPISODE_LENGTH)
cond = lambda c, p, ps: cond_not_terminal(c, p, ps) & cond_not_max_length(c, p, ps)

def body(c, p, positions):
    next_c = c+1
    next_p = apply_action(p, choose_random_action())
    positions = positions.write(c, next_p)
    return next_c, next_p, positions

count, position, positions = tf.while_loop(cond, body, (position, count, positions))
rng = tf.range(count)
gathered_positions = positions.gather(rng)
sess.run(tf.global_variables_initializer())

In [46]:

cnt, p, ps = sess.run([count, position, gathered_positions])
print(cnt)
print(ps)

2
[1 2]


In [11]:
def get_walk():
    position = tf.Variable(0, tf.int32)
    count = tf.Variable(0, tf.int32)
    positions = tf.TensorArray(tf.int32, size=MAX_EPISODE_LENGTH)

    # The loop can terminate if either:
    # - the position reaches VECTOR_SIZE-1, which is the rightmost position in the one-hot vector representation
    # - we reach MAX_EPISODE_LENGTH iterations
    cond_not_terminal = lambda c, p, ps: tf.less(p, VECTOR_SIZE-1)
    cond_not_max_length = lambda c, p, ps: tf.less(c, MAX_EPISODE_LENGTH)
    cond = lambda c, p, ps: cond_not_terminal(c, p, ps) & cond_not_max_length(c, p, ps)

    def body(c, p, positions):
        next_c = c+1
        next_p = apply_action(p, choose_random_action())
        positions = positions.write(c, next_p)
        return next_c, next_p, positions

    count, position, positions = tf.while_loop(cond, body, (position, count, positions))
    
    rng = tf.range(count)
    tensor_positions = positions.gather(rng)
    
    return tensor_positions

In [12]:
walk1 = get_walk()
walk2 = get_walk()

sess.run(tf.global_variables_initializer())

w1, w2 = sess.run([walk1, walk2])

print(w1)
print(w2)

[1 2]
[1 2]


In [13]:
with Timer('Making walks'):
    walks = [ get_walk() for _ in range(1000)]

with Timer('Initialising global variables'):
    sess.run(tf.global_variables_initializer())
    
with Timer('Evaluating walks'):
    ws = sess.run(walks)
print(ws)

Making walks Took 57.67s seconds
Initialising global variables Took 5.07s seconds
Evaluating walks Took 5.49s seconds
[array([0, 0, 1, 0, 1, 2], dtype=int32), array([1, 2], dtype=int32), array([0, 0, 1, 2], dtype=int32), array([0, 0, 0, 0, 0, 0, 1, 0, 1, 2], dtype=int32), array([1, 2], dtype=int32), array([1, 2], dtype=int32), array([0, 0, 0, 1, 2], dtype=int32), array([0, 0, 1, 0, 0, 1, 2], dtype=int32), array([1, 0, 0, 1, 2], dtype=int32), array([1, 0, 1, 2], dtype=int32), array([1, 2], dtype=int32), array([1, 2], dtype=int32), array([0, 0, 1, 0, 1, 0, 1, 2], dtype=int32), array([0, 0, 1, 0, 0, 1, 0, 1, 0, 0], dtype=int32), array([0, 1, 0, 1, 0, 1, 2], dtype=int32), array([0, 0, 1, 2], dtype=int32), array([1, 0, 1, 2], dtype=int32), array([0, 1, 0, 0, 0, 0, 0, 1, 2], dtype=int32), array([1, 0, 0, 0, 0, 0, 1, 2], dtype=int32), array([0, 1, 2], dtype=int32), array([0, 1, 2], dtype=int32), array([0, 1, 2], dtype=int32), array([1, 2], dtype=int32), array([1, 2], dtype=int32), array([0, 0

In [14]:
NUM_WALKS = 100

NUM_THREADS = 12

config = tf.ConfigProto(intra_op_parallelism_threads=NUM_THREADS,
                        inter_op_parallelism_threads=NUM_THREADS,
                        allow_soft_placement=True, device_count = {'CPU': NUM_THREADS})

sess2 = tf.Session(config=config)

walks = tf.TensorArray(tf.int32, size=NUM_WALKS)
count2 = tf.Variable(0, tf.int32)

cond = lambda c, w: tf.less(c, NUM_WALKS)

def body(c, w):
    next_c = c+1
    www = get_walk()
    w = w.write(c, www)
    return next_c, w

with Timer('Making Walks'):
    count2, walks = tf.while_loop(cond, body, [count2, walks])

with Timer('Initialising global variables'):
    sess2.run(tf.global_variables_initializer())

with Timer('Concatenating'):
    ws = sess2.run([walks.concat()])
print(ws)    

Making Walks Took 0.08s seconds
Initialising global variables Took 4.59s seconds
Concatenating Took 1.90s seconds
[array([0, 1, 2, 1, 0, 1, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 1, 2, 0, 0, 0, 1, 0,
       1, 2, 0, 1, 2, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 0, 1, 0, 0,
       0, 1, 2, 1, 0, 0, 1, 2, 0, 1, 0, 1, 2, 1, 2, 0, 1, 2, 1, 2, 0, 0, 0,
       1, 2, 0, 1, 0, 1, 2, 1, 2, 0, 1, 2, 1, 0, 0, 1, 0, 1, 0, 0, 1, 2, 0,
       1, 0, 0, 0, 0, 1, 2, 0, 1, 2, 1, 2, 0, 0, 1, 2, 1, 2, 0, 1, 0, 1, 0,
       0, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 2, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       2, 1, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 2, 0, 1, 2, 1, 0, 1, 2, 1, 0,
       0, 0, 1, 0, 0, 1, 2, 1, 0, 0, 0, 0, 1, 2, 1, 2, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 1, 1, 2, 0, 1, 2, 0, 0, 0, 1, 2, 1, 2, 1, 0, 1, 2, 1, 2, 1, 0,
       0, 0, 1, 0, 1, 2, 0, 1, 2, 1, 2, 1, 2, 1, 