In [1]:
import h5py
import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Concatenate, Dense, Dropout, Flatten, Lambda, Activation
from keras.applications import VGG16
from keras import backend as K

# InteractiveSession installs itself as the default session; the bare
# .eval() calls in patch_transition rely on a default session being active.
sess = tf.InteractiveSession()
# Seeds NumPy's global random generator only.
np.random.seed(123)

Using TensorFlow backend.


In [2]:
# hyperparameters
dropout_rate = 0.5  # rate passed to every Dropout layer in the actor and critic heads
num_actions = 11    # size of the action space; width of the fc6_c / fc6_a output layers
k = 10              # the action_history and q_values inputs are k * num_actions wide

In [3]:
# input initializations
# Image patch fed to the VGG16 backbone of both networks
# (presumably height x width x channels -- confirm against the data pipeline).
patch = Input(shape=(112, 112, 3), name='patch')
# Flat vector of k * num_actions entries, concatenated onto the critic's fc5 features.
action_history = Input(shape=(k * num_actions,), name='action_history')
# Flat vector of k * num_actions entries, concatenated onto the actor's fc5 features.
# NOTE(review): the critic's fc6_c layer emits only num_actions values -- confirm
# that a k * num_actions-wide Q-value input is what the actor should receive.
q_values = Input(shape=(k * num_actions,), name='q_values')

# TODO:
# 1. replace fc4-fc5 with a recurrent module?
# 2. share a single VGG network between actor and critic?

In [4]:
def action_transition(new_action, a, num_action=11):
    '''
    Push the newest action onto the front of the past action vector and
    drop the trailing `num_action` entries of the old vector (FIFO).

    The result has the same length as `a` when `new_action` holds exactly
    `num_action` entries.

    :param new_action: 1-D tensor for the new action, placed first in the result
    :param a: 1-D past action vector; its first dimension must be statically known
    :param num_action: size of action space, i.e. how many trailing entries of `a` to discard
    :return: updated past action vector (`new_action` followed by the kept head of `a`)
    '''
    # Number of leading (most recent) entries of the old history that survive.
    kept_len = a.shape[0].value - num_action
    older_actions = tf.slice(a, [0], [kept_len])
    return tf.concat([new_action, older_actions], axis=0)

# debugging action_transition: pushing one new num_actions-wide action (y)
# onto a k * num_actions-long history (x) must preserve the history length
x = tf.convert_to_tensor(np.arange(110))
y = tf.convert_to_tensor(np.arange(110, 110+11))
# Pass the arguments by keyword: with the roles swapped, the 11-entry action
# is sliced down to nothing and the history is returned untouched, so the
# shape check would pass without ever exercising the FIFO update.
action_transition(new_action=y, a=x).shape

TensorShape([Dimension(110)])

In [5]:
def patch_transition(patch, action, alpha=0.05):
    '''
    Move or rescale a bounding box according to the selected action.

    Step sizes are the fraction `alpha` of the box's last two entries
    (w and h in the (x,y,w,h) layout). Tensor values are read with
    `.eval()`, so a default TensorFlow session must be active.

    :param patch: float64 tensor, bounding box representation (x,y,w,h)
    :param action: one-hot encoded action; its argmax selects the move
    :param alpha: step size as a fraction of the box width / height
    :return: `patch` itself for action 10 (termination), otherwise a new
             tensor equal to `patch` plus the offset of the chosen action
    '''
    chosen = tf.argmax(action).eval()
    if chosen == 10:
        # termination: the box is handed back untouched
        return patch

    assert patch.dtype == tf.float64
    offset = np.zeros(4)
    step_x = alpha * patch[-2].eval()
    step_y = alpha * patch[-1].eval()
    # Step used for each box slot: x and w move by step_x, y and h by step_y.
    step_for_slot = (step_x, step_y, step_x, step_y)

    # action index -> (box slot, signed step multiplier) pairs
    # NOTE(review): "up" increases y here -- confirm the coordinate convention.
    moves = {
        0: ((0, -1),),           # left
        1: ((0, +1),),           # right
        2: ((1, +1),),           # up
        3: ((1, -1),),           # down
        4: ((0, -3),),           # 3x left
        5: ((0, +3),),           # 3x right
        6: ((1, +3),),           # 3x up
        7: ((1, -3),),           # 3x down
        8: ((2, -1), (3, -1)),   # scale down
    }
    # Any index not listed above (9 in an 11-action space) means scale up.
    scale_up = ((2, +1), (3, +1))

    for slot, factor in moves.get(chosen, scale_up):
        offset[slot] += factor * step_for_slot[slot]

    return tf.add(patch, tf.convert_to_tensor(offset, dtype=tf.float64))

In [6]:
# critic: image patch + action history -> one linear output per action
critic_features = VGG16(weights='imagenet', include_top=False)(patch)
critic_features = Flatten()(critic_features)
critic_hidden = Dense(512, activation='relu', name='fc4_c')(critic_features)
critic_hidden = Dropout(dropout_rate)(critic_hidden)
critic_hidden = Dense(512, activation='relu', name='fc5_c')(critic_hidden)
critic_hidden = Dropout(dropout_rate)(critic_hidden)
# the action history joins the image features just before the output layer
critic_merged = Concatenate()([critic_hidden, action_history])
# single vector of Q-values
c = Dense(num_actions, activation='linear', name='fc6_c')(critic_merged)

critic = Model(inputs=[patch, action_history], outputs=[c])

In [7]:
# actor: image patch + Q-value vector -> softmax distribution over actions
actor_features = VGG16(weights='imagenet', include_top=False)(patch)
actor_features = Flatten()(actor_features)
actor_hidden = Dense(512, activation='relu', name='fc4_a')(actor_features)
actor_hidden = Dropout(dropout_rate)(actor_hidden)
actor_hidden = Dense(512, activation='relu', name='fc5_a')(actor_hidden)
actor_hidden = Dropout(dropout_rate)(actor_hidden)
# the Q-value input joins the image features just before the output layer
actor_merged = Concatenate()([actor_hidden, q_values])
a = Dense(num_actions, activation='softmax', name='fc6_a')(actor_merged)

actor = Model(inputs=[patch, q_values], outputs=[a])