In [2]:
import numpy as np

from keras.models import model_from_json, load_model
from keras.models import Model
from keras.layers.core import Dense, Flatten
from keras.layers import Convolution2D, merge, MaxPooling2D, Input
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras import backend as K

import tensorflow as tf

In [3]:
# Inspired by:
# https://github.com/yanpanlau/DDPG-Keras-Torcs

class ActorNetwork():
    """DDPG actor: maps a multi-input state to a tanh-bounded action vector.

    Two structurally identical Keras models are built: the online model that
    is optimised, and a target model that slowly tracks it via `target_train`.
    The online model is trained with the gradient of the critic's output with
    respect to the action, supplied through `train`.
    """

    # Number of separate state inputs (one convolutional branch each).
    _NUM_STATE_INPUTS = 4

    def __init__(self, sess, state_shape, action_size, BATCH_SIZE, LEARNING_RATE, TAU):
        """Build the online/target models and the policy-gradient update op.

        Args:
            sess: TensorFlow session shared with Keras.
            state_shape: indexed as ``state_shape[i, :]`` for i in 0..3, so it
                must support 2-D indexing (e.g. a NumPy array with one row of
                dimensions per state input).
            action_size: width of the action vector.
            BATCH_SIZE: stored on the instance; not used inside this class.
            LEARNING_RATE: learning rate of the Adam optimiser.
            TAU: soft-update factor used by `target_train`.
        """
        self.sess = sess
        self.BATCH_SIZE = BATCH_SIZE
        self.LEARNING_RATE = LEARNING_RATE
        self.TAU = TAU

        K.set_session(sess)

        # The attribute was previously stored as `weight` but read back as
        # `weights`, which made construction fail with AttributeError.
        self.model, self.weights, self.state = self.actor_net(state_shape, action_size)
        self.weight = self.weights  # alias for the earlier attribute name
        self.target_model, self.target_weights, self.target_state = self.actor_net(state_shape, action_size)

        self.action_gradient = tf.placeholder(tf.float32, [None, action_size])
        # The negated action gradient is passed as grad_ys, so that applying
        # the resulting parameter gradients with a minimising optimiser moves
        # the policy in the direction of `action_gradient`.
        self.params_grad = tf.gradients(self.model.output, self.weights, -self.action_gradient)
        grads = list(zip(self.params_grad, self.weights))
        self.optimize = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(grads)
        # Replaces the deprecated tf.initialize_all_variables().
        self.sess.run(tf.global_variables_initializer())

    def train(self, states, action_grads):
        """Run one optimiser step on the online model.

        Args:
            states: sequence with one batch per state input, in the same order
                as `self.state`.
            action_grads: values fed to the `action_gradient` placeholder.

        Raises:
            ValueError: if `states` does not contain one entry per state input.
        """
        if len(states) != len(self.state):
            raise ValueError(
                "expected %d state inputs, got %d" % (len(self.state), len(states)))
        # `self.state` is a list of input tensors; a list cannot be used as a
        # feed_dict key, so every input tensor is paired with its own batch.
        feed = dict(zip(self.state, states))
        feed[self.action_gradient] = action_grads
        self.sess.run(self.optimize, feed_dict=feed)

    def target_train(self):
        """Soft-update the target model: target <- TAU * online + (1 - TAU) * target."""
        tau = self.TAU
        actor_weights = self.model.get_weights()
        actor_target_weights = self.target_model.get_weights()
        blended = [
            tau * online + (1 - tau) * target
            for online, target in zip(actor_weights, actor_target_weights)
        ]
        self.target_model.set_weights(blended)

    @staticmethod
    def _conv_branch(input_shape):
        """Build one state branch: Input -> 2 x (5x5 conv + 4x4 max-pool) -> Dense(256).

        Returns:
            (input_tensor, feature_tensor) for the branch.
        """
        branch_input = Input(shape=(input_shape))
        feat = Convolution2D(16, (5, 5), padding='same', activation='relu')(branch_input)
        feat = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), padding='same')(feat)
        feat = Convolution2D(32, (5, 5), padding='same', activation='relu')(feat)
        feat = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), padding='same')(feat)
        feat = Flatten()(feat)
        feat = Dense(256, activation='relu')(feat)
        return branch_input, feat

    def actor_net(self, state_shape, action_size):
        """
        Actor Network: State to Action

        Builds one convolutional branch per state input, concatenates the
        branch features and maps them to `action_size` tanh outputs.

        Returns:
            (model, model.trainable_weights, list of the state input tensors)
        """
        branches = [
            self._conv_branch(state_shape[idx, :])
            for idx in range(self._NUM_STATE_INPUTS)
        ]
        state = [branch_input for branch_input, _ in branches]
        merge_feat = merge([feat for _, feat in branches], mode='concat')

        out = Dense(action_size, activation='tanh')(merge_feat)

        model = Model(input=state, output=out)

        return model, model.trainable_weights, state

In [4]:
class CriticNetwork():
    """DDPG critic: scores a (state, action) pair.

    Two structurally identical Keras models are built: the online model and a
    target model that slowly tracks it via `target_train`. The gradient of the
    online model's output with respect to the action input is exposed through
    `gradients` for use in the actor update.

    NOTE(review): the models are not compiled here, so fitting the critic
    requires a `compile()` call (or a custom training op) elsewhere — confirm.
    """

    # Number of separate state inputs (one convolutional branch each).
    _NUM_STATE_INPUTS = 4

    def __init__(self, sess, state_shape, action_size, BATCH_SIZE, LEARNING_RATE, TAU):
        """Build the online/target models and the action-gradient op.

        Args:
            sess: TensorFlow session shared with Keras.
            state_shape: indexed as ``state_shape[i, :]`` for i in 0..3, so it
                must support 2-D indexing (e.g. a NumPy array with one row of
                dimensions per state input).
            action_size: width of the action vector.
            BATCH_SIZE: stored on the instance; not used inside this class.
            LEARNING_RATE: stored on the instance; not used inside this class.
            TAU: soft-update factor used by `target_train`.
        """
        self.sess = sess
        self.BATCH_SIZE = BATCH_SIZE
        self.LEARNING_RATE = LEARNING_RATE
        self.TAU = TAU

        K.set_session(sess)

        self.model, self.action, self.state = self.critic_net(state_shape, action_size)
        self.target_model, self.target_action, self.target_state = self.critic_net(state_shape, action_size)
        self.action_grads = tf.gradients(self.model.output, self.action)
        # Replaces the deprecated tf.initialize_all_variables().
        self.sess.run(tf.global_variables_initializer())

    def gradients(self, states, actions):
        """Evaluate d(output)/d(action) of the online model.

        Args:
            states: sequence with one batch per state input, in the same order
                as `self.state`.
            actions: batch fed to the action input.

        Returns:
            The result of running `self.action_grads`, i.e. the list produced
            by `tf.gradients` evaluated for the given batch.

        Raises:
            ValueError: if `states` does not contain one entry per state input.
        """
        if len(states) != len(self.state):
            raise ValueError(
                "expected %d state inputs, got %d" % (len(self.state), len(states)))
        # `self.state` is a list of input tensors; a list cannot be used as a
        # feed_dict key, so every input tensor is paired with its own batch.
        feed = dict(zip(self.state, states))
        feed[self.action] = actions
        return self.sess.run(self.action_grads, feed_dict=feed)

    def target_train(self):
        """Soft-update the target model: target <- TAU * online + (1 - TAU) * target."""
        tau = self.TAU
        critic_weights = self.model.get_weights()
        critic_target_weights = self.target_model.get_weights()
        blended = [
            tau * online + (1 - tau) * target
            for online, target in zip(critic_weights, critic_target_weights)
        ]
        self.target_model.set_weights(blended)

    @staticmethod
    def _conv_branch(input_shape):
        """Build one state branch: Input -> 2 x (5x5 conv + 4x4 max-pool) -> Dense(256).

        Returns:
            (input_tensor, feature_tensor) for the branch.
        """
        branch_input = Input(shape=(input_shape))
        feat = Convolution2D(16, (5, 5), padding='same', activation='relu')(branch_input)
        feat = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), padding='same')(feat)
        feat = Convolution2D(32, (5, 5), padding='same', activation='relu')(feat)
        feat = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), padding='same')(feat)
        feat = Flatten()(feat)
        feat = Dense(256, activation='relu')(feat)
        return branch_input, feat

    def critic_net(self, state_shape, action_size):
        """
        Critic Network: (State, Action) to value

        Builds one convolutional branch per state input and concatenates the
        branch features, projects the action to the same width with a linear
        layer, sums both and applies the output layer.

        Returns:
            (model, action input tensor, list of the state input tensors)
        """
        branches = [
            self._conv_branch(state_shape[idx, :])
            for idx in range(self._NUM_STATE_INPUTS)
        ]
        state = [branch_input for branch_input, _ in branches]
        state_feat = merge([feat for _, feat in branches], mode='concat')

        # `shape` must be a tuple; `(action_size)` is just an int.
        action = Input(shape=(action_size,))
        # Width matches the concatenated state features so they can be summed.
        action_feat = Dense(256 * self._NUM_STATE_INPUTS, activation='linear')(action)

        combined_feat = merge([state_feat, action_feat], mode='sum')

        # NOTE(review): the output is tanh-bounded and `action_size` wide, which
        # is unusual for a critic — kept as in the original; confirm intent.
        out = Dense(action_size, activation='tanh')(combined_feat)

        state_action = state + [action]
        model = Model(input=state_action, output=out)

        return model, action, state