# Recurrent Neural Network Cell Engineering
This notebook shows how to compile an arbitrary neural network into a TensorFlow RNN cell. This enables many interesting tasks — for example, compiling an optimization problem into a neural network!

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os
import pandas as pd
#Functional coding
import functools
from functools import partial
from tensorflow.python.ops import array_ops 

In [2]:
# Relative locations of the three Navigation RDDL tables. They are resolved
# against the current working directory when loaded, so the notebook must be
# started from the project root.
Datapath="DATA/Navigation/Nav_RDDL_Data.txt"      # loaded below as the state-action table
Labelpath="DATA/Navigation/Nav_RDDL_Label.txt"    # loaded below as the next-state table
Rewardpath="DATA/Navigation/Nav_RDDL_Reward.txt"  # loaded below as the reward table

In [3]:
#Given local path, find full path
def PathFinder(path):
    """Turn a relative path into an absolute one.

    The path is resolved against the current working directory, echoed to
    stdout, and returned.
    """
    # Python 2 alternative kept for reference:
    #   os.path.join(os.path.dirname('__file__'), path)
    resolved = os.path.abspath(path)
    print(resolved)
    return resolved

#Read Data for Deep Learning
def ReadData(path):
    """Load a comma-separated file into a pandas DataFrame.

    ``path`` is resolved (and printed) by PathFinder; the first row of the
    file is used as the column header.
    """
    return pd.read_csv(PathFinder(path), sep=',', header=0)

#Won't use this one to normalize
#Input Normalization
def Normalize(features, mean=None, std=None):
    """Standardize ``features`` column-wise: ``(features - mean) / std``.

    Args:
        features: array-like whose first axis indexes samples, typically of
            shape (samples, columns).
        mean: per-column means to reuse (ndarray or list). When omitted or
            empty, both ``mean`` and ``std`` are computed from ``features``.
        std: per-column standard deviations to reuse. When ``mean`` is
            supplied but ``std`` is omitted or empty, ``std`` is computed
            from ``features``.

    Returns:
        Tuple ``(new_feature, mean, std)``. ``new_feature`` is a new float
        array; entries that evaluate to NaN (0/0, e.g. a constant column)
        are replaced by 0. ``mean`` and ``std`` are the statistics actually
        used, as ndarrays, so they can be passed to a later call.
    """
    features = np.asarray(features)

    # `None` replaces the mutable `[]` defaults; an explicit empty sequence is
    # still treated as "not provided". np.size avoids the ambiguous
    # `ndarray == []` comparison the list sentinel required.
    if mean is None or np.size(mean) == 0:
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)
    elif std is None or np.size(std) == 0:
        std = np.std(features, axis=0)

    # Accept plain lists as well as ndarrays for the reused statistics.
    mean = np.asarray(mean, dtype=float)
    std = np.asarray(std, dtype=float)

    # Zero-variance columns divide by zero; silence the warning because the
    # resulting NaNs are zeroed on the next line.
    with np.errstate(divide='ignore', invalid='ignore'):
        new_feature = (features - mean) / std
    new_feature[np.isnan(new_feature)] = 0
    return new_feature, mean, std

In [4]:
# Load the three tables as DataFrames.
S_A_pd = ReadData(Datapath)
SP_pd = ReadData(Labelpath)
R_pd = ReadData(Rewardpath)
# Plain ndarray views of the tables. DataFrame.as_matrix() was deprecated in
# pandas 0.23 and removed in 1.0; .values yields the same array on both old
# and new pandas.
S_A_matrix=S_A_pd.values
SP_matrix=SP_pd.values
R_matrix=R_pd.values

/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Nav_RDDL_Data.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Nav_RDDL_Label.txt
/home/wuga/Documents/Notebook/VAE-PLANNING/DATA/Navigation/Nav_RDDL_Reward.txt


In [5]:
# Widths are read from the column counts of the loaded matrices; the tuple
# unpacking also requires each matrix to be exactly 2-D.
_,STATE_SIZE=SP_matrix.shape
_,STATE_ACTION_SIZE=S_A_matrix.shape
# Presumably the state-action table holds the state columns plus the action
# columns, so the action width is the difference — verify against the data.
ACTION_SIZE=STATE_ACTION_SIZE-STATE_SIZE
_,REWARD_SIZE=R_matrix.shape
# Sequence length used by the reshapes below; the row count fed in must be a
# multiple of it.
STEP_SIZE = 12  #pre-defined when sampling

In [6]:
# Flat placeholders: the leading None dimension leaves the number of rows
# unspecified, the second dimension is the width read from the data above.

# States: [rows, STATE_SIZE]
states = tf.placeholder(tf.float32,[None, STATE_SIZE],name="States")

# Actions: [rows, ACTION_SIZE]
actions = tf.placeholder(tf.float32,[None, ACTION_SIZE],name="Actions")

# States' (next states): [rows, STATE_SIZE]
states_next = tf.placeholder(tf.float32, [None, STATE_SIZE],name="States_prime")

# Rewards: [rows, REWARD_SIZE]
rewards = tf.placeholder(tf.float32, [None, REWARD_SIZE],name="Rewards")

In [7]:
#RNN Shape
# Regroup the flat rows into sequences of STEP_SIZE consecutive rows, giving
# tensors of shape [-1, STEP_SIZE, width]; -1 lets TensorFlow infer the
# number of sequences from the number of rows fed in.
states_sq = tf.reshape(states,[-1,STEP_SIZE,STATE_SIZE])
actions_sq = tf.reshape(actions,[-1,STEP_SIZE,ACTION_SIZE])
states_next_sq = tf.reshape(states_next,[-1,STEP_SIZE,STATE_SIZE])
rewards_sq = tf.reshape(rewards,[-1,STEP_SIZE,REWARD_SIZE]) 

In [8]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=80):
    """Return a copy of ``graph_def`` with large constant payloads stripped.

    Every node is copied into a fresh GraphDef. For 'Const' nodes whose
    serialized ``tensor_content`` is longer than ``max_const_size`` bytes, the
    content is replaced by a short placeholder that records the original size.

    Args:
        graph_def: the GraphDef whose nodes are copied; it is not modified.
        max_const_size: largest ``tensor_content`` length (in bytes) that is
            kept verbatim.

    Returns:
        A new GraphDef containing the (possibly stripped) copies of the nodes.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf `bytes` field: assigning a str
                # raises TypeError on Python 3, so encode the placeholder.
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=80):
    """Display a TensorFlow graph inline using the tf-graph-basic web component.

    ``graph_def`` may be a GraphDef or any object exposing ``as_graph_def()``.
    Constants larger than ``max_const_size`` bytes are stripped before the
    graph text is embedded in the page.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    slim_def = strip_consts(graph_def, max_const_size=max_const_size)

    # Text form of the graph, quoted so it can be dropped into the script.
    pbtxt_literal = repr(str(slim_def))
    # Random suffix keeps element ids distinct when several graphs are shown.
    element_id = 'graph'+str(np.random.rand())

    widget_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    widget_html = widget_template.format(data=pbtxt_literal, id=element_id)

    iframe_template = """
        <iframe seamless style="width:960px;height:600px;border:0" srcdoc="{}"></iframe>
    """
    # The widget markup lives inside the srcdoc attribute, so its double
    # quotes have to be escaped.
    escaped_widget = widget_html.replace('"', '&quot;')
    display(HTML(iframe_template.format(escaped_widget)))

In [9]:
class FullNetworkCell(tf.nn.rnn_cell.RNNCell):
    """RNN cell made of two feed-forward networks sharing the same inputs.

    At every step both networks receive the step input together with the
    current cell state. The network built under scope "Transition" yields the
    next cell state; the one built under scope "Reward" yields the step output.
    Each network has ``num_hidden_layers`` activated layers of
    ``num_hidden_units`` units followed by one un-activated output layer.
    """

    def __init__(self, num_state_units, num_reward_units,num_hidden_layers=2,num_hidden_units=20,activation=tf.nn.sigmoid):
        self._activation = activation
        self._num_hidden_layers = num_hidden_layers
        self._num_hidden_units = num_hidden_units
        self._num_reward_units = num_reward_units
        self._num_state_units = num_state_units

    @property
    def state_size(self):
        """Width of the recurrent state (the Transition network's output)."""
        return self._num_state_units

    @property
    def output_size(self):
        """Width of the per-step output (the Reward network's output)."""
        return self._num_reward_units

    def _feed_forward(self, inputs, state, branch, num_outputs):
        """Build one sub-network under variable scope ``branch``.

        Layers are scoped "Layer0" .. "Layer<num_hidden_layers>"; the last one
        is the un-activated output layer of width ``num_outputs``.
        """
        last_layer = self._num_hidden_layers
        with tf.variable_scope(branch):
            # First layer reads the step input and the current state together.
            hidden = self._activation(
                tf.nn.rnn_cell._linear([inputs, state], self._num_hidden_units, True, 0.0, scope="Layer0"))
            for layer_id in range(1, last_layer):
                hidden = self._activation(
                    tf.nn.rnn_cell._linear(hidden, self._num_hidden_units, True, 0.0, scope="Layer"+str(layer_id)))
            return tf.nn.rnn_cell._linear(hidden, num_outputs, True, 0.0, scope="Layer"+str(last_layer))

    def __call__(self, inputs, state, scope=None):
        """Run one step and return ``(reward, next_state)``."""
        print(inputs.get_shape())
        # Default scope is the class name unless the caller supplies one.
        with tf.variable_scope(scope or type(self).__name__):
            next_state = self._feed_forward(inputs, state, "Transition", self._num_state_units)
            reward = self._feed_forward(inputs, state, "Reward", self._num_reward_units)
        return reward, next_state
    


In [10]:
#from tensorflow.python.ops import array_ops 
#lstm = FullNetworkCell(2,1,2,20)
#lstm = tf.nn.rnn_cell.DropoutWrapper(cell=lstm, output_keep_prob=0.9)
#initial_state = lstm.zero_state(array_ops.shape(states_sq[0])[0], dtype=tf.float32)+tf.constant([[1,2]],dtype=tf.float32)
#print(initial_state.get_shape())
#rnn_outputs, state = tf.nn.dynamic_rnn(lstm, actions_sq, dtype=tf.float32,initial_state=initial_state)

In [11]:
#sess = tf.InteractiveSession()
#sess.run(tf.global_variables_initializer())

In [12]:
#op=tf.trainable_variables()[1].assign([1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0])

In [13]:
#sess.run(op)

In [14]:
#tf.trainable_variables()[7].eval()

In [15]:
#tf.trainable_variables()

In [16]:
#reward_variables=[]
#for v in tf.trainable_variables():
#    if "Reward" in v.name:
#        reward_variables.append(v)

In [17]:
#saver = tf.train.Saver(reward_variables)

In [18]:
#for v in reward_variables:
#    print(v.name)
#    print(v.get_shape())

In [19]:
#saver.restore(sess,PathFinder("WEIGHTS_FOLDER/REWARD_NET.chkp"))

In [20]:
#tf.trainable_variables()[7].eval()

In [21]:
#transition_variables=[]
#for v in tf.trainable_variables():
#    if "Transition" in v.name:
#        transition_variables.append(v)
#saver = tf.train.Saver(transition_variables)
#saver.restore(sess,PathFinder("WEIGHTS_FOLDER/TRANSITION_NET.chkp"))

In [22]:
#show_graph(tf.get_default_graph().as_graph_def())

In [23]:
class ActionOptimizer(object):
    """Gradient search for an action sequence that maximizes predicted reward.

    The action variable is reshaped to ``[-1, num_step, 2]`` and fed as the
    input sequence of a FullNetworkCell unrolled with ``tf.nn.dynamic_rnn``.
    The per-step cell outputs are summed over time, and RMSProp updates only
    the action variable to increase that sum. After every update the variable
    is clipped to [-1, 1]. The cell weights are restored from checkpoints in
    ``load_pretrained`` and are not part of the optimizer's ``var_list``.

    Constructing an instance builds the graph, opens a
    ``tf.InteractiveSession``, initializes all variables and restores the
    pretrained weights.
    """

    def __init__(self,
                a, # Action variable (tf.Variable) to optimize; reshaped to [-1, num_step, 2]
                num_step, # Number of RNN step, this is a fixed step RNN sequence, 12 for navigation
                learning_rate=0.01, #Learning rate
                cell_state_size = 2,
                cell_output_size = 1,
                cell_layer_size = 2,
                cell_node_size = 20,
                cell_activation = tf.nn.sigmoid):
        # Keep the variable handed in by the caller so that graph construction
        # and Optimize() never depend on a notebook-global named `a`.
        self.action_variable = a
        self.action = tf.reshape(a,[-1,num_step,2]) #Reshape actions into sequences
        print(self.action)
        self.scope = "LSTM"
        self.num_step = num_step
        self.learning_rate = learning_rate
        self.cell_state_size = cell_state_size
        self.cell_output_size = cell_output_size
        self.cell_layer_size = cell_layer_size
        self.cell_node_size = cell_node_size
        self.cell_activation = cell_activation
        self._p_create_rnn_graph()
        self._p_create_loss()
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
        self.load_pretrained()

    def _p_create_rnn_graph(self):
        """Unroll the cell over the action sequence and sum its outputs over time."""
        cell = FullNetworkCell(self.cell_state_size,self.cell_output_size,self.cell_layer_size,self.cell_node_size,self.cell_activation)
        # The batch size is read from the action tensor at run time; the
        # initial cell state is all zeros.
        batch_size = array_ops.shape(self.action)[0]
        initial_state = cell.zero_state(batch_size, dtype=tf.float32)
        rnn_outputs, state = tf.nn.dynamic_rnn(cell, self.action, dtype=tf.float32,initial_state=initial_state)
        self.outputs = rnn_outputs
        self.last_state = state
        # Sum of the per-step outputs along the time axis.
        self.pred = tf.reduce_sum(self.outputs,1)
        # Built once here so Optimize() does not add a new op on every call.
        self.best_index = tf.argmax(self.pred,0)
        print("self.pred:{0}".format(self.pred))

    def _p_create_loss(self):
        """Create the loss, the RMSProp update on the action variable and the clip op."""
        # The [-1, 1] bounds are enforced by clipping after each update rather
        # than by Lagrange multipliers. Background on the Lagrangian variant:
        #   http://mat.gsia.cmu.edu/classes/QUANT/NOTES/chap4/node6.html
        #   http://stackoverflow.com/questions/12284638/gradient-descent-with-constraints-lagrange-multipliers
        objective = tf.reduce_mean(self.pred)
        # Maximizing the objective == minimizing its negative.
        self.loss = -objective
        print(self.loss.get_shape())
        self.optimizer = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss, var_list=[self.action_variable])
        # Single reusable op; creating tf.assign inside the training loop would
        # grow the graph by two ops per iteration.
        self.clip_action = tf.assign(self.action_variable, tf.clip_by_value(self.action_variable, -1, 1))

    def Optimize(self,epoch=100):
        """Run ``epoch`` update+clip steps, printing the loss after each one.

        Afterwards prints the index of the sequence with the highest summed
        output, that action sequence, the final cell state and the per-step
        outputs.
        """
        new_loss = self.sess.run([self.loss])
        print('Loss in epoch {0}: {1}'.format("Initial", new_loss))
        for step in range(epoch):
            self.sess.run([self.optimizer])
            self.sess.run(self.clip_action)
            new_loss = self.sess.run([self.loss])
            print('Loss in epoch {0}: {1}'.format(step, new_loss))
        minimum_costs_id=self.sess.run(self.best_index)
        print(minimum_costs_id)
        print('Optimal Action Squence:{0}'.format(self.sess.run(self.action)[minimum_costs_id[0]]))
        print('The last state:{0}'.format(self.sess.run(self.last_state)))
        print('Rewards each time step:{0}'.format(self.sess.run(self.outputs)))

    def _p_restore_matching(self, keyword, path):
        """Restore from ``path`` every trainable variable whose name contains ``keyword``."""
        matching = [v for v in tf.trainable_variables() if keyword in v.name]
        saver = tf.train.Saver(matching)
        saver.restore(self.sess,PathFinder(path))

    def load_pretrained(self):
        """Restore the Reward and Transition sub-network weights from their checkpoints."""
        self._p_restore_matching("Reward", "WEIGHTS_FOLDER/REWARD_NET.chkp")
        self._p_restore_matching("Transition", "WEIGHTS_FOLDER/TRANSITION_NET.chkp")
        print("Finish Loading!~")

    def _p_scope_saver(self):
        """Saver over the trainable variables whose name starts with ``self.scope``."""
        # NOTE(review): nothing in this class opens a variable scope named
        # self.scope ("LSTM"); confirm this filter actually matches variables,
        # otherwise the Saver is handed an empty list.
        lstm_variables = [v for v in tf.trainable_variables() if v.name.startswith(self.scope)]
        return tf.train.Saver(lstm_variables)

    def save_weights(self, path = "WEIGHTS_FOLDER/LSTM.chpt"):
        """Save the variables under ``self.scope`` to ``path``."""
        saver = self._p_scope_saver()
        saver.save(self.sess, PathFinder(path))

    def reload_weights(self, path = "WEIGHTS_FOLDER/LSTM.chpt"):
        """Restore the variables under ``self.scope`` from ``path``."""
        saver = self._p_scope_saver()
        saver.restore(self.sess,PathFinder(path))

In [24]:
# Free action variable to optimize: 20 values = 10 steps x 2 action components
# (ActionOptimizer reshapes it to [-1, num_step, 2]).
# NOTE(review): no seed is passed here, so the starting point presumably
# differs between runs — confirm whether reproducibility matters.
a = tf.Variable(tf.truncated_normal(shape=[20],mean=0.0, stddev=1.0),name="action")
# NOTE(review): the horizon here is 10 while STEP_SIZE above is 12 — confirm
# this is intended.
rnn_inst = ActionOptimizer(a, 10,cell_node_size=32,cell_activation=tf.nn.sigmoid)  

Tensor("Reshape_4:0", shape=(1, 10, 2), dtype=float32)
(1, 2)
self.pred:Tensor("Sum:0", shape=(1, 1), dtype=float32)
()
/home/wuga/Documents/Notebook/VAE-PLANNING/WEIGHTS_FOLDER/REWARD_NET.chkp
/home/wuga/Documents/Notebook/VAE-PLANNING/WEIGHTS_FOLDER/TRANSITION_NET.chkp
Finish Loading!~


In [25]:
# Run 500 update steps on the action variable; the loss is printed after each.
rnn_inst.Optimize(500)

Loss in epoch Initial: [134.06165]
Loss in epoch 0: [139.76973]
Loss in epoch 1: [139.23134]
Loss in epoch 2: [138.82382]
Loss in epoch 3: [138.46034]
Loss in epoch 4: [138.10768]
Loss in epoch 5: [137.76125]
Loss in epoch 6: [137.41797]
Loss in epoch 7: [137.07549]
Loss in epoch 8: [136.73196]
Loss in epoch 9: [136.38586]
Loss in epoch 10: [136.03574]
Loss in epoch 11: [135.68031]
Loss in epoch 12: [135.31834]
Loss in epoch 13: [134.94855]
Loss in epoch 14: [134.5697]
Loss in epoch 15: [134.18042]
Loss in epoch 16: [133.77936]
Loss in epoch 17: [133.36481]
Loss in epoch 18: [132.93918]
Loss in epoch 19: [132.50015]
Loss in epoch 20: [132.0396]
Loss in epoch 21: [131.55325]
Loss in epoch 22: [131.03632]
Loss in epoch 23: [130.48344]
Loss in epoch 24: [129.88919]
Loss in epoch 25: [129.24783]
Loss in epoch 26: [128.55331]
Loss in epoch 27: [127.79932]
Loss in epoch 28: [127.00242]
Loss in epoch 29: [126.1812]
Loss in epoch 30: [125.37503]
Loss in epoch 31: [124.52676]
Loss in epoch 32: 

In [26]:
# Render the current default graph inline.
show_graph(tf.get_default_graph().as_graph_def())