In [1]:
import numpy as np
import tensorflow as tf
import os

In [2]:
LOGDIR = "logs/"

In [3]:
log_writer = tf.summary.FileWriter(LOGDIR)

In [4]:
class DNC:
    def __init__(self,input_size,output_size,seq_len,num_words,word_size,read_heads):
        with tf.device('/device:GPU:1'):
            self.input_size = input_size
            self.output_size = output_size

            self.num_words = num_words
            self.word_size = word_size

            self.read_heads = read_heads

            self.interface_size = (word_size*read_heads) + (3*word_size)+ (5*read_heads)+3

            self.controller_input_size = (read_heads*word_size)+input_size

            self.controller_output_size = output_size + self.interface_size

            self.output_vector = tf.truncated_normal([1,self.output_size],stddev=0.1)
            self.interface_vector = tf.truncated_normal([1,self.interface_size],stddev=0.1)

            self.memory_matrix = tf.zeros([num_words,word_size])

            self.usage_vector = tf.fill([num_words,1],0.0)
            self.temp_link_matrix = tf.zeros([num_words,num_words])

            self.precedence_weighting  = tf.zeros([num_words,1])

            self.read_weightings = tf.fill([num_words,read_heads],1e-6)
            self.write_weightings = tf.fill([num_words,1],1e-6)
            self.read_vectors = tf.fill([read_heads,word_size],1e-6)

            # Controller
            with tf.name_scope("controller"):
                self.input_x = tf.placeholder(tf.float32,shape=[1,self.input_size],name = "input_x")
                self.output_y = tf.placeholder(tf.float32,shape=[1,self.output_size],name="output_y")


                self.weights1 = tf.get_variable("weights1",shape=[self.controller_input_size,32],initializer=tf.contrib.layers.xavier_initializer())
                self.biases1 = tf.get_variable("biases1",shape=[32],initializer=tf.zeros_initializer())
                self.weights2 = tf.get_variable("weights2",shape=[32,self.controller_output_size],initializer=tf.contrib.layers.xavier_initializer())
                self.biases2 = tf.get_variable("biases2",shape=[self.controller_output_size])

            self.output_vector_weights = tf.get_variable("Wy",shape=[self.controller_output_size,self.output_size],initializer=tf.contrib.layers.xavier_initializer())
            self.interface_weights = tf.get_variable("Wiv",shape=[self.controller_output_size,self.interface_size],initializer=tf.contrib.layers.xavier_initializer())

            self.read_vectors_weights = tf.get_variable("Wr",shape=[self.read_heads*self.word_size,self.output_size],initializer=tf.contrib.layers.xavier_initializer())
        
    def content_lookup(self,key,key_strength):
        normalized_memory = tf.nn.l2_normalize(self.memory_matrix,1)
        normalized_key = tf.nn.l2_normalize(key,0)
        
        z = tf.matmul(normalized_memory,normalized_key,transpose_b=True)
        
        return tf.nn.softmax(key_strength*z,0)
    
    # used to provided new locations for writing
    def calc_allocation_weighting(self):
        # multiply usage vector by -1 to get locations in ascending order of usage
        print(self.usage_vector)
        sorted_usage_vector,free_list = tf.nn.top_k(-1*tf.squeeze(self.usage_vector),k = self.num_words)
        # since usage vector was multiplied by -1,after sorted, return to its original value
        sorted_usage_vector = tf.reshape(sorted_usage_vector*-1,[self.num_words,1])
        free_list = tf.reshape(free_list,[self.num_words,1])
        print("allocation:",sorted_usage_vector,free_list)
        
        cumulative_product = tf.cumprod(sorted_usage_vector,axis=1,exclusive=True)
        unordered_allocation_weighting =  (1-sorted_usage_vector)*cumulative_product
        
        allocation_weights = tf.zeros([self.num_words])
        identity_matrix = tf.constant(np.identity(self.num_words,dtype=np.float32))
        
        for pos, idx in enumerate(tf.unstack(free_list)):
            #flatten
            m = tf.squeeze(tf.slice(identity_matrix, [idx, 0], [1, -1]))
            #add to weight matrix
            allocation_weights += m*unordered_allocation_weighting[0, pos]
        #the allocation weighting for each row in memory
        return tf.reshape(allocation_weights, [self.num_words, 1])

    
    def one_plus(self,x):
        return 1+tf.nn.softplus(tf.expand_dims(x,0))
    
    def time_step(self):
        #print(x)
        with tf.name_scope("controller"):
            step_input = tf.concat([self.input_x,tf.reshape(self.read_vectors,[1,self.read_heads*self.word_size])],1)
            #print(tf.Print(step_input,[step_input]))
            #controller forward propagation
            layer1_activation = tf.nn.relu(tf.matmul(step_input,self.weights1)+self.biases1)
            #print("layer 1 act",layer1_activation)
            layer2_activation = tf.nn.relu(tf.matmul(layer1_activation,self.weights2)+self.biases2)
        #print("layer 2 act",layer2_activation)
        
        #output vector
            self.output_vector = tf.matmul(layer2_activation,self.output_vector_weights)
        #print("output vector",self.output_vector)
        
        with tf.name_scope("interface_vector"):
        #interface vector
            self.interface_vector = tf.matmul(layer2_activation,self.interface_weights)
            #print("Interface vector",self.interface_vector)

            #Interact with the memory(read and write)
            ##Slice interface vector to get the 10 components of it, the partition its an indexes vector(values from 0 to 9)
            partition_indexes = tf.constant([[0]*(self.read_heads*self.word_size) #read keys
                                    +[1]*(self.read_heads)#read strengths 
                                    +[2]*(self.word_size)
                                    +[3] #write strength
                                    +[4]*(self.word_size) #erase vector
                                    +[5]*(self.word_size) #write vector
                                    +[6]*(self.read_heads) #free gates
                                    +[7] #allocation gate
                                    +[8] #write gate
                                    +[9]*(self.read_heads*3) #read modes

                                    ],dtype = tf.int32)
            #print("indexes",partition_indexes)

            #print(partition_indexes)
            (read_keys,read_strengths,write_key
            ,write_strength,erase_vector,write_vector,
            free_gates,allocation_gate,write_gate,read_modes) = tf.dynamic_partition(self.interface_vector,partition_indexes,10)

            ##Make every value have the correct shape and be in the correct domain
            read_keys = tf.reshape(read_keys,[self.read_heads,self.word_size])
            #print("read keys",read_keys)

            read_strengths = self.one_plus(read_strengths)
            #print("read_strengts",read_strengths)

            write_key = tf.expand_dims(write_key,0)
            #print("write key",write_key)
            write_strength = self.one_plus(write_strength)
            #print("write strength",write_strength)

            erase_vector = tf.nn.sigmoid(tf.expand_dims(erase_vector,0))
            write_vector = tf.expand_dims(write_vector,0)

            free_gates =  tf.nn.sigmoid(tf.expand_dims(free_gates,0))
            allocation_gate = tf.nn.sigmoid(allocation_gate)
            write_gate = tf.nn.sigmoid(write_gate)
            
            tf.summary.scalar("allocation_gate",allocation_gate)

            read_modes = tf.nn.softmax(tf.reshape(read_modes,[3,self.read_heads]))
        
        
        ## Writing to memory(dynamic allocation and content lookup)
        ### dynamic memory allocation
        with tf.name_scope("dynamic_memory_allocation"):
            retention_vector = tf.reduce_prod(1-free_gates*self.read_weightings,reduction_indices=1,keep_dims=True)
            self.usage_vector = (self.usage_vector + self.write_weightings   - self.usage_vector* self.write_weightings) *retention_vector
            allocation_weights = self.calc_allocation_weighting()
        
        ### content lookup for  writing
        write_content_weigths = self.content_lookup(write_key,write_strength)
        
        ### final write weights
        self.write_weightings = write_gate*(allocation_gate*allocation_weights+(1-allocation_gate)*write_content_weigths)
        #print(self.write_weightings )
        
        ### final writing to memory(first erase, then write)
        self.memory_matrix  = self.memory_matrix * (1-tf.matmul(self.write_weightings,erase_vector))+(tf.matmul(self.write_weightings,write_vector))
        
        ## reading from memory(by content and by temporal order)
        
        ### temporal order
        #### temporal link matrix update using write weights, and previus precedence weighitngs
        #print("antes weightis",self.write_weightings)
        write_weightsi = tf.matmul(self.write_weightings,tf.ones([1,self.num_words]))
        #print("yua")
        #print(self.precedence_weighting)
        self.temp_link_matrix = (1-write_weightsi-tf.transpose(write_weightsi)) * self.temp_link_matrix + tf.matmul(self.write_weightings,self.precedence_weighting,transpose_b=True)
        self.temp_link_matrix = self.temp_link_matrix * (tf.ones([self.num_words,self.num_words]) - tf.constant(np.identity(self.num_words,dtype=np.float32)))
        
        
        self.precedence_weighting = (1 - tf.reduce_sum(self.write_weightings,reduction_indices=0))* self.precedence_weighting + self.write_weightings
        ### read modes (backguard,content,forward)
        back_weigthing = read_modes[0]*tf.matmul(self.temp_link_matrix,self.read_weightings,transpose_a=True)
        #print(back_weigthing)
        content_weigthing = read_modes[1]*self.content_lookup(read_keys,read_strengths)
        #print(content_weigthing)
        forward_weithing = read_modes[2]*tf.matmul(self.temp_link_matrix,self.read_weightings)
        #print(forward_weithing)
        
        self.read_weightings  = back_weigthing + content_weigthing + forward_weithing
        
        self.read_vectors = tf.transpose(tf.matmul(self.memory_matrix,self.read_weightings,transpose_a=True))
        #print(self.memory_matrix)
        #print(self.read_weightings)
        #print(self.read_vectors)
        
        ### apply weights to read vectors
        weighted_read_vectors = tf.matmul(tf.reshape(self.read_vectors,[1,self.read_heads*self.word_size]),self.read_vectors_weights)
        #print(self.output_vector)
        #print(weighted_read_vectors)
        return self.output_vector + weighted_read_vectors
    
    #output list of numbers (one hot encoded) by running the step function
    def run(self):
        big_out = []
        for t, seq in enumerate(tf.unstack(self.input_x, axis=0)):
            seq = tf.expand_dims(seq, 0)
            y = self.time_step(seq)
            print(y)
            big_out.append(y)
        return tf.stack(big_out, axis=0)

In [5]:

def train():

    #generate the input output sequences, randomly intialized
    num_seq = 10
    seq_len = 6
    seq_width = 4
    iterations = 600
    con = np.random.randint(0, seq_width,size=seq_len)
    seq = np.zeros((seq_len, seq_width))
    seq[np.arange(seq_len), con] = 1
    end = np.asarray([[-1]*seq_width])
    zer = np.zeros((seq_len, seq_width))

    graph = tf.Graph()
    
    with graph.as_default():
        #training time
        with tf.Session() as sess:
            
            #init the DNC
            dnc = DNC(input_size=seq_width, output_size=seq_width, seq_len=seq_len, num_words=2, word_size=5, read_heads=1)
            
            #calculate the predicted output
            output_logits = tf.reshape(tf.squeeze(dnc.time_step()),[1,seq_width])
            #print(output_logits)
            #print(dnc.output_y)
            output = tf.nn.sigmoid(output_logits)
            #print(output,dnc.output_y)
            #compare prediction to reality, get loss via sigmoid cross entropy
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output_logits, labels=dnc.output_y))
            #print(loss)
            #use regularizers for each layer of the controller
            regularizers = (tf.nn.l2_loss(dnc.weights1) + tf.nn.l2_loss(dnc.weights2) +
                            tf.nn.l2_loss(dnc.biases1) + tf.nn.l2_loss(dnc.biases2))
            #to help the loss convergence faster
            loss += 5e-4 * regularizers
            #optimize the entire thing (memory + controller) using gradient descent. dope
            optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(loss)
            
            #initialize input output pairs
            sess.run(tf.global_variables_initializer())
            
            
            log_writer.add_graph(graph)
            
            final_i_data = np.concatenate((seq, zer), axis=0)
            final_o_data = np.concatenate((zer, seq), axis=0)
            #for each iteration
            for i in range(0, iterations+1):
                #feed in each input output pair
                #print(i)
                for j in range(len(final_i_data)):
                    
                    feed_dict = {dnc.input_x : np.reshape(final_i_data[j],[1,seq_width]), dnc.output_y: np.reshape(final_o_data[j],[1,seq_width])}
                #make predictions
                    debug = sess.run(dnc.usage_vector, feed_dict=feed_dict)
                    print("size",len(debug[0]),debug)
                    l, _, predictions,write_weights = sess.run([loss, optimizer, output,dnc.write_weightings], feed_dict=feed_dict)
                #print(write_weights)
                if i%10==0:
                    #print(read_weights)
                    print(i,l)
                    #debug = sess.run([dnc.usage_vector], feed_dict=feed_dict)
                    #print(debug)
            #print results
            #print(np.reshape(final_i_data[0],[1,4]).shape)
            #print(final_o_data[0].shape)
            #print(predictions)



In [6]:
train()

Tensor("dynamic_memory_allocation/mul_2:0", shape=(2, 1), dtype=float32)
allocation: Tensor("dynamic_memory_allocation/Reshape:0", shape=(2, 1), dtype=float32) Tensor("dynamic_memory_allocation/Reshape_1:0", shape=(2, 1), dtype=int32)


ValueError: Tried to convert 'begin' to a tensor and failed. Error: Shapes must be equal rank, but are 1 and 0
	From merging shape 0 with other shapes. for 'dynamic_memory_allocation/Slice/packed' (op: 'Pack') with input shapes: [1], [].