In [1]:
import numpy as np
import tensorflow as tf
import os

In [2]:
class DNC:
    def __init__(self,input_size,output_size,seq_len,num_words,word_size,read_heads):
        with tf.device('/device:GPU:1'):
            self.input_size = input_size
            self.output_size = output_size

            self.num_words = num_words
            self.word_size = word_size

            self.read_heads = read_heads

            self.interface_size = (word_size*read_heads) + (3*word_size)+ (5*read_heads)+3

            self.controller_input_size = (read_heads*word_size)+input_size

            self.controller_output_size = output_size + self.interface_size

            self.output_vector = tf.truncated_normal([1,self.output_size],stddev=0.1)
            self.interface_vector = tf.truncated_normal([1,self.interface_size],stddev=0.1)

            self.memory_matrix = tf.zeros([num_words,word_size])

            self.usage_vector = tf.fill([num_words,1],1e-6)
            self.temp_link_matrix = tf.zeros([num_words,num_words])

            self.precedence_weighting  = tf.zeros([num_words,1])

            self.read_weightings = tf.fill([num_words,read_heads],1e-6)
            self.write_weightings = tf.fill([num_words,1],1e-6)
            self.read_vectors = tf.fill([read_heads,word_size],1e-6)

            # Controller
            self.input_x = tf.placeholder(tf.float32,shape=[seq_len*2,self.input_size],name = "input_x")
            self.output_y = tf.placeholder(tf.float32,shape=[seq_len*2,self.output_size],name="output_y")


            self.weights1 = tf.get_variable("weights1",shape=[self.controller_input_size,32],initializer=tf.contrib.layers.xavier_initializer())
            self.biases1 = tf.get_variable("biases1",shape=[32],initializer=tf.zeros_initializer())
            self.weights2 = tf.get_variable("weights2",shape=[32,self.controller_output_size],initializer=tf.contrib.layers.xavier_initializer())
            self.biases2 = tf.get_variable("biases2",shape=[self.controller_output_size])

            self.output_vector_weights = tf.get_variable("Wy",shape=[self.controller_output_size,self.output_size],initializer=tf.contrib.layers.xavier_initializer())
            self.interface_weights = tf.get_variable("Wiv",shape=[self.controller_output_size,self.interface_size],initializer=tf.contrib.layers.xavier_initializer())

            self.read_vectors_weights = tf.get_variable("Wr",shape=[self.read_heads*self.word_size,self.output_size],initializer=tf.contrib.layers.xavier_initializer())
        
    def content_lookup(self,key,key_strength):
        normalized_memory = tf.nn.l2_normalize(self.memory_matrix,1)
        normalized_key = tf.nn.l2_normalize(key,0)
        
        z = tf.matmul(normalized_memory,normalized_key,transpose_b=True)
        
        return tf.nn.softmax(key_strength*z)
    
    # used to provided new locations for writing
    def calc_allocation_weighting(self):
        # multiply usage vector by -1 to get locations in ascending order of usage
        sorted_usage_vector,free_list = tf.nn.top_k(-1*self.usage_vector,k = self.num_words)
        # since usage vector was multiplied by -1,after sorted, return to its original value
        sorted_usage_vector = sorted_usage_vector*-1
        
        cumulative_product = tf.cumprod(sorted_usage_vector,axis=0,exclusive=True)
        unordered_allocation_weighting =  (1-sorted_usage_vector)*cumulative_product
        
        allocation_weights = tf.zeros([self.num_words])
        identity_matrix = tf.constant(np.identity(self.num_words,dtype=np.float32))
        
        for pos, idx in enumerate(tf.unstack(free_list[0])):
            #flatten
            m = tf.squeeze(tf.slice(identity_matrix, [idx, 0], [1, -1]))
            #add to weight matrix
            allocation_weights += m*unordered_allocation_weighting[0, pos]
        #the allocation weighting for each row in memory
        return tf.reshape(allocation_weights, [self.num_words, 1])

    
    def one_plus(self,x):
        return 1+tf.nn.softplus(x)
    
    def time_step(self,x):
        step_input = tf.concat([x,tf.reshape(self.read_vectors,[1,self.read_heads*self.word_size])],1)
        
        #controller forward propagation
        layer1_activation = tf.nn.relu(tf.matmul(step_input,self.weights1)+self.biases1)
        #print("layer 1 act",layer1_activation)
        layer2_activation = tf.nn.relu(tf.matmul(layer1_activation,self.weights2)+self.biases2)
        #print("layer 2 act",layer2_activation)
        
        #output vector
        self.output_vector = tf.matmul(layer2_activation,self.output_vector_weights)
        #print("output vector",self.output_vector)
        
        #interface vector
        self.interface_vector = tf.matmul(layer2_activation,self.interface_weights)
        #print(self.interface_vector)
        
        #Interact with the memory(read and write)
        ##Slice interface vector to get the 10 components of it, the partition its an indexes vector(values from 0 to 9)
        partition_indexes = tf.constant([[0]*(self.read_heads*self.word_size) #read keys
                                +[1]*(self.read_heads)#read strengths 
                                +[2]*(self.word_size)
                                +[3] #write strength
                                +[4]*(self.word_size) #erase vector
                                +[5]*(self.word_size) #write vector
                                +[6]*(self.read_heads) #free gates
                                +[7] #allocation gate
                                +[8] #write gate
                                +[9]*(self.read_heads*3) #read modes
                                
                                ],dtype = tf.int32)
        
        #print(partition_indexes)
        (read_keys,read_strengths,write_key
        ,write_strength,erase_vector,write_vector,
        free_gates,allocation_gate,write_gate,read_modes) = tf.dynamic_partition(self.interface_vector,partition_indexes,10)
        
        ##Make every value have the correct shape and be in the correct domain
        read_keys = tf.reshape(read_keys,[self.read_heads,self.word_size])
        print(read_keys)
        read_strengths = self.one_plus(read_strengths)
        print(read_strengths)
        
        write_key = tf.expand_dims(write_key,0)
        write_strength = self.one_plus(write_key)
        
        erase_vector = tf.nn.sigmoid(tf.expand_dims(erase_vector,0))
        write_vector = tf.expand_dims(write_vector,0)
        
        free_gates =  tf.nn.sigmoid(tf.expand_dims(free_gates,0))
        allocation_gate = tf.nn.sigmoid(allocation_gate)
        write_gate = tf.nn.sigmoid(write_gate)
        
        read_modes = tf.nn.softmax(tf.reshape(read_modes,[3,self.read_heads]))
        
        
        ## Writing to memory(dynamic allocation and content lookup)
        ### dynamic memory allocation
        retention_vector = tf.reduce_prod(1-free_gates*self.read_weightings,reduction_indices=1)
        
        self.usage_vector = (self.usage_vector + self.write_weightings  - self.usage_vector* self.write_weightings)*retention_vector
        
        allocation_weights = self.calc_allocation_weighting()
        
        ### content lookup for  writing
        write_content_weigths = self.content_lookup(write_key,write_strength)
        
        ### final write weights
        self.write_weightings = write_gate*(allocation_gate*allocation_weights+(1-allocation_gate)*write_content_weigths)
        #print(self.write_weightings )
        
        ### final writing to memory(first erase, then write)
        self.memory_matrix  = self.memory_matrix * (1-tf.matmul(self.write_weightings,erase_vector))+(tf.matmul(self.write_weightings,write_vector))
        
        ## reading from memory(by content and by temporal order)
        
        ### temporal order
        #### temporal link matrix update using write weights, and previus precedence weighitngs
        #print("antes weightis",self.write_weightings)
        write_weightsi = tf.matmul(self.write_weightings,tf.ones([1,self.num_words]))
        #print("yua")
        #print(self.precedence_weighting)
        self.temp_link_matrix = (1-write_weightsi-tf.transpose(write_weightsi)) * self.temp_link_matrix + tf.matmul(self.write_weightings,self.precedence_weighting,transpose_b=True)
        self.temp_link_matrix = self.temp_link_matrix * (tf.ones([self.num_words,self.num_words]) - tf.constant(np.identity(self.num_words,dtype=np.float32)))
        
        ### read modes (backguard,content,forward)
        back_weigthing = read_modes[0]*tf.matmul(self.temp_link_matrix,self.read_weightings,transpose_a=True)
        #print(back_weigthing)
        content_weigthing = read_modes[1]*self.content_lookup(read_keys,read_strengths)
        #print(content_weigthing)
        forward_weithing = read_modes[2]*tf.matmul(self.temp_link_matrix,self.read_weightings)
        #print(forward_weithing)
        
        self.read_weightings  = back_weigthing + content_weigthing + forward_weithing
        
        self.read_vectors = tf.transpose(tf.matmul(self.memory_matrix,self.read_weightings,transpose_a=True))
        #print(self.memory_matrix)
        #print(self.read_weightings)
        #print(self.read_vectors)
        
        ### apply weights to read vectors
        weighted_read_vectors = tf.matmul(tf.reshape(self.read_vectors,[1,self.read_heads*self.word_size]),self.read_vectors_weights)
        #print(self.output_vector)
        #print(weighted_read_vectors)
        return self.output_vector + weighted_read_vectors
    
    #output list of numbers (one hot encoded) by running the step function
    def run(self):
        big_out = []
        for t, seq in enumerate(tf.unstack(self.input_x, axis=0)):
            seq = tf.expand_dims(seq, 0)
            y = self.time_step(seq)
            big_out.append(y)
        return tf.stack(big_out, axis=0)

In [3]:

def main(argv=None):

    #generate the input output sequences, randomly intialized
    num_seq = 10
    seq_len = 6
    seq_width = 4
    iterations = 1000
    con = np.random.randint(0, seq_width,size=seq_len)
    seq = np.zeros((seq_len, seq_width))
    seq[np.arange(seq_len), con] = 1
    end = np.asarray([[-1]*seq_width])
    zer = np.zeros((seq_len, seq_width))

    graph = tf.Graph()
    
    with graph.as_default():
        #training time
        with tf.Session() as sess:
            
            #init the DNC
            dnc = DNC(input_size=seq_width, output_size=seq_width, seq_len=seq_len, num_words=10, word_size=4, read_heads=1)
            
            #calculate the predicted output
            output = tf.squeeze(dnc.run())
            #print(output,dnc.output_y)
            #compare prediction to reality, get loss via sigmoid cross entropy
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=dnc.output_y))
            #print(loss)
            #use regularizers for each layer of the controller
            regularizers = (tf.nn.l2_loss(dnc.weights1) + tf.nn.l2_loss(dnc.weights2) +
                            tf.nn.l2_loss(dnc.biases1) + tf.nn.l2_loss(dnc.biases2))
            #to help the loss convergence faster
            loss += 5e-4 * regularizers
            #optimize the entire thing (memory + controller) using gradient descent. dope
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
            
            #initialize input output pairs
            sess.run(tf.global_variables_initializer())
            final_i_data = np.concatenate((seq, zer), axis=0)
            final_o_data = np.concatenate((zer, seq), axis=0)
            #for each iteration
            for i in range(0, iterations+1):
                #feed in each input output pair
                
                feed_dict = {dnc.input_x : final_i_data, dnc.output_y: final_o_data}
                #make predictions
                print(final_i_data.shape,final_o_data.shape)
                l, _, predictions = sess.run([loss, optimizer, output], feed_dict=feed_dict)
                if i%100==0:
                    print(i,l)
            #print results
            print(final_i_data)
            print(final_o_data)
            print(predictions)



In [4]:
if __name__ == '__main__':
    tf.app.run()

Tensor("Reshape_1:0", shape=(1, 4), dtype=float32)
Tensor("add_2:0", shape=(?,), dtype=float32)
Tensor("Reshape_6:0", shape=(1, 4), dtype=float32)
Tensor("add_23:0", shape=(?,), dtype=float32)
Tensor("Reshape_11:0", shape=(1, 4), dtype=float32)
Tensor("add_44:0", shape=(?,), dtype=float32)
Tensor("Reshape_16:0", shape=(1, 4), dtype=float32)
Tensor("add_65:0", shape=(?,), dtype=float32)
Tensor("Reshape_21:0", shape=(1, 4), dtype=float32)
Tensor("add_86:0", shape=(?,), dtype=float32)
Tensor("Reshape_26:0", shape=(1, 4), dtype=float32)
Tensor("add_107:0", shape=(?,), dtype=float32)
Tensor("Reshape_31:0", shape=(1, 4), dtype=float32)
Tensor("add_128:0", shape=(?,), dtype=float32)
Tensor("Reshape_36:0", shape=(1, 4), dtype=float32)
Tensor("add_149:0", shape=(?,), dtype=float32)
Tensor("Reshape_41:0", shape=(1, 4), dtype=float32)
Tensor("add_170:0", shape=(?,), dtype=float32)
Tensor("Reshape_46:0", shape=(1, 4), dtype=float32)
Tensor("add_191:0", shape=(?,), dtype=float32)
Tensor("Reshape_51

InvalidArgumentError: Matrix size-incompatible: In[0]: [10,4], In[1]: [1,4]
	 [[Node: MatMul_6 = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](mul_19, ExpandDims_3)]]
	 [[Node: Squeeze_120/_741 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_22475_Squeeze_120", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'MatMul_6', defined at:
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-5be245e2ed29>", line 2, in <module>
    tf.app.run()
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "<ipython-input-3-843b88c9ed7c>", line 25, in main
    output = tf.squeeze(dnc.run())
  File "<ipython-input-2-bde03500ba96>", line 191, in run
    y = self.time_step(seq)
  File "<ipython-input-2-bde03500ba96>", line 152, in time_step
    self.memory_matrix  = self.memory_matrix * (1-tf.matmul(self.write_weightings,erase_vector))+(tf.matmul(self.write_weightings,write_vector))
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1891, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2437, in _mat_mul
    name=name)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/luis/anaconda2/envs/tf14_gpu_py3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Matrix size-incompatible: In[0]: [10,4], In[1]: [1,4]
	 [[Node: MatMul_6 = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](mul_19, ExpandDims_3)]]
	 [[Node: Squeeze_120/_741 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_22475_Squeeze_120", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [None]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

get_available_gpus()