In [1]:
import numpy as np
import tensorflow as tf
import os

In [2]:
LOGDIR = "unit_logs/"

In [3]:
log_writer = tf.summary.FileWriter(LOGDIR)

In [4]:
class DNC:
    def __init__(self,input_size,output_size,seq_len,num_words,word_size,read_heads,device = '/device:GPU:1'):
        self.output_size = output_size
        self.controller_output_size = output_size
        self.read_heads = read_heads
        self.word_size = word_size
        self.num_words = num_words
        
        with tf.device(device):
            #controller
            
            self.read_weightings = tf.Variable( tf.fill([num_words,read_heads],1e-8,name = "read_weights_wr_init"),trainable=False,name="read_weights_wr")
            self.read_vectors = tf.Variable(tf.fill([read_heads,word_size],1e-8,name="init_read_vectors"),trainable=False,name="read_vectors_r")
            
            self.memory_matrix = tf.Variable( tf.truncated_normal([num_words,word_size]),trainable=False,name = "memory_matrix_M")
            
            with tf.name_scope("calc_output_vector_y/"):
                self.read_vectors_weights = tf.get_variable("Wr",shape=[self.read_heads*self.word_size,self.output_size],initializer=tf.contrib.layers.xavier_initializer())
            
            with tf.name_scope("train_data/inputs_x"):        
                
                
                self.input_x = tf.placeholder(tf.float32,shape=[1,input_size],name = "input_x")
                
            with tf.name_scope("train_data/outputs_y"):    
                self.output_y = tf.placeholder(tf.float32,shape=[1,output_size],name="output_y")
            
            

            with tf.variable_scope("feed_forward/layer1"):
                self.weights1 = tf.get_variable("weights1",shape=[input_size,32],initializer=tf.contrib.layers.xavier_initializer())
                self.biases1 = tf.get_variable("biases1",shape=[32],initializer=tf.zeros_initializer())
                layer1_activation = tf.nn.relu(tf.matmul(self.input_x,self.weights1)+self.biases1)
            with tf.variable_scope("feed_forward/layer2"):
                self.weights2 = tf.get_variable("weights2",shape=[32,output_size],initializer=tf.contrib.layers.xavier_initializer())
                self.biases2 = tf.get_variable("biases2",shape=[output_size])
                layer2_activation = tf.nn.relu(tf.matmul(layer1_activation,self.weights2)+self.biases2)
                self.output_logits = layer2_activation
                
                
                
            
            with tf.name_scope("calc_output_vector_y/controller_output_vt"): #will need to be changed when final output is coded
                self.output_vector_weights = tf.get_variable("Wy",shape=[self.controller_output_size,self.output_size],initializer=tf.contrib.layers.xavier_initializer())
                self.controller_output_vector = tf.matmul(layer2_activation,self.output_vector_weights,name="controller_output_vector_vt")
                
                
            
            with tf.name_scope("update_read_weightingss/"):
                self.read_keys = tf.truncated_normal([read_heads,word_size],name="temp_random_read_keys")
                self.read_strengths = self.one_plus(tf.truncated_normal([read_heads,1],name = "temp_random_read_strengths"),name="read_strengths_one_plus")
            
            #TODO: multiply by read_mode
                self.content_weigthing = self.content_lookup(self.read_keys,self.read_strengths) #N*1
                self.read_weightings = tf.assign(self.read_weightings, self.content_weigthing,name ="update_read_weightings")
            
            with tf.name_scope("calc_read_vectors/"):
                self.read_vectors = tf.assign(self.read_vectors, tf.transpose(tf.matmul(self.memory_matrix,self.read_weightings,transpose_a=True)),name  ="update_read_vectors")
            
            with tf.name_scope("calc_output_vector_y/weighted_read_vectors"):
                weighted_read_vectors = tf.matmul(tf.reshape(self.read_vectors,[1,self.read_heads*self.word_size]),self.read_vectors_weights,name = "weighted_read_vectors")
                
            
            with tf.name_scope("calc_output_vector_y/"):
                self.output_vector = tf.add( self.controller_output_vector  , weighted_read_vectors , name="output_vector_y")
            
            
            with tf.name_scope("loss"):    
                self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.output_vector, labels=self.output_y))
        
            with tf.device('/device:CPU:0'):
                tf.summary.scalar("cross_entropy_loss",self.loss)
                
            with tf.name_scope("optimizer"):
                self.optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(self.loss)
 
    def one_plus(self,x,name=""):
        return  tf.add( 1.0,tf.log(tf.expand_dims(tf.add(1.0,tf.exp(x)),0)),name=name)
    
    def content_lookup(self,key,key_strength):
        with tf.name_scope("cosine_content_lookup"):
            normalized_memory = tf.nn.l2_normalize(self.memory_matrix,1) #N*W
            normalized_key = tf.nn.l2_normalize(key,0) #1*W
        
        
            z = tf.matmul(normalized_memory,normalized_key,transpose_a=False,transpose_b=True)
            content_lookup_result = tf.reshape(tf.nn.softmax(z*tf.squeeze(key_strength),0),[self.num_words,self.read_heads])
        
        # N*1
        return  content_lookup_result

In [5]:
def train():
    #generate the input output sequences, randomly intialized
    tf.reset_default_graph()
    num_seq = 10
    seq_len = 6
    seq_width = 4
    iterations = 600
    con = np.random.randint(0, seq_width,size=seq_len)
    seq = np.zeros((seq_len, seq_width))
    seq[np.arange(seq_len), con] = 1
    end = np.asarray([[-1]*seq_width])
    zer = np.zeros((seq_len, seq_width))
    
    j = 0
    
    debug_every = 1
    final_i_data = np.concatenate((seq, zer), axis=0)
    final_o_data = np.concatenate((zer, seq), axis=0)
    with tf.Session() as session:
        
        dnc = DNC(input_size=seq_width, output_size=seq_width, seq_len=seq_len, num_words=20, word_size=5, read_heads=1)
        log_writer.add_graph(session.graph)
        
        session.run(tf.global_variables_initializer())
        feed_dict = {dnc.input_x : np.reshape(final_i_data[j],[1,seq_width]), dnc.output_y: np.reshape(final_o_data[j],[1,seq_width])}
        
        merged_summary = tf.summary.merge_all()
        rk,mm,cw = session.run([dnc.read_keys,dnc.memory_matrix,dnc.content_weigthing],feed_dict=feed_dict)
        print(rk,mm,cw)
        #print(start_mem)
        ol,l,op,summ,mm = session.run([dnc.output_vector,dnc.loss,dnc.optimizer,merged_summary,dnc.read_keys], feed_dict=feed_dict)
        
        log_writer.add_summary(summ)
        #print(l)
        #print(mm)

In [6]:
train()

[[-0.59424162 -0.72244006  0.37448293  0.09541292  0.6779151 ]] [[  2.47669697e-01   1.16942859e+00  -7.63524890e-01   1.29428184e+00
   -1.56634510e+00]
 [ -1.13119781e-01   1.05344331e+00   4.49118435e-01   7.14221716e-01
    2.05895022e-01]
 [  4.29264382e-02  -1.29676044e+00  -1.28187943e+00  -1.20258379e+00
   -1.08996356e+00]
 [  4.80944514e-01  -9.96630788e-01   4.22748715e-01  -1.65358746e+00
   -1.13452148e+00]
 [  1.58233766e-03  -3.79273444e-01  -1.36837697e+00  -1.42595112e-01
   -2.65732527e-01]
 [ -5.99190891e-01   8.83945227e-01   5.71001656e-02  -3.43652874e-01
   -5.04611135e-01]
 [  9.59120333e-01  -1.72320569e+00   6.99574454e-03  -1.56161022e+00
    8.28489602e-01]
 [  1.39877200e+00  -2.04522982e-01   1.97561696e-01   1.08912313e+00
    4.40411836e-01]
 [ -5.43573976e-01   9.84363258e-01  -4.63951856e-01  -2.75325239e-01
    3.20992410e-01]
 [  8.14128876e-01  -3.44241351e-01  -1.85590792e+00   1.18575120e+00
    1.84317553e+00]
 [  2.62433261e-01   1.09336150e+00 

In [7]:
def create_graph():
    with tf.device('/device:GPU:1'):
        var = tf.Variable(tf.zeros([2,1]),trainable = False)
    
        var2 = tf.Variable(tf.ones([2,1]),trainable=False)
        
        var = tf.assign(var,tf.add(var,var2))
        
        return var,var2
    

In [8]:
def train():    
    tf.reset_default_graph()
    with tf.Session() as session:
        var,var2 = create_graph()
        session.run(tf.global_variables_initializer())
        
        for i in range(5):
            result = session.run(var)
            print(result)

In [9]:
train()

[[ 1.]
 [ 1.]]
[[ 2.]
 [ 2.]]
[[ 3.]
 [ 3.]]
[[ 4.]
 [ 4.]]
[[ 5.]
 [ 5.]]


In [10]:
def content_lookup(memory_matrix,key,key_strength):
        with tf.name_scope("cosine_content_lookup"):
            normalized_memory = tf.nn.l2_normalize(memory_matrix,1) #N*W
            normalized_key = tf.nn.l2_normalize(key,0) #1*W
        
        
            z = tf.matmul(normalized_memory,normalized_key,transpose_a=False,transpose_b=True)
            content_lookup_result = tf.reshape(tf.nn.softmax(z*tf.squeeze(key_strength),0),[20,1])
            #content_lookup_result = tf.nn.softmax(tf.losses.cosine_distance(normalized_memory, tf.nn.l2_normalize(key, 1), dim=0))
            #content_lookup_result = normalized_memory
        # N*1
        return  content_lookup_result

def one_plus(self,x,name=""):
        return  tf.add( 1.0,tf.log(tf.expand_dims(tf.add(1.0,tf.exp(x)),0)),name=name)

def create_graph():
    memory_matrix = tf.truncated_normal([20,5])
    key  = tf.truncated_normal([1,5],name="temp_random_read_keys")
    #key_strength = one_plus( x=tf.truncated_normal([1,1],name = "temp_random_read_strengths"),name="op")
    key_strength = tf.truncated_normal([1,1],name = "temp_random_read_strengths")
    key_strength_op = one_plus( key_strength,name = "temp_random_read_strengths")
        
        
    result = content_lookup(memory_matrix,key,key_strength_op)
    
    return memory_matrix,key,key_strength,key_strength_op,result

def train():    
    tf.reset_default_graph()
    with tf.Session() as session:
        memory_matrix,key,key_strength,key_stren_op,result = create_graph()
        session.run(tf.global_variables_initializer())
        
        
        mm,k,ks,ks_op,r = session.run([memory_matrix,key,key_strength,key_stren_op,result])
        print("mem",mm)
        print("key",k)
        print("key stren",ks)
        print("key stren op",ks_op)
        print("resul",np.sum(r))
        print("----")
        print(1 + np.log(1+np.exp(ks)))
train()

TypeError: one_plus() missing 1 required positional argument: 'x'

In [None]:
1 + np.log(1+np.exp(-1.06015229))