# Head
## Focusing by location

In [45]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()


def build_focusing_by_location_2D(keys, beta, memory):
    """Computes content addressing. Uses both address and content part for calculation of the similarity.
    Memory is 2D i.e. shared between batch samples.
    Args:
        key: a 2-D Tensor [BATCH_SIZE x SLOT_SIZE] 
        beta: a 1-D Tensor - key strength [BATCH_SIZE x 1]
        memory: a 2-D Tensor [SLOT_SIZE x NUMBER_OF_SLOTS]
    """
    with tf.name_scope("focusing_by_location"):
    
        # Normalize batch - along samples.
        norm_keys = tf.nn.l2_normalize(keys,1, name="normalized_keys")
        # Normalize memory - along slots 
        norm_memory = tf.nn.l2_normalize(memory, 0)

        # Calculate cosine similarity [BATCH_SIZE x NUMBER_OF_SLOTS].
        similarity = tf.matmul(norm_keys, norm_memory, name="similarity")

        # Element-wise multiplication [BATCH_SIZE x NUMBER_OF_SLOTS]
        strengthtened_similarity = beta * similarity

        # Calculate weighting based on similarity along the "slot dimension" [BATCH_SIZE x NUMBER_OF_SLOTS].
        result = tf.nn.softmax(strengthtened_similarity, dim=1)
        return result


# test focusing
keys = tf.constant([[0.2, 0.3, 0.4],[0.1, 0.2, 0.7]], dtype=tf.float32)
print("keys=",keys)
beta= tf.constant([[100.0],[100.0]], dtype=tf.float32)

memory = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
print("memory=",memory)

focus = build_focusing_by_location_2D(keys, beta, memory)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
print("keys =\n",sess.run([keys]))
print("memory =\n",sess.run([memory]))
print("focus =\n",sess.run([focus]))
# [7,1,2,3,4,5,6]
sess.close()


keys= Tensor("Const:0", shape=(2, 3), dtype=float32)
memory= Tensor("transpose:0", shape=(3, 4), dtype=float32)
keys =
 [array([[ 0.2       ,  0.30000001,  0.40000001],
       [ 0.1       ,  0.2       ,  0.69999999]], dtype=float32)]
memory =
 [array([[ 0.2       ,  0.2       ,  0.30000001,  0.1       ],
       [ 0.30000001,  0.30000001,  0.30000001,  0.2       ],
       [ 0.40000001,  0.40000001,  0.30000001,  0.69999999]], dtype=float32)]
focus =
 [array([[  4.92605865e-01,   4.92605865e-01,   1.47291617e-02,
          5.90993950e-05],
       [  1.19946955e-04,   1.19946955e-04,   4.91867769e-10,
          9.99760211e-01]], dtype=float32)]


In [46]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()


def build_focusing_by_location_3D_iterative(keys, beta, memory_batch):
    """Computes content addressing. Uses both address and content part for calculation of the similarity.
    Memory is 3D i.e. every sample in the batch has its own "memory slice".
    Version that iterates through samples in the batch.
    Args:
        keys: a 2-D Tensor [BATCH_SIZE x SLOT_SIZE] 
        beta: a 1-D Tensor - key strength [BATCH_SIZE x 1]
        memory: a 3-D !! Tensor [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    """
    with tf.name_scope("focusing_by_location"):
        
        batch_size = int(memory_batch.get_shape()[0])
        slot_size = int(keys.get_shape()[1])
        # Decompose memory into list of BATCH_SIZE memories of size [SLOT_SIZE x NUMBER_OF_SLOTS].
        memory_list = tf.unstack(memory_batch, axis=0)

        # Normalize batch - along samples.
        norm_keys = tf.nn.l2_normalize(keys,1, name="normalized_keys")

        weighting = list()
        # Iterate through samples in batch.
        for b in range(batch_size):
                        
            # Normalize memory - along slots 
            norm_memory = tf.nn.l2_normalize(memory_list[b], 0)

            # Get a single key [1 x SLOT_SIZE]
            norm_keys_slice = tf.slice(norm_keys, [b, 0], [1, slot_size])
            
            # Calculate cosine similarity [BATCH_SIZE x NUMBER_OF_SLOTS].
            print("norm_keys_slice=", norm_keys_slice)
            print("norm_memory=",norm_memory)
            similarity = tf.matmul(norm_keys_slice, norm_memory, name="similarity")

            # Element-wise multiplication [BATCH_SIZE x NUMBER_OF_SLOTS]
            strengthtened_similarity = beta[b] * similarity

            # Calculate weighting based on similarity along the "slot dimension" [BATCH_SIZE x NUMBER_OF_SLOTS].
            weighting.append(tf.nn.softmax(strengthtened_similarity, dim=1))
            
        # "Stack" results into 2D tensor [BATCH_SIZE x NUMBER_OF_SLOTS]
        result = tf.stack(weighting, axis=0)
        print (result)
        return result


# test focusing
keys = tf.constant([[0.2, 0.3, 0.4],[0.1, 0.2, 0.7]], dtype=tf.float32)
print("keys=",keys)
beta= tf.constant([[100.0],[2.0]], dtype=tf.float32)

memory_0 = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
memory_1 = tf.transpose(tf.constant([[0.1, 0.2, 0.7],[0.0, 0.6, 0.8],[0.9, 0.0, 0.1],[0.8, 0.1, 0.1]], dtype=tf.float32))
memory = tf.stack([memory_0, memory_1])
print("memory=",memory)

focus = build_focusing_by_location_3D_iterative(keys, beta, memory)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
print("keys_BxS =\n",sess.run([keys]))
print("memory =\n",sess.run([memory]))
print("focus =\n",sess.run([focus]))
# [7,1,2,3,4,5,6]
sess.close()


keys= Tensor("Const:0", shape=(2, 3), dtype=float32)
memory= Tensor("stack:0", shape=(2, 3, 4), dtype=float32)
norm_keys_slice= Tensor("focusing_by_location/Slice:0", shape=(1, 3), dtype=float32)
norm_memory= Tensor("focusing_by_location/l2_normalize:0", shape=(3, 4), dtype=float32)
norm_keys_slice= Tensor("focusing_by_location/Slice_1:0", shape=(1, 3), dtype=float32)
norm_memory= Tensor("focusing_by_location/l2_normalize_1:0", shape=(3, 4), dtype=float32)
Tensor("focusing_by_location/stack:0", shape=(2, 1, 4), dtype=float32)
keys =
 [array([[ 0.2       ,  0.30000001,  0.40000001],
       [ 0.1       ,  0.2       ,  0.69999999]], dtype=float32)]
memory =
 [array([[[ 0.2       ,  0.2       ,  0.30000001,  0.1       ],
        [ 0.30000001,  0.30000001,  0.30000001,  0.2       ],
        [ 0.40000001,  0.40000001,  0.30000001,  0.69999999]],

       [[ 0.1       ,  0.        ,  0.89999998,  0.80000001],
        [ 0.2       ,  0.60000002,  0.        ,  0.1       ],
        [ 0.69999999,  

In [49]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()


def build_focusing_by_location_3D(keys_BxS_, beta_Bx1_, prev_memory_BxSxN_):
    """Computes content addressing. Uses both address and content part for calculation of the similarity.
    Memory is 3D i.e. every sample in the batch has its own "memory slice".
    Args:
        keys_BxS_: a 2-D Tensor [BATCH_SIZE x SLOT_SIZE] 
        beta_Bx1_: a 2-D Tensor - key strength [BATCH_SIZE x 1]
        prev_memory_BxSxN_: a 3-D !! Tensor [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    """
    with tf.name_scope("focusing_by_location"):
        
        # Normalize key - along samples [BATCH_SIZE x 1 x SLOT_SIZE]
        keys_Bx1xS = tf.expand_dims(keys_BxS_, 1)
        norm_keys_Bx1xS = tf.nn.l2_normalize(keys_Bx1xS,2, name="norm_keys_Bx1xS")

        # Normalize memory - along slots [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
        norm_memory_BxSxN = tf.nn.l2_normalize(prev_memory_BxSxN_, 1, name="norm_memory_BxSxN")
        
        # Calculate batched cosine similarity [BATCH_SIZE x 1 x NUMBER_OF_SLOTS]
        similarity_Bx1xN= tf.matmul(norm_keys_Bx1xS, norm_memory_BxSxN, name="similarity_Bx1xN")
        
        # Element-wise multiplication [BATCH_SIZE x 1 x NUMBER_OF_SLOTS]
        beta_Bx1x1 = tf.expand_dims(beta_Bx1_, 1)
        strengthtened_similarity_Bx1xN = tf.matmul(beta_Bx1x1, similarity_Bx1xN, name="strengthtened_similarity_Bx1xN")

        # Calculate weighting based on similarity along the "slot dimension" [BATCH_SIZE x NUMBER_OF_SLOTS].
        weighting_Bx1xN = tf.nn.softmax(strengthtened_similarity_Bx1xN, dim=2)
            
        # "Squeeze" results into 2D tensor [BATCH_SIZE x NUMBER_OF_SLOTS]
        weighting_BxN = tf.squeeze(weighting_Bx1xN)
        print ("weighting_BxN=",weighting_BxN)
        return weighting_BxN


# test focusing
keys = tf.constant([[0.2, 0.3, 0.4],[0.1, 0.2, 0.7]], dtype=tf.float32)
print("keys=",keys)
beta= tf.constant([[100.0],[2.0]], dtype=tf.float32)

memory_0 = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
memory_1 = tf.transpose(tf.constant([[0.1, 0.2, 0.7],[0.0, 0.6, 0.8],[0.9, 0.0, 0.1],[0.8, 0.1, 0.1]], dtype=tf.float32))
memory = tf.stack([memory_0, memory_1])
print("memory=",memory)

focus = build_focusing_by_location_3D(keys, beta, memory)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
print("keys_BxS =\n",sess.run([keys]))
print("memory =\n",sess.run([memory]))
print("focus =\n",sess.run([focus]))
# [7,1,2,3,4,5,6]
sess.close()


keys= Tensor("Const:0", shape=(2, 3), dtype=float32)
memory= Tensor("stack:0", shape=(2, 3, 4), dtype=float32)
weighting_BxN= Tensor("focusing_by_location/Squeeze:0", shape=(2, 4), dtype=float32)
keys_BxS =
 [array([[ 0.2       ,  0.30000001,  0.40000001],
       [ 0.1       ,  0.2       ,  0.69999999]], dtype=float32)]
memory =
 [array([[[ 0.2       ,  0.2       ,  0.30000001,  0.1       ],
        [ 0.30000001,  0.30000001,  0.30000001,  0.2       ],
        [ 0.40000001,  0.40000001,  0.30000001,  0.69999999]],

       [[ 0.1       ,  0.        ,  0.89999998,  0.80000001],
        [ 0.2       ,  0.60000002,  0.        ,  0.1       ],
        [ 0.69999999,  0.80000001,  0.1       ,  0.1       ]]], dtype=float32)]
focus =
 [array([[  4.92605865e-01,   4.92605865e-01,   1.47291617e-02,
          5.90993950e-05],
       [  4.31139648e-01,   3.71354699e-01,   9.43793803e-02,
          1.03126228e-01]], dtype=float32)]


## Circular convolution

In [26]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()


def circular_index(idx, size):
    if idx < 0: return size + idx
    if idx >= size : return idx - size
    else: return idx

def build_circular_convolution(batch, kernel):
    """Computes circular convolution.
    Args:
        batch: a 2-D Tensor [BATCH_SIZE x NUMBER_OF_SLOTS] 
        kernel: a 2-D Tensor [BATCH_SIZE x KERNEL_SIZE (e.g. 3)]
    """
    size = int(batch.get_shape()[1])
    kernel_size = int(k.get_shape()[1])
    kernel_shift = int(math.floor(kernel_size/2.0))

    kernels = []
    for i in range(size):
        # Create a list of index vectors.
        indices = [circular_index(i+j, size) for j in range(kernel_shift, -kernel_shift-1, -1)]
        # Reorganize batch according to indices. 
        reorganized_batch = tf.gather(batch, indices, axis=1)
        # Perform convolution.
        kernels.append(tf.reduce_sum(reorganized_batch * kernel, 1))
    # Sum elements lying on the same positions.
    result = tf.transpose(tf.dynamic_stitch([i for i in range(size)], kernels))
    return result

# test circular convolution
v = tf.constant([[1,2,3,4,5,6,7],[1,2,3,4,5,6,7],[0.1,0.2,0.3,0.4,0.5,0.6,0.7]], dtype=tf.float32)
print("v=",v)
k = tf.constant([[0,0,1],[0,0.5,0.5],[1,0,0]], dtype=tf.float32)
print("k=",k)

conv = build_circular_convolution(v, k)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
print("conv =\n",sess.run([conv]))
# [7,1,2,3,4,5,6]
sess.close()

v= Tensor("Const:0", shape=(3, 7), dtype=float32)
k= Tensor("Const_1:0", shape=(3, 3), dtype=float32)
conv =
 [array([[ 7.        ,  1.        ,  2.        ,  3.        ,  4.        ,
         5.        ,  6.        ],
       [ 4.        ,  1.5       ,  2.5       ,  3.5       ,  4.5       ,
         5.5       ,  6.5       ],
       [ 0.2       ,  0.30000001,  0.40000001,  0.5       ,  0.60000002,
         0.69999999,  0.1       ]], dtype=float32)]


##  Sharpening
Requirements: EPS = 1e-40, gamma truncated to 50

In [27]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()


def build_sharpening(batch, gamma):
    """Computes sharpening.
    Args:
        batch: a 2-D Tensor [BATCH_SIZE x NUMBER_OF_SLOTS] 
        gamma: a 1-D Tensor [BATCH_SIZE x 1]
    """
    EPS = 1e-30
    with tf.name_scope("sharpening"):    
        number_of_slots = int(batch.get_shape()[1])

        # Duplicate gammas - tf.tile is not working for partially unknown shape :] 
        gammas = []
        for i in range(number_of_slots):
            # Truncates gamma to 50!
            gammas.append(tf.minimum(gamma[:,0], 50))
        gammas_stacked = tf.transpose(tf.stack(gammas))
        print("gammas_stacked=", gammas_stacked)
        # Calculate powered batch [BATCH_SIZE x NUMBER_OF_SLOTS].
        powed_batch = tf.pow(batch, gammas_stacked)+EPS
        print("powed_batch=", powed_batch)

        # "Normalization" [BATCH_SIZE x NUMBER_OF_SLOTS].
        sharpened_batch = (powed_batch) / (tf.reduce_sum(powed_batch, axis=1, keep_dims=True))
        print("sharpened_batch=",sharpened_batch)

        return sharpened_batch


NUMBER_OF_SLOTS = 3
# test sharpening
v = tf.placeholder(tf.float32, shape=[None, NUMBER_OF_SLOTS])
#v = tf.constant([[0.2, 0.3, 0.4]], dtype=tf.float32)
print("v=",v)
g = tf.placeholder(tf.float32, shape=[None, 1])
#g = tf.constant([1.0], dtype=tf.float32)
print("g=",g)

sharp_v = build_sharpening(v, g)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    

my_v = [[0.2, 0.3, 0.4],[0.0, 0.3, 0.9],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]]
init_g = np.transpose([50.0, 1.0, 10.0, 50.0])
my_g = np.reshape(init_g, [4,1])
my_feed_dict={v: my_v, g: my_g}

# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
print("v =\n",sess.run([v], feed_dict=my_feed_dict))
print("sharp_v =\n",sess.run([sharp_v], feed_dict=my_feed_dict))
# [7,1,2,3,4,5,6]
sess.close()


v= Tensor("Placeholder:0", shape=(?, 3), dtype=float32)
g= Tensor("Placeholder_1:0", shape=(?, 1), dtype=float32)
gammas_stacked= Tensor("sharpening/transpose:0", shape=(?, 3), dtype=float32)
powed_batch= Tensor("sharpening/add:0", shape=(?, 3), dtype=float32)
sharpened_batch= Tensor("sharpening/truediv:0", shape=(?, 3), dtype=float32)
v =
 [array([[ 0.2       ,  0.30000001,  0.40000001],
       [ 0.        ,  0.30000001,  0.89999998],
       [ 0.30000001,  0.30000001,  0.30000001],
       [ 0.1       ,  0.2       ,  0.69999999]], dtype=float32)]
sharp_v =
 [array([[  7.88868831e-11,   5.66400843e-07,   9.99999404e-01],
       [  8.33333336e-31,   2.50000000e-01,   7.49999940e-01],
       [  3.33333313e-01,   3.33333313e-01,   3.33333313e-01],
       [  5.56030187e-23,   5.56036435e-23,   1.00000000e+00]], dtype=float32)]


# Memory
## Preserved memory (content)

In [51]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()

BATCH_SIZE = 2
ADDRESS_SIZE = 1
CONTENT_SIZE = 2
SLOT_SIZE = ADDRESS_SIZE + CONTENT_SIZE
NUMBER_OF_SLOTS=4

def build_memory_preservation_iterative(write_weights_BxN_, erase_vector_BxC_, prev_memory_BxSxN_):
    """Computes how much memory will be preserved using weights and erase vector as params.
    Version that iterates through samples in the batch.
    Args:
        write_weights_BxN: a 2-D Tensor [BATCH_SIZE x NUMBER_OF_SLOTS] 
        erase_vector_BxC: a 2-D Tensor [BATCH_SIZE x CONTENT_SIZE]
        prev_memory_BxSxN: a 3-D !! Tensor [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    """
    with tf.name_scope("memory_preservation"):
        
        #batch_size = int(memory_batch.get_shape()[0])

        # Decompose memory into a list of BATCH_SIZE memories of size [SLOT_SIZE x NUMBER_OF_SLOTS].
        #prev_memory_B_SxN = tf.unstack(prev_memory_BxSxN_, axis=0)
        preserved_memory_B_SxN = list()
        preserved_memory_mask_B_SxN = list()

        # Iterate through samples in batch.
        for b in range(BATCH_SIZE):

            # Get part of the memory for given sample - [SLOT_SIZE x NUMBER_OF_SLOTS]
            prev_memory_SxN = tf.squeeze(tf.slice(prev_memory_BxSxN_, [b, 0, 0], [1, SLOT_SIZE, NUMBER_OF_SLOTS],
                                          name="memory_slot"))
            print("prev_memory_SxN=",prev_memory_SxN)
            
            # Get erase vector.
            write_1xN = tf.slice(write_weights_BxN_, [b,0], [1, NUMBER_OF_SLOTS])
            print("write_1xN=",write_1xN)
            erase_Cx1 = tf.transpose(tf.slice(erase_vector_BxC_, [b,0], [1, CONTENT_SIZE]))
            print("erase_Cx1=",erase_Cx1)
            preserved_content_mask_CxN = tf.ones([CONTENT_SIZE, NUMBER_OF_SLOTS]) - tf.matmul(erase_Cx1, write_1xN)
            print(preserved_content_mask_CxN)
            
            preserved_memory_mask_SxN = tf.concat(
                [tf.ones([ADDRESS_SIZE, NUMBER_OF_SLOTS], tf.float32), preserved_content_mask_CxN],
                axis=0)
            
            preserved_memory_B_SxN.append(preserved_memory_mask_SxN * prev_memory_SxN)
            preserved_memory_mask_B_SxN.append(preserved_memory_mask_SxN)
        # 3. Preservation gate p [CONTENT_SIZE x NUMBER_OF_SLOTS]
        #preserved_content_BxCxN = tf.ones([BATCH_SIZE, CONTENT_SIZE, NUMBER_OF_SLOTS]) - 
        #    tf.matmul(tf.transpose(erase_vector_BxC), write_weights_BxN)
        # How much memory will "preserve" [SLOT_SIZE x NUMBER_OF_SLOTS] 
        #preserved_memory_SxN = prev_memory_SxN * tf.concat(
        #    [tf.ones([ADDRESS_SIZE, NUMBER_OF_SLOTS], tf.float32), preserved_content_CxN],
        #    axis=0)


        #        print (preserved_content_BxCxN)
        preserved_memory_BxSxN = tf.stack(preserved_memory_B_SxN)
        preserved_memory_mask_BxSxN = tf.stack(preserved_memory_mask_B_SxN)
        return preserved_memory_BxSxN, preserved_memory_mask_BxSxN


# Write weights: [2 samples x 4 slots]
write_weights = tf.constant([[1.0, 0.5, 0.0, 1.0],[0.2, 0.7, 0.1, 0.0]], dtype=tf.float32)
print("write_weights=",write_weights)
# erase_vector: [2 samples x 2 content_size]
erase_vector= tf.constant([[1.0, 1.0],[0.1, 0.1]], dtype=tf.float32)
print("erase_vector=",erase_vector)

# Memory: [2 samples x 4 slots x 3 slot_size]
memory_0 = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
memory_1 = tf.transpose(tf.constant([[0.1, 0.2, 0.7],[0.0, 0.6, 0.8],[0.9, 0.0, 0.1],[0.8, 0.1, 0.1]], dtype=tf.float32))
memory = tf.stack([memory_0, memory_1])
print("memory=",memory)

pres_mem, pres_mem_mask = build_memory_preservation_iterative(write_weights, erase_vector, memory)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
write_weights_BxN, erase_vector_BxC, prev_memory_BxSxN, preserved_memory_BxSxN, preserved_memory_mask_BxSxN = \
    sess.run([write_weights, erase_vector, memory, pres_mem, pres_mem_mask])
print("write_weights_BxN =\n",write_weights_BxN)
print("erase_vector_BxC =\n",erase_vector_BxC)
print("prev_memory_BxSxN =\n",prev_memory_BxSxN)
print("preserved_memory_mask_BxSxN =\n",preserved_memory_mask_BxSxN)
print("preserved_memory_BxSxN =\n",preserved_memory_BxSxN)
# [7,1,2,3,4,5,6]
sess.close()

        
        
        

write_weights= Tensor("Const:0", shape=(2, 4), dtype=float32)
erase_vector= Tensor("Const_1:0", shape=(2, 2), dtype=float32)
memory= Tensor("stack:0", shape=(2, 3, 4), dtype=float32)
prev_memory_SxN= Tensor("content_preservation/Squeeze:0", shape=(3, 4), dtype=float32)
write_1xN= Tensor("content_preservation/Slice:0", shape=(1, 4), dtype=float32)
erase_Cx1= Tensor("content_preservation/transpose:0", shape=(2, 1), dtype=float32)
Tensor("content_preservation/sub:0", shape=(2, 4), dtype=float32)
prev_memory_SxN= Tensor("content_preservation/Squeeze_1:0", shape=(3, 4), dtype=float32)
write_1xN= Tensor("content_preservation/Slice_2:0", shape=(1, 4), dtype=float32)
erase_Cx1= Tensor("content_preservation/transpose_1:0", shape=(2, 1), dtype=float32)
Tensor("content_preservation/sub_1:0", shape=(2, 4), dtype=float32)
write_weights_BxN =
 [[ 1.          0.5         0.          1.        ]
 [ 0.2         0.69999999  0.1         0.        ]]
erase_vector_BxC =
 [[ 1.   1. ]
 [ 0.1  0.1]]
prev_mem

In [105]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()

BATCH_SIZE = 2
ADDRESS_SIZE = 1
CONTENT_SIZE = 2
SLOT_SIZE = ADDRESS_SIZE + CONTENT_SIZE
NUMBER_OF_SLOTS=4

def build_memory_preservation(write_weights_BxN_, erase_vector_BxC_, prev_memory_BxSxN_):
    """Computes how much memory will be preserved using weights and erase vector as params.
    Batched version, i.e. all computations are computed without iteration through the batch samples.
    Args:
        write_weights_BxN: a 2-D Tensor [BATCH_SIZE x NUMBER_OF_SLOTS] 
        erase_vector_BxC: a 2-D Tensor [BATCH_SIZE x CONTENT_SIZE]
        prev_memory_BxSxN: a 3-D !! Tensor [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    """
    with tf.name_scope("memory_preservation"):
        
        # Expand dimensions of weights and erase vectors to 3D.
        write_weights_Bx1xN = tf.expand_dims(write_weights_BxN_, axis=1)
        print("write_weights_Bx1xN=",write_weights_Bx1xN)
        erase_BxCx1 = tf.expand_dims(erase_vector_BxC_, axis=2)
        print("erase_BxCx1=",erase_BxCx1)
        
        # Calculate the erase content mask.
        erase_content_mask_BxCxN = tf.matmul(erase_BxCx1, write_weights_Bx1xN)
        print("erase_content_mask_BxCxN=",erase_content_mask_BxCxN)
        
        # Calculate the preserved mask.
        preserved_content_mask_BxCxN = tf.ones_like(erase_content_mask_BxCxN) - erase_content_mask_BxCxN
        print(preserved_content_mask_BxCxN)
        
        # Create the preserved address mask.
        preserved_address_mask_BxAxN = tf.ones([int(prev_memory_BxSxN_.get_shape()[0]), ADDRESS_SIZE, NUMBER_OF_SLOTS], tf.float32)
        print("preserved_address_mask_BxAxN=",preserved_address_mask_BxAxN)

        # Concatenate the latter two.
        preserved_memory_mask_BxSxN = tf.concat(
            [preserved_address_mask_BxAxN, preserved_content_mask_BxCxN],
            axis=1)
        print("preserved_memory_mask_BxSxN=",preserved_memory_mask_BxSxN)

        # Finally, calculate the preserved memory part.
        preserved_memory_BxSxN = tf.multiply(preserved_memory_mask_BxSxN, prev_memory_BxSxN_)
        print("preserved_memory_BxSxN=",preserved_memory_BxSxN)
        
        return preserved_memory_BxSxN, preserved_memory_mask_BxSxN


# Write weights: [2 samples x 4 slots]
write_weights = tf.constant([[1.0, 0.5, 0.0, 1.0],[0.2, 0.7, 0.1, 0.0]], dtype=tf.float32)
print("write_weights=",write_weights)
# erase_vector: [2 samples x 2 content_size]
erase_vector= tf.constant([[1.0, 1.0],[0.1, 0.1]], dtype=tf.float32)
print("erase_vector=",erase_vector)

# Memory: [2 samples x 4 slots x 3 slot_size]
memory_0 = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
memory_1 = tf.transpose(tf.constant([[0.1, 0.2, 0.7],[0.0, 0.6, 0.8],[0.9, 0.0, 0.1],[0.8, 0.1, 0.1]], dtype=tf.float32))
memory = tf.stack([memory_0, memory_1])
print("memory=",memory)

pres_mem, pres_mem_mask = build_memory_preservation(write_weights, erase_vector, memory)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
write_weights_BxN, erase_vector_BxC, prev_memory_BxSxN, preserved_memory_BxSxN, preserved_memory_mask_BxSxN = \
    sess.run([write_weights, erase_vector, memory, pres_mem, pres_mem_mask])
print("write_weights_BxN =\n",write_weights_BxN)
print("erase_vector_BxC =\n",erase_vector_BxC)
print("prev_memory_BxSxN =\n",prev_memory_BxSxN)
print("preserved_memory_mask_BxSxN =\n",preserved_memory_mask_BxSxN)
print("preserved_memory_BxSxN =\n",preserved_memory_BxSxN)

# Close session.
sess.close()


write_weights= Tensor("Const:0", shape=(2, 4), dtype=float32)
erase_vector= Tensor("Const_1:0", shape=(2, 2), dtype=float32)
memory= Tensor("stack:0", shape=(2, 3, 4), dtype=float32)
write_weights_Bx1xN= Tensor("memory_preservation/ExpandDims:0", shape=(2, 1, 4), dtype=float32)
erase_BxCx1= Tensor("memory_preservation/ExpandDims_1:0", shape=(2, 2, 1), dtype=float32)
erase_content_mask_BxCxN= Tensor("memory_preservation/MatMul:0", shape=(2, 2, 4), dtype=float32)
Tensor("memory_preservation/sub:0", shape=(2, 2, 4), dtype=float32)
preserved_address_mask_BxAxN= Tensor("memory_preservation/ones:0", shape=(2, 1, 4), dtype=float32)
preserved_memory_mask_BxSxN= Tensor("memory_preservation/concat:0", shape=(2, 3, 4), dtype=float32)
preserved_memory_BxSxN= Tensor("memory_preservation/Mul:0", shape=(2, 3, 4), dtype=float32)
write_weights_BxN =
 [[ 1.          0.5         0.          1.        ]
 [ 0.2         0.69999999  0.1         0.        ]]
erase_vector_BxC =
 [[ 1.   1. ]
 [ 0.1  0.1]]
prev

## Memory update

In [61]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()

BATCH_SIZE = 2
ADDRESS_SIZE = 1
CONTENT_SIZE = 2
SLOT_SIZE = ADDRESS_SIZE + CONTENT_SIZE
NUMBER_OF_SLOTS=4

def build_memory_update(write_weights_BxN_, add_vector_BxC_):
    """Computes the update that will be added to the memory.
    Assumes that memory is a 3D tensor  [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    Batched version, i.e. all computations are computed without iteration through the batch samples.
    Args:
        write_weights_BxN: a 2-D Tensor [BATCH_SIZE x NUMBER_OF_SLOTS] 
        add_vector_BxC_: a 2-D Tensor [BATCH_SIZE x CONTENT_SIZE]
    """
    with tf.name_scope("memory_update"):
        
        # Expand dimensions of weights and erase vectors to 3D.
        write_weights_Bx1xN = tf.expand_dims(write_weights_BxN_, axis=1)
        print("write_weights_Bx1xN=",write_weights_Bx1xN)
        add_vector_BxCx1 = tf.expand_dims(add_vector_BxC_, axis=2)
        print("add_vector_BxCx1=",add_vector_BxCx1)
        
        # Calculate the content update.
        content_update_BxCxN = tf.matmul(add_vector_BxCx1, write_weights_Bx1xN)
        print("content_update_BxCxN=",content_update_BxCxN)
        
        # Create the addres part - all zeros, so it won't change.        
        address_update_BxAxN = tf.zeros([int(write_weights_Bx1xN.get_shape()[0]), ADDRESS_SIZE, NUMBER_OF_SLOTS], tf.float32)
        print("address_update_BxAxN=",address_update_BxAxN)

        # Concatenate the latter two.
        memory_update_BxSxN = tf.concat(
            [address_update_BxAxN, content_update_BxCxN],
            axis=1)
        print("memory_update_BxSxN=",memory_update_BxSxN)

        return memory_update_BxSxN


# Write weights: [2 samples x 4 slots]
write_weights = tf.constant([[1.0, 0.5, 0.0, 1.0],[0.2, 0.7, 0.1, 0.0]], dtype=tf.float32)
print("write_weights=",write_weights)
# erase_vector: [2 samples x 2 content_size]
add_vector= tf.constant([[1.0, 1.0],[0.1, 0.1]], dtype=tf.float32)
print("add_vector=",add_vector)

# Memory: [2 samples x 4 slots x 3 slot_size]
memory_0 = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
memory_1 = tf.transpose(tf.constant([[0.1, 0.2, 0.7],[0.0, 0.6, 0.8],[0.9, 0.0, 0.1],[0.8, 0.1, 0.1]], dtype=tf.float32))
memory = tf.stack([memory_0, memory_1])
print("memory=",memory)

mem_update = build_memory_update(write_weights, add_vector)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
write_weights_BxN, add_vector_BxC, prev_memory_BxSxN, memory_update_BxSxN = \
    sess.run([write_weights, add_vector, memory, mem_update])
print("write_weights_BxN =\n",write_weights_BxN)
print("add_vector_BxC =\n",add_vector_BxC)
#print("prev_memory_BxSxN =\n",prev_memory_BxSxN)
print("memory_update_BxSxN =\n",memory_update_BxSxN)

# Close session.
sess.close()


write_weights= Tensor("Const:0", shape=(2, 4), dtype=float32)
add_vector= Tensor("Const_1:0", shape=(2, 2), dtype=float32)
memory= Tensor("stack:0", shape=(2, 3, 4), dtype=float32)
write_weights_Bx1xN= Tensor("content_preservation/ExpandDims:0", shape=(2, 1, 4), dtype=float32)
add_vector_BxCx1= Tensor("content_preservation/ExpandDims_1:0", shape=(2, 2, 1), dtype=float32)
content_update_BxCxN= Tensor("content_preservation/MatMul:0", shape=(2, 2, 4), dtype=float32)
address_update_BxAxN= Tensor("content_preservation/zeros:0", shape=(2, 1, 4), dtype=float32)
memory_update_BxSxN= Tensor("content_preservation/concat:0", shape=(2, 3, 4), dtype=float32)
write_weights_BxN =
 [[ 1.          0.5         0.          1.        ]
 [ 0.2         0.69999999  0.1         0.        ]]
add_vector_BxC =
 [[ 1.   1. ]
 [ 0.1  0.1]]
memory_update_BxSxN =
 [[[ 0.    0.    0.    0.  ]
  [ 1.    0.5   0.    1.  ]
  [ 1.    0.5   0.    1.  ]]

 [[ 0.    0.    0.    0.  ]
  [ 0.02  0.07  0.01  0.  ]
  [ 0.02  0.

In [89]:
import math
import numpy as np 
import tensorflow as tf


# Reset graph - just in case.
tf.reset_default_graph()

BATCH_SIZE = 2
ADDRESS_SIZE = 1
CONTENT_SIZE = 2
SLOT_SIZE = ADDRESS_SIZE + CONTENT_SIZE
NUMBER_OF_SLOTS=4

def build_memory_output(read_weights_BxN_, prev_memory_BxSxN_):
    """Creates ops computing the memory output.
    Assumes that memory is a 3D tensor  [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    Batched version, i.e. all computations are computed without iteration through the batch samples.
    Args:
        read_weights_BxN: a 2-D Tensor [BATCH_SIZE x NUMBER_OF_SLOTS] 
        prev_memory_BxSxN_: a 3-D !! Tensor [BATCH_SIZE x SLOT_SIZE x NUMBER_OF_SLOTS]
    """
    with tf.name_scope("memory_output"):
        
        # Expand dimensions of weights to 3D  [BATCH_SIZE x 1 x NUMBER_OF_SLOTS]
        read_weights_Bx1xN = tf.expand_dims(read_weights_BxN_, axis=1)
        print("read_weights_Bx1xN=",read_weights_Bx1xN)
        
        # Get the content  [BATCH_SIZE x NUMBER_OF_SLOTS x CONTENT_SIZE]
        prev_content_BxCxN = tf.slice (prev_memory_BxSxN_, [0, ADDRESS_SIZE, 0], [-1, CONTENT_SIZE, NUMBER_OF_SLOTS])
        prev_content_BxNxC = tf.transpose(prev_content_BxCxN, perm=[0, 2, 1])
        print("prev_content_BxNxC=",prev_content_BxNxC)
        
        # Calculate output [BATCH_SIZE x 1 x CONTENT_SIZE]
        output_Bx1xC = tf.matmul(read_weights_Bx1xN, prev_content_BxNxC, name="output_Bx1xC")
        print("output_Bx1xC=",output_Bx1xC)
        
        # Squeeze the output to [BATCH_SIZE x CONTENT_SIZE] 
        output_BxC = tf.reshape(output_Bx1xC, [-1, CONTENT_SIZE], name="output_BxC")
        print("output_BxC=",output_BxC)

        return output_BxC


# read_weights weights: [2 samples x 4 slots]
read_weights = tf.constant([[0.1, 0.01, 0.001, 0.0001],[0.1, 0.1, 0.1, 0.1]], dtype=tf.float32)
print("read_weights=",read_weights)


# Memory: [2 samples x 4 slots x 3 slot_size]
memory_0 = tf.transpose(tf.constant([[0.2, 0.3, 0.4],[0.2, 0.3, 0.4],[0.3, 0.3, 0.3],[0.1, 0.2, 0.7]], dtype=tf.float32))
memory_1 = tf.transpose(tf.constant([[0.1, 0.2, 0.7],[0.0, 0.6, 0.8],[0.9, 0.0, 0.1],[0.8, 0.1, 0.1]], dtype=tf.float32))
memory = tf.stack([memory_0, memory_1])
print("memory=",memory)

mem_output = build_memory_output(read_weights, memory)

# Finally - initialize all variables.
initialize_model = tf.global_variables_initializer()    
    
# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
sess.run(initialize_model)
read_weights_BxN, prev_memory_BxSxN, output_BxC = \
    sess.run([read_weights, memory, mem_output])
print("read_weights_BxN =\n",read_weights_BxN)
print("prev_memory_BxSxN =\n",prev_memory_BxSxN)
print("output_BxC =\n",output_BxC)

# Close session.
sess.close()


read_weights= Tensor("Const:0", shape=(2, 4), dtype=float32)
memory= Tensor("stack:0", shape=(2, 3, 4), dtype=float32)
read_weights_Bx1xN= Tensor("memory_output/ExpandDims:0", shape=(2, 1, 4), dtype=float32)
prev_content_BxNxC= Tensor("memory_output/transpose:0", shape=(2, 4, 2), dtype=float32)
output_Bx1xC= Tensor("memory_output/output_Bx1xC:0", shape=(2, 1, 2), dtype=float32)
output_BxC= Tensor("memory_output/output_BxC:0", shape=(2, 2), dtype=float32)
read_weights_BxN =
 [[  1.00000001e-01   9.99999978e-03   1.00000005e-03   9.99999975e-05]
 [  1.00000001e-01   1.00000001e-01   1.00000001e-01   1.00000001e-01]]
prev_memory_BxSxN =
 [[[ 0.2         0.2         0.30000001  0.1       ]
  [ 0.30000001  0.30000001  0.30000001  0.2       ]
  [ 0.40000001  0.40000001  0.30000001  0.69999999]]

 [[ 0.1         0.          0.89999998  0.80000001]
  [ 0.2         0.60000002  0.          0.1       ]
  [ 0.69999999  0.80000001  0.1         0.1       ]]]
output_BxC =
 [[ 0.03332     0.04437   ]


In [3]:
import numpy as np 
import tensorflow as tf

# Reset graph - just in case.
tf.reset_default_graph()


# 3-D tensor `a`
a = tf.constant(np.arange(1, 24, dtype=np.int32),
                shape=[2, 4, 3])
#=> [[[ 1.  2.  3.]
#    [ 4.  5.  6.]],
#    [[ 7.  8.  9.]
#    [10. 11. 12.]]]

# 3-D tensor `b`
b = tf.constant(np.arange(1, 24, dtype=np.int32),
                shape=[2, 3, 4])
#=> [[[13. 14.]
#    [15. 16.]
#    [17. 18.]],
#   [[19. 20.]
#    [21. 22.]
#    [23. 24.]]]
c = tf.matmul(a, b)
print(c)
#=> [[[ 94 100]
#    [229 244]],
#    [[508 532]
#    [697 730]]]

# Execute graph.
sess=tf.InteractiveSession()
# Initialize.
#sess.run(initialize_model)
print("a =\n",sess.run([a]))
print("b =\n",sess.run([b]))
print("c =\n",sess.run([c]))
sess.close()


Tensor("MatMul:0", shape=(2, 4, 4), dtype=int32)
a =
 [array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]],

       [[13, 14, 15],
        [16, 17, 18],
        [19, 20, 21],
        [22, 23, 23]]], dtype=int32)]
b =
 [array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 23]]], dtype=int32)]
c =
 [array([[[  38,   44,   50,   56],
        [  83,   98,  113,  128],
        [ 128,  152,  176,  200],
        [ 173,  206,  239,  272]],

       [[ 722,  764,  806,  833],
        [ 875,  926,  977, 1010],
        [1028, 1088, 1148, 1187],
        [1160, 1228, 1296, 1341]]], dtype=int32)]


In [103]:
import tensorflow as tf
import numpy as np

x = tf.placeholder(tf.int32, shape=[None, 2])
data = np.random.randint(10, size=(5,2))

parts = list(range(len(data)))
print("parts=",parts)
out = tf.dynamic_partition(x, parts, 20)

sess = tf.Session()
print ('out tensors:\n', out)
print
print ('input data:\n', data)
print
print ('sess.run result:\n', sess.run(out, {x: data}))

parts= [0, 1, 2, 3, 4]
out tensors:
 [<tf.Tensor 'DynamicPartition_12:0' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:1' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:2' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:3' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:4' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:5' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:6' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:7' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:8' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:9' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:10' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:11' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:12' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:13' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartition_12:14' shape=(?, 2) dtype=int32>, <tf.Tensor 'DynamicPartitio