In [1]:
import tensorflow as tf
import numpy as np
import scipy.linalg as ln
from model.ntm_ops import *
from model.memory import *
%load_ext autoreload
%autoreload 2

In [2]:
# Open an interactive session; it installs itself as the default session, which
# is what lets the `.eval()` calls in the cells below run without a session
# argument.
sess = tf.InteractiveSession()

## Introduction
Neural Turing Machines (NTMs) combine the capabilities of Turing machines and neural networks to infer simple algorithms. The controller (usually an LSTM) can be viewed as the CPU, and the external memory as the RAM.

An NTM has four components: a controller, read heads, write heads, and an external memory.

High level overview:
1. Addressing: the addressing mechanism produces the weighting of each head. There are two types of addressing: content-based and location-based. At every time step, the controller outputs five elements that are used to produce each head's weighting: a key vector, a key strength, an interpolation gate, a shift weighting, and a scalar used to sharpen the weighting.
2. Read: each read head has a weighting vector that specifies how much information is read from each memory location.
3. Write: each write head has a weighting vector, an erase vector, and an add vector. This is inspired by the LSTM's forget gate and input gate.

## Section 1 Hyper parameters

### 1.1 Memory matrix
Define two hyperparameters for the $N \times M$ memory matrix, where $N$ is the number of memory locations and $M$ is the vector size at each memory location.

### 1.2 Controller dimension
Define the LSTM hidden state dimension $h$ and the number of stacked hidden layers $a$. This is the same as in a traditional LSTM with a hidden state and a cell state.

Define the input and output dimensions. In an NTM this is usually the number of bits per sequence; e.g. if one of the input sequences is [0, 1, 0, 1, 0, 1], then the dimension should be 6.

### 1.3 The range of allowed location shift
Define $s$, the range of allowed location shifts in location-based addressing (convolutional shift). E.g. if $s = 3$, then the allowed location shifts are [-1, 0, 1].

## Section 2 Memory Testing
At every time step the controller outputs the weighting of each head and its hidden states (including the cell states, as in the original LSTM). The weighting is determined by the addressing mechanism:
1. Content Addressing
2. Interpolation
3. Convolutional Shift
4. Sharpening

In [7]:
# Build a tiny memory (batch of 1, three locations of width 2) and overwrite
# its state with hand-picked values so the results below can be checked by hand.
B = 1  # batch size
N = 3  # number of memory locations
M = 2  # vector size at each memory location
memory = Memory(mem_size=N, mem_dim=M, batch_size=B)

# Memory contents: one row per location, wrapped in a leading batch axis.
memory.memory = tf.constant(
    [[[6, 8],
      [1, 2],
      [3, 3]]],
    dtype=tf.float32)
# The read head starts fully on location 0 ...
memory.read_weighting = tf.constant([[1, 0, 0]], dtype=tf.float32)
# ... and the write head fully on location 1.
memory.write_weighting = tf.constant([[0, 1, 0]], dtype=tf.float32)
# Initial read vector; the read test below replaces it.
memory.read_vector = tf.constant([[1.0, 1.0]], dtype=tf.float32)

In [8]:
# Hand-written stand-ins for the vectors a controller would emit at one time
# step; each carries a leading batch axis of size 1.
key_vector = tf.constant([[3, 4]], dtype=tf.float32)
key_strength = tf.constant([[1]], dtype=tf.float32)
# Interpolation gate (the variable name keeps its original spelling because
# later cells reference it).
interplotation = tf.constant([[0.9]], dtype=tf.float32)
shifting = tf.constant([[0, 0, 1]], dtype=tf.float32)
sharpening = tf.constant([[2]], dtype=tf.float32)
# Write-head vectors; float literals already yield float32, stated explicitly
# here to match the constants above.
add_vector = tf.constant([[0.5, 0.5]], dtype=tf.float32)
erase_vector = tf.constant([[0.1, 0.5]], dtype=tf.float32)

In [9]:
# memory write test
# Write once at the current write weighting and store the result back on the
# memory object. The stored output is consistent with an erase-then-add update,
# M * (1 - w * e) + w * a, applied only to location 1.
# print(...) is called with a single pre-formatted argument so the cell runs,
# and prints the same text, on both Python 2 and Python 3.
print("Previous memory\n%s" % (memory.memory.eval(),))
matrix = memory.write(memory.write_weighting, memory.memory, erase_vector, add_vector)
memory.memory = matrix
print("Current memory\n%s" % (memory.memory.eval(),))

Previous memory
[[[ 6.  8.]
  [ 1.  2.]
  [ 3.  3.]]]
Current memory
[[[ 6.          8.        ]
  [ 1.39999998  1.5       ]
  [ 3.          3.        ]]]


In [10]:
# update write weighting test
# Run one addressing step for the write head and replace its weighting with the
# result. The stored output also shows extra lines ("inter", "shifting") that
# this cell does not print — presumably debug prints inside update_weighting.
# print(...) is called with a single pre-formatted argument so the cell runs,
# and prints the same text, on both Python 2 and Python 3.
print("current write weighting %s" % (memory.write_weighting.eval(),))
memory.write_weighting = memory.update_weighting(key_vector, key_strength, interplotation, shifting, sharpening,
                                                 memory.write_weighting, memory.memory)
print("next write weighting %s" % (memory.write_weighting.eval(),))

current write weighting [[ 0.  1.  0.]]
[[ 1.          0.99423516  0.98994946]]
inter [[ 0.30158314  0.39984959  0.29856727]]
shifting [[ 0.29856727  0.30158314  0.39984959]]
next write weighting [[ 0.32519153  0.32578066  0.34902781]]


In [11]:
# memory read test
# Read from memory with the current read weighting. With the one-hot weighting
# [1, 0, 0] the stored output is exactly the first memory row.
# print(...) is called with a single pre-formatted argument so the cell runs,
# and prints the same text, on both Python 2 and Python 3.
print("current read vector %s" % (memory.read_vector.eval(),))
memory.read_vector = memory.read(memory.read_weighting, memory.memory)
print("next read vector %s" % (memory.read_vector.eval(),))

current read vector [[ 1.  1.]]
next read vector [[ 6.  8.]]


In [12]:
# update read weighting test
# Run one addressing step for the read head and replace its weighting with the
# result. The stored output also shows extra lines ("inter", "shifting") that
# this cell does not print — presumably debug prints inside update_weighting.
# print(...) is called with a single pre-formatted argument so the cell runs,
# and prints the same text, on both Python 2 and Python 3.
print("current read weighting %s" % (memory.read_weighting.eval(),))
memory.read_weighting = memory.update_weighting(key_vector, key_strength, interplotation, shifting, sharpening,
                                                 memory.read_weighting, memory.memory)
print("next read weighting %s" % (memory.read_weighting.eval(),))

current read weighting [[ 1.  0.  0.]]
[[ 1.          0.99423516  0.98994946]]
inter [[ 0.40158316  0.29984957  0.29856727]]
shifting [[ 0.29856727  0.40158316  0.29984957]]
next read weighting [[ 0.32514414  0.34946215  0.32539377]]


In [13]:
# Scratch check of the interpolation step by hand: blend a softmax over
# hand-typed scores (gate 0.9) with a previous weighting of [0, 1, 0].
a = (
    0.9 * tf.nn.softmax(1 - np.array([[0, 0.016, 0.001]])).eval()
    + (1 - 0.9) * np.array([[0, 1, 0]])
)

In [16]:
# Scratch: display `1 - value` for a row of hand-typed values.
1 - np.array([[0, 0.01613, 0.010051]])

array([[ 1.      ,  0.98387 ,  0.989949]])

In [19]:
# NOTE(review): the stored output below peaks at index 0, whereas `a` as built
# in cell In [13] peaks at index 1 (0.1 is added at that position only), so `a`
# was presumably reassigned in a cell that is no longer in the notebook
# (execution counts 14-18 are missing). Re-run from a fresh kernel to confirm.
tf.nn.softmax(a).eval()

array([[ 0.35698247,  0.32146079,  0.32155674]])