In [4]:
import tensorflow as tf
import numpy as np
import scipy.linalg as ln
from model.ntm_ops import *
from model.ntm_cell import *
%load_ext autoreload
%autoreload 2

In [5]:
# Create the TensorFlow session shared by every later cell. An
# InteractiveSession installs itself as the default session, which is what
# allows the bare `v.eval()` calls further down to work without a session
# argument.
sess = tf.InteractiveSession()

In [6]:
class Task(object):
    """Base class for example generators that are consumed by iteration.

    Iterating over a task yields ``(index, example)`` pairs: ``index`` counts
    up from zero and ``example`` is whatever the subclass's ``sample``
    returns. Subclasses provide ``sample_params`` (a dict of keyword
    arguments) and ``sample`` (which receives those keyword arguments).

    Parameters
    ----------
    max_iter: number of examples to yield before stopping; ``None`` means
        the iterator never stops on its own.
    batch_size: stored on the instance for subclasses to use.
    """

    def __init__(self, max_iter=None, batch_size=1):
        self.num_iter = 0
        self.batch_size = batch_size
        self.max_iter = max_iter

    def __iter__(self):
        """A task is its own iterator."""
        return self

    def next(self):
        """Return the next ``(index, example)`` pair.

        Raises StopIteration once ``max_iter`` examples have been produced.
        """
        limit = self.max_iter
        if limit is not None and not (self.num_iter < limit):
            raise StopIteration()
        # The counter is advanced before sampling, so the index handed back
        # is the value it had on entry.
        self.num_iter += 1
        kwargs = self.sample_params()
        index = self.num_iter - 1
        return index, self.sample(**kwargs)

    def __next__(self):
        """Python 3 spelling of the iterator protocol; delegates to ``next``."""
        return self.next()

    def sample_params(self):
        """Return the keyword arguments for ``sample``; subclasses override."""
        raise NotImplementedError()

    def sample(self):
        """Produce one example; subclasses override."""
        raise NotImplementedError()
        
class CopyTask(Task):
    """Generator of copy-task examples.

    Each example presents a random binary sequence followed by a delimiter
    step; the target is all zeros until after the delimiter and then repeats
    the sequence.

    Parameters
    ----------
    size: number of binary channels per time step.
    max_length: sequence length used when ``sample_params`` is called
        without an explicit length.
    min_length: stored on the instance; the sampling code in this class
        does not read it.
    max_iter: forwarded to ``Task``.
    batch_size: forwarded to ``Task``; number of sequences per example.
    end_marker: when truthy, one extra time step is appended and the last
        channel of the final target step is set to 1.
    """

    def __init__(self, size, max_length, min_length=1, max_iter=None,
                 batch_size=1, end_marker=False):
        super(CopyTask, self).__init__(max_iter=max_iter, batch_size=batch_size)
        self.size = size
        self.min_length = min_length
        self.max_length = max_length
        self.end_marker = end_marker
        # Single-argument call form: prints the same text under Python 2 and
        # is also valid syntax under Python 3 (the bare print statement is not).
        print(end_marker)

    def sample_params(self, length=None):
        """Return the keyword arguments for ``sample``.

        ``length`` falls back to ``max_length`` when not given.
        """
        if length is None:
            length = self.max_length
        return {'length': length}

    def sample(self, length):
        """Draw one batch of copy-task inputs and targets.

        Returns
        -------
        (example_input, example_output): two arrays of shape
        ``(batch_size, 2 * length + 1 + end_marker, size + 1)``. The extra
        last channel carries the delimiter (input) and the end marker
        (target).
        """
        sequence = np.random.binomial(1, 0.5, (self.batch_size, length, self.size))
        # Both arrays share one layout: `length` presentation steps, one
        # delimiter step, `length` recall steps and, optionally, one
        # end-marker step.
        shape = (self.batch_size, 2 * length + 1 + int(self.end_marker), self.size + 1)
        example_input = np.zeros(shape)
        example_output = np.zeros(shape)

        example_input[:, :length, :self.size] = sequence
        # Delimiter: last channel set on the step right after the sequence.
        example_input[:, length, -1] = 1
        example_output[:, length + 1:2 * length + 1, :self.size] = sequence
        if self.end_marker:
            example_output[:, -1, -1] = 1

        return example_input, example_output

In [7]:
# Copy-task configuration: sequences of 2 steps, 1 binary channel per step,
# one sequence per batch.
seq_len = 2
input_dim = 1
batch_size = 1
# batch_size is passed explicitly (instead of relying on CopyTask's default
# of 1) so the generated examples keep the same batch dimension as the NTM
# that is constructed from this same `batch_size` variable.
task = CopyTask(input_dim, seq_len, max_iter=10000, batch_size=batch_size,
                end_marker=True)

True


In [8]:
from model.ntm import *

In [9]:
# Build the NTM cell and wrap it in the NTM model. Both classes are not
# defined in this notebook (presumably they come from the star imports of the
# `model` package), so the meaning of the positional arguments is not visible
# here.
# NOTE(review): the 6 presumably is the number of unrolled time steps
# (2 * seq_len + 1 delimiter + 1 end marker with seq_len = 2; the build log
# below counts to 6) and 0.001 presumably the learning rate — confirm against
# the NTM constructor.
cell = NTMCell(20, 128, 20, shift_weighting=3)
ntm = NTM(cell, 6, input_dim, batch_size, 0.001)

Building Graph 1/6
Building Graph 2/6
Building Graph 3/6
Building Graph 4/6
Building Graph 5/6
Building Graph 6/6
Building loss 0/6
Building loss 1/6
Building loss 2/6
Building loss 3/6
Building loss 4/6
Building loss 5/6


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [10]:
sess.run(tf.initialize_all_variables())
loss = ntm.loss
h, c, w_w, w_r, m = ntm.final_state
for i, (example_input, example_output) in task:
    _loss, _, o, _m, _rw, _ww = sess.run([loss, ntm.opt_op, ntm.outputs, m, w_r, 
                               w_w]
                              , feed_dict={ntm.inputs:example_input, ntm.targets: example_output})
    if i % 10 == 0:
        print _loss
        # print o
        # print _m
        print _rw
        # print _ww
        #print _r

8.63673
[[ 0.00781415  0.00779088  0.0077693   0.00774188  0.00773185  0.00773433
   0.0077456   0.00775054  0.007756    0.00776047  0.00778584  0.00780329
   0.00783407  0.0078394   0.00785358  0.00785361  0.00785571  0.00785132
   0.0078373   0.0078271   0.00781053  0.00781507  0.00781695  0.00782701
   0.0078165   0.00780348  0.00777774  0.00776394  0.00774907  0.00775764
   0.00777026  0.00780333  0.00782752  0.00785018  0.00786911  0.00788496
   0.00789514  0.00788423  0.00785806  0.00783128  0.00781864  0.00782866
   0.00785246  0.00788562  0.0079193   0.00794919  0.0079628   0.00796299
   0.00794261  0.00791366  0.00788057  0.00784604  0.00781922  0.00778832
   0.00776485  0.00774513  0.00774212  0.00775232  0.00777675  0.00782088
   0.00785448  0.00789092  0.00789494  0.00789526  0.0078903   0.00790458
   0.00792229  0.00794312  0.00795006  0.00794142  0.00792364  0.00790081
   0.007883    0.00787001  0.00784866  0.00782512  0.00779058  0.00776623
   0.0077554   0.0077688   0.0

KeyboardInterrupt: 

In [9]:
# Print the name and the current value of every trainable variable.
# `v.eval()` is called without a session argument, so it needs a default
# session to be installed.
# NOTE(review): the saved output of this cell shows `ntm/b:0` as all-nan, so
# the run it was captured from had already diverged — worth investigating
# before trusting the other values.
for v in tf.trainable_variables():
    print v.name
    print v.eval()

ntm/w:0
[[ 0.29988933 -0.50125867]
 [ 0.05929485 -0.09765271]
 [-1.96322942  0.66455507]
 [ 0.78633398  0.63134253]
 [ 0.34694678  0.83837974]
 [-0.31785628  0.28706414]
 [ 0.13405693  0.06769112]
 [ 1.31629992 -0.70072579]
 [ 0.61418337 -0.05263442]
 [ 0.56044382  0.71812528]
 [ 0.50941122 -0.81322646]
 [ 0.57279843 -0.06291273]
 [ 1.59072554 -1.66310453]
 [-0.08642627  1.47039676]
 [ 1.78211367 -1.08129787]
 [ 0.31895331  0.70946717]
 [-0.37986392 -0.24697068]
 [-0.15629934 -0.69628811]
 [-2.01304817  0.52255833]
 [-0.00629619 -1.04649186]]
ntm/b:0
[ nan  nan]
ntm/init_mem/mem/Matrix:0
[[-0.04101996 -0.04307508 -0.00423902 ..., -0.02433592 -0.03070864
  -0.02337485]
 [-0.03357133 -0.04444226 -0.02112933 ..., -0.00141062 -0.01142912
   0.00782645]
 [-0.0419778  -0.02508279 -0.01272599 ..., -0.01083992 -0.05210544
  -0.02469731]
 ..., 
 [-0.00176581 -0.01148703  0.00244977 ...,  0.00169099 -0.04393398
  -0.03696742]
 [-0.0054918  -0.00384431 -0.01022866 ...,  0.00834487 -0.02999016
   

In [None]:
def _split_seq(x, seq_len=None, input_dim=None):
    """
    Split x into a seq_len list with each element a 2D Tensor (batch_size, input_dim).

    Parameters:
    -----------
    x: 3D Tensor with shape (batch_size, seq_len, input_dim)
    seq_len: optional int, number of time steps. When omitted it is read from
        the static shape of x (axis 1).
    input_dim: optional int, size of the last axis. When omitted it is read
        from the static shape of x (axis 2).

    Returns: A seq_len list with each element a 2D Tensor

    Raises: ValueError when a size is neither passed in nor statically known.
    """
    if seq_len is None or input_dim is None:
        # Previously these sizes were hard-coded (6 steps, 2 channels); the
        # static shape gives the same numbers for that case and lets the
        # helper serve other sequence lengths and widths as well.
        static_dims = x.get_shape().as_list()
        if seq_len is None:
            seq_len = static_dims[1]
        if input_dim is None:
            input_dim = static_dims[2]
        if seq_len is None or input_dim is None:
            raise ValueError(
                "_split_seq needs statically known seq_len and input_dim; "
                "pass them explicitly for tensors with unknown dimensions")

    # Go time-major first so that, after flattening, each consecutive group
    # of batch_size rows belongs to one time step.
    x = tf.transpose(x, perm=(1, 0, 2))
    x = tf.reshape(x, shape=(-1, input_dim))
    x = tf.split(num_split=seq_len, split_dim=0, value=x)
    return x

In [4]:
# LSTM baseline: a plain LSTM with a linear read-out, fed the same kind of
# examples as the NTM. The placeholder is (batch, time, channel) = (1, 6, 2).
inputs = tf.placeholder(dtype=tf.float32, shape=(1, 6, 2))
lstm = tf.nn.rnn_cell.BasicLSTMCell(num_units=100,state_is_tuple=True)
# tf.nn.rnn is given a list with one tensor per time step, hence _split_seq.
outs, state = tf.nn.rnn(lstm, _split_seq(inputs), dtype=tf.float32)
# Read-out weights shared by all time steps: 100 LSTM units -> 2 channels.
with tf.variable_scope("linear"):
    w = tf.get_variable(name="w", initializer=tf.truncated_normal_initializer(), shape=(100, 2))
    b = tf.get_variable(name="b", initializer=tf.constant_initializer(0.01), shape=2)
# Apply the read-out to each step's LSTM output. No sigmoid is applied here
# (it is left commented out), so `outputs` holds raw pre-sigmoid values.
outputs = []
for out in outs:
    output = tf.matmul(out, w) + b
    # output = tf.nn.sigmoid(output)
    outputs.append(output)


In [5]:
# Stack the per-step outputs into a single tensor and swap the first two
# axes so the batch axis comes first again.
# NOTE(review): this rebinds `outputs` from a Python list to a tensor, so the
# cell is not idempotent — running it a second time would swap the axes back.
outputs = tf.transpose(tf.convert_to_tensor(outputs), (1, 0, 2))

In [6]:
# outputs_ = _split_seq(outputs)

In [7]:
# Target placeholder, same shape as the `inputs` placeholder.
targets = tf.placeholder(name="target", dtype=tf.float32, shape=(1, 6, 2))
# The commented-out lines in this cell appear to be an earlier per-time-step
# formulation of the loss; they are not executed.
# ts = _split_seq(targets)
# losses = []
# for o, t in zip(outputs_, ts):
# Element-wise sigmoid cross-entropy; `outputs` is passed as the logits, so
# the sigmoid is applied inside this op rather than on the model outputs.
losses = tf.nn.sigmoid_cross_entropy_with_logits(outputs, targets)
# losses.append(tf.reduce_mean(tf.reduce_sum(loss, reduction_indices=1)))
# Collapse to one scalar: the mean over every element. Note that `losses` is
# rebound here from the element-wise tensor to that scalar.
losses = tf.reduce_mean(tf.convert_to_tensor(losses))

In [8]:
# Adam optimiser with a learning rate of 1e-3.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
# Disabled alternative: compute the gradients explicitly and clip each one
# element-wise to [-10, 10] before applying them.
# gvs = optimizer.compute_gradients(losses, var_list=tf.all_variables())
# capped_gvs = [(tf.clip_by_value(grad, -10., 10.), var) for grad, var in gvs]
# opt = optimizer.apply_gradients(capped_gvs)
# Active path: a single op that computes and applies unclipped gradients.
opt = optimizer.minimize(losses)

In [9]:
# (Re-)initialise all variables, including the optimiser state created by
# the previous cell.
# NOTE(review): this presumably also resets variables created by earlier
# cells that live in the same default graph (e.g. the NTM), discarding any
# training they had received — confirm that is intended.
sess.run(tf.initialize_all_variables())

In [13]:
for i, (example_input, example_output) in task:
    _loss, _, _os = sess.run([losses, opt, outputs], feed_dict={inputs:example_input, targets: example_output})
    if i %50 == 0:
        print _loss, _os
        print example_input
        print example_output

0.724335 [[[ 0.01        0.01      ]
  [-0.20125696  0.04063954]
  [-0.06382775  0.46444342]
  [-0.14738114  0.42166984]
  [-0.1963329   0.41911349]
  [-0.2071355   0.41543931]]]
[[[ 0.  0.]
  [ 1.  0.]
  [ 0.  1.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]]
[[[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]
  [ 1.  0.]
  [ 0.  1.]]]
0.399231 [[[-0.31512165 -0.47804797]
  [-0.50893426 -0.80386281]
  [-0.58841705 -1.2850039 ]
  [-0.69602656 -1.08495808]
  [-1.00188255 -0.38976461]
  [-1.40300214  0.46197939]]]
[[[ 0.  0.]
  [ 0.  0.]
  [ 0.  1.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]]
[[[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  1.]]]
0.0474438 [[[ -1.76192391  -1.83253896]
  [ -3.16580486  -5.84514046]
  [ -2.69372582 -12.17786789]
  [  2.05054331 -15.88512135]
  [  3.46395683  -8.05510426]
  [ -7.45780659  10.50111294]]]
[[[ 1.  0.]
  [ 1.  0.]
  [ 0.  1.]
  [ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]]]
[[[ 0.  0.]
  [ 0.  0.]
  [ 0.  0.]
  [ 1.  0.]
  [ 1.  0.]
  [ 0.  1.]]]
0.0213

KeyboardInterrupt: 

In [11]:
# List the names of all trainable variables (names only, no values).
for v in tf.trainable_variables():
    print v.name

ntm/w:0
ntm/b:0
ntm/ntm_cell/write/erase/Matrix:0
ntm/ntm_cell/write/add/Matrix:0
ntm/ntm_cell/lstm/linear/Matrix:0
ntm/ntm_cell/lstm/linear/bias:0
ntm/ntm_cell/addressing/writing/key_vector/Matrix:0
ntm/ntm_cell/addressing/writing/key_strength/Matrix:0
ntm/ntm_cell/addressing/writing/interpolation/Matrix:0
ntm/ntm_cell/addressing/writing/shifting/Matrix:0
ntm/ntm_cell/addressing/writing/sharpening/Matrix:0
ntm/ntm_cell/addressing/reading/key_vector/Matrix:0
ntm/ntm_cell/addressing/reading/key_strength/Matrix:0
ntm/ntm_cell/addressing/reading/interpolation/Matrix:0
ntm/ntm_cell/addressing/reading/shifting/Matrix:0
ntm/ntm_cell/addressing/reading/sharpening/Matrix:0


In [None]:
# NOTE(review): scratch cell with no recorded execution count. `graph` is not
# assigned in any visible cell (it would have to come from one of the star
# imports), and get_tensor_by_name is called without the tensor name it is
# meant to look up — as written this cell is not expected to run; complete it
# or remove it.
graph.get_tensor_by_name()