In [836]:
import tensorflow as tf
import numpy as np
import data

In [837]:
# Load the corpus restricted to the `vocab_size` most relevant tokens.
# `d` holds one row per word (np.argmax of a row is used below as the token id);
# `id_to_token` maps a token id back to its string.
vocab_size = 1500
d, id_to_token = data.get_data(vocab_size)

# Show the end-of-sentence marker as a real line break when ids are decoded.
for k in id_to_token:
    if id_to_token[k] != 'eos':
        continue
    id_to_token[k] = '\n'

print(len(d))

# A window of words and the same window shifted one position to the right.
x = d[10:20]
y = d[11:21]

# Longest token (in characters) that occurs in the corpus; every word matrix
# is padded to this height later on.
max_word_length = max(len(id_to_token[np.argmax(j)]) for j in d)
print(max_word_length)

1909
12063
15


In [838]:
# `c_to_i` maps a character to its row index in `embedding`; both are used by
# sequence_char_matrix below to turn a word into a stack of character vectors.
c_to_i, embedding = data.get_char_embedding()

def word_generator(ix, num=10):
    """Return the `num` consecutive entries of `d` that start at position `ix`."""
    window = slice(ix, ix + num)
    return d[window]

def sequence_char_matrix(ix, num = 10, verbose=True):
    """Build the zero-padded character matrices for a window of words.

    Decodes the words stored in ``d[ix : ix + num]``, replaces every character
    by its row of ``embedding`` and pads each word with zero rows up to
    ``max_word_length``.

    Args:
        ix: index in ``d`` of the first word of the window.
        num: number of consecutive words to convert.
        verbose: when true (the default), print the shape of the result.

    Returns:
        Array of shape ``[n_words, max_word_length, embedding.shape[1], 1]``,
        where ``n_words`` is the number of entries in the window (0 for an
        empty window).
    """
    words = [id_to_token[np.argmax(j)] for j in d[ix : ix + num]]

    # A character is one *row* of `embedding`, so the width of a character
    # vector is the row length (shape[1]), not the number of rows (shape[0]).
    char_dim = embedding.shape[1]

    # Preallocating the zero array makes the padding implicit and also copes
    # with an empty window, where concatenating a list of arrays would fail.
    padded = np.zeros([len(words), max_word_length, char_dim, 1])
    for row, w in enumerate(words):
        if w:
            padded[row, :len(w), :, 0] = embedding[[c_to_i[c] for c in w]]

    if verbose:
        print(padded.shape)
    return padded

# Smoke-test both helpers on tiny windows; the return values are discarded
# (sequence_char_matrix prints the shape of the array it built).
_ = sequence_char_matrix(3, 2)
_ = word_generator(0, 2)

(2, 15, 87, 1)


In [839]:
# CNN hyperparameters
# Height of a word matrix: one row per character slot, padded to the longest word.
input_height = max_word_length
# Width of a word matrix: the length of one character vector. Characters are
# rows of `embedding` (see sequence_char_matrix), so this is shape[1]; using
# shape[0] only gives the right number while the embedding matrix is square.
input_width = embedding.shape[1]
batch_size = 5
print(input_height, input_width)
# filter_heights[k] is the number of consecutive characters covered by the
# k-th filter bank and feature_maps[k] is how many filters that bank has.
filter_heights = [2, 3, 4]
feature_maps = [3, 4, 5]

15 87


In [840]:
def inference(inp):
    """Turn the character matrix of one word into a fixed-size feature vector.

    For every ``(filter_heights[k], feature_maps[k])`` pair a filter bank that
    spans the full character-vector width is slid along the character axis,
    passed through tanh and max-pooled over all positions. The pooled vectors
    of all banks are concatenated.

    The banks live in variable scopes ``conv1``, ``conv2``, ... so that the
    caller can share them between words with ``reuse_variables()``.

    Args:
        inp: tensor holding a single word; the caller passes slices of shape
            ``[1, max_word_length, input_width, 1]``.

    Returns:
        1-D tensor with one entry per feature map, i.e. of length
        ``sum(feature_maps)``.
    """
    pooled = []
    # Iterating over the hyperparameter lists (instead of three hand-copied
    # stanzas) keeps the network in sync with `filter_heights`/`feature_maps`
    # when entries are added or removed.
    for k, (height, maps) in enumerate(zip(filter_heights, feature_maps), 1):
        with tf.variable_scope('conv%d' % k):
            weight = tf.get_variable(
                'weights',
                [height, input_width, 1, maps],
                initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))
            conv = tf.nn.tanh(tf.nn.conv2d(inp, weight, strides=[1, 1, 1, 1], padding='VALID'))
        # Pool over every position of the convolution output (max over time),
        # then drop the singleton dimensions.
        pool = tf.squeeze(tf.nn.max_pool(conv, ksize=[1, conv.get_shape()[1], 1, 1], strides=[1, 1, 1, 1], padding='VALID'))
        pooled.append(pool)

    pool_total = tf.concat(0, pooled, name='total_pool')
    return pool_total

In [841]:
# Discard whatever is in the default graph before building the model.
tf.reset_default_graph()

# One batch of words; every word is an input_height x input_width character
# matrix with a single channel.
input_ = tf.placeholder(
    dtype=tf.float32,
    shape=[batch_size, input_height, input_width, 1],
    name='cnn_in',
)
# Cut the batch into `batch_size` single-word tensors along axis 0.
words = tf.split(0, batch_size, input_)

cnn_outputs = []
with tf.variable_scope("CNN") as scope:
    for idx, word in enumerate(words):
        # The first word creates the filter variables; every later word
        # reuses them.
        if idx > 0:
            scope.reuse_variables()
        p = inference(word)
        cnn_outputs.append(p)

In [842]:
# RNN hyperparameters

epochs = 5000
hidden_layer = 128
# The RNN consumes the concatenated CNN features: one value per feature map.
input_size = sum(feature_maps)
# One output score per vocabulary entry.
output_size = vocab_size
learning_rate = 0.1
num_steps = 15
# Display the feature size. The cell used to echo `inp_out_size`, a name that
# is not defined anywhere in this notebook and fails on a fresh kernel.
input_size

12

In [843]:
def _uniform_weight(rows, cols, limit=0.05):
    """Create a [rows, cols] variable initialised by tf.random_uniform between -limit and limit."""
    return tf.Variable(tf.random_uniform([rows, cols], minval=-limit, maxval=limit))


def _zero_bias(size):
    """Create a float32 bias variable of length `size`, initialised to zeros."""
    return tf.Variable(tf.zeros([size]), dtype=tf.float32)


# Parameters of the recurrent layer: input->hidden, hidden->hidden and
# hidden->output weights, followed by the hidden and output biases.
Wxh = _uniform_weight(input_size, hidden_layer)
Whh = _uniform_weight(hidden_layer, hidden_layer)
Why = _uniform_weight(hidden_layer, output_size)
bh = _zero_bias(hidden_layer)
by = _zero_bias(output_size)

In [844]:
def recurrence(prev, inp):
    """Compute the next hidden state from the previous state and one input.

    Both arguments are flattened to single-row matrices, combined as
    tanh(prev . Whh + inp . Wxh + bh), and the result is returned as a
    vector of length `hidden_layer`.
    """
    inp_row = tf.reshape(inp, shape=[1, -1])
    prev_row = tf.reshape(prev, shape=[1, -1])
    pre_activation = (tf.matmul(prev_row, Whh)) + tf.matmul(inp_row, Wxh) + bh
    return tf.reshape(tf.tanh(pre_activation), [hidden_layer])

In [847]:
# The per-word CNN feature vectors, combined into one tensor; this is the
# sequence the recurrence is scanned over.
a = tf.identity(cnn_outputs, name='lstm_in')
# Target distribution over the vocabulary, one row per word of the batch.
b = tf.placeholder(shape=[batch_size, vocab_size], dtype=tf.float32, name='targets')

# Hidden state fed into the first step of the scan.
initial = tf.placeholder(shape=[hidden_layer], dtype=tf.float32)
states = tf.scan(recurrence, a, initializer=initial)

logits = tf.matmul(states, Why) + by
outputs = tf.nn.softmax(logits)
# Summed cross-entropy between targets and predictions. Computing it from the
# logits is the same quantity as -sum(b * log(softmax(logits))) but does not
# produce inf/NaN when a predicted probability underflows to zero.
loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=b))
optimizer = tf.train.AdagradOptimizer(learning_rate)

# Clip every gradient element to [-1, 1] before applying it.
# compute_gradients yields a None gradient for a variable the loss does not
# depend on; such pairs are skipped because clip_by_value cannot handle None.
grad_var_pairs = optimizer.compute_gradients(loss, tf.trainable_variables())
clipped_grad_var_pairs = [
    (tf.clip_by_value(grad, -1, 1), var)
    for grad, var in grad_var_pairs
    if grad is not None
]
optimize_op = optimizer.apply_gradients(clipped_grad_var_pairs)

In [848]:
sess = tf.Session()
ix = 0

# Feed one batch: character matrices as input, an all-zero initial hidden
# state, and the rows of `d` for the same window as targets.
# NOTE(review): inputs and targets cover the identical window [ix, ix + batch_size);
# if the model is meant to predict the *next* word the targets probably need
# to start at ix + 1 - confirm the intent.
feed = {
    input_: sequence_char_matrix(ix, batch_size),
    initial: np.zeros(hidden_layer),
    b: word_generator(ix, batch_size),
}
sess.run(tf.initialize_all_variables())

# Forward pass only: check the prediction shape and the untrained loss.
predicted = sess.run(outputs, feed_dict=feed)
print(predicted.shape)
current_loss = sess.run(loss, feed_dict=feed)
print(current_loss)

(5, 15, 87, 1)
(5, 1500)
36.5687
