In [1]:
import numpy as np
import tensorflow as tf
import pickle

In [2]:
with open("sx_train.txt", "rb") as fp:
    X_train = pickle.load(fp)
    
with open("sy_train.txt", "rb") as fp:
    y_train = pickle.load(fp)
    
with open("s_word_embedding_matrix.txt", "rb") as fp:
    word_embedding_matrix = pickle.load(fp)
    
with open("s_word_to_index.txt", "rb") as fp:
    word_to_index = pickle.load(fp)
    
with open("s_index_to_word.txt", "rb") as fp:
    index_to_word = pickle.load(fp)

In [3]:
len(X_train[0])

12

In [4]:
def model_inputs():
    
    input_data = tf.placeholder(tf.int32, [None, None], name='input')
    lr = tf.placeholder(tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    text_length = tf.placeholder(tf.int32, (None,), name='text_length')

    return input_data, lr, keep_prob, text_length

In [5]:
def encoding_layer(rnn_size, sequence_length, num_layers, rnn_inputs, keep_prob):
    
    for layer in range(num_layers):
        with tf.variable_scope('encoder_{}'.format(layer)):
            cell_fw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(0.1, 1))
            cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob = keep_prob)

            cell_bw = tf.contrib.rnn.LSTMCell(rnn_size, initializer=tf.random_uniform_initializer(0.1, 1))
            cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob = keep_prob)

            enc_output, enc_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, 
                                                                    cell_bw, 
                                                                    rnn_inputs,
                                                                    sequence_length,
                                                                    dtype=tf.float32)
    enc_output = tf.concat(enc_output,2)
    enc_output = enc_output[:,-1,:]
    return enc_output, enc_state

In [6]:
def seq2seq_model(input_data, keep_prob, text_length, 
                  rnn_size, num_layers):
    
    embeddings = word_embedding_matrix
    
    enc_embed_input = tf.nn.embedding_lookup(embeddings, input_data)
    enc_output, enc_state = encoding_layer(rnn_size, text_length, num_layers, enc_embed_input, keep_prob)
    
    return enc_output,enc_state

In [7]:
def pad_sentence_batch(sentence_batch):
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [word_to_index['<PAD>']] * (max_sentence - len(sentence)) for sentence in sentence_batch]

In [8]:
def get_batches(texts):

    pad_texts_batch = np.array(pad_sentence_batch(texts))

    pad_texts_lengths = []
    for text in pad_texts_batch:
        pad_texts_lengths.append(len(text))

    return pad_texts_batch, pad_texts_lengths

In [9]:
rnn_size = 200
num_layers = 2
learning_rate = 0.001
keep_probability = 0.5

In [10]:
input_data, lr, keep_prob, text_length = model_inputs()

encoding_output, encoding_stat = seq2seq_model(tf.reverse(input_data,[-1]),
                                                          keep_prob,   
                                                          text_length,
                                                          rnn_size, 
                                                          num_layers, 
                                                          )
enc_ = tf.reduce_mean(encoding_output,axis=0)

In [11]:
enc_

<tf.Tensor 'Mean:0' shape=(400,) dtype=float32>

In [12]:
y_train = np.asarray(y_train).reshape((len(y_train),1))

In [13]:
train = tf.placeholder(tf.float32, shape=(400,None), name='train')
output = tf.placeholder(tf.float32, shape=(1,None), name='output')

# First layer
hid1_size = 128
w1 = tf.Variable(tf.random_normal([hid1_size, 400], stddev=0.01), name='w1')
b1 = tf.Variable(tf.constant(0.1, shape=(hid1_size, 1)), name='b1')
y1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(w1, tf.reshape(enc_,(400,1))), b1)), keep_prob=0.5)

# Second layer
hid2_size = 256
w2 = tf.Variable(tf.random_normal([hid2_size, hid1_size], stddev=0.01), name='w2')
b2 = tf.Variable(tf.constant(0.1, shape=(hid2_size, 1)), name='b2')
y2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(w2, y1), b2)), keep_prob=0.5)

# Output layer
wo = tf.Variable(tf.random_normal([1, hid2_size], stddev=0.01), name='wo')
bo = tf.Variable(tf.random_normal([1, 1]), name='bo')
yo = tf.add(tf.matmul(wo, y2), bo)

In [14]:
pred = tf.sigmoid(yo)
lr = tf.placeholder(tf.float32, name='learning_rate')
loss = tf.losses.mean_squared_error(output,pred)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)

In [22]:
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)

In [23]:
learning_rate = 0.0001
for epoch in range(10):
    avg_cost = 0.0

    for i in range(2):
        texts_batch, texts_lengths = get_batches(X_train[i])
        _, c = sess.run([optimizer, loss], feed_dict={output: y_train[i].reshape((1,1)),
                                                      input_data: texts_batch,
                                                      text_length: texts_lengths,
                                                      lr:learning_rate,
                                                      keep_prob: keep_probability})
        avg_cost += c
        #avg_cost /= len(X_train[i])

    print("Epoch: %d    Train Cost: %0.4f"%(epoch, avg_cost))
sess.close()

Epoch: 0    Train Cost: nan
Epoch: 1    Train Cost: nan
Epoch: 2    Train Cost: nan
Epoch: 3    Train Cost: nan
Epoch: 4    Train Cost: nan
Epoch: 5    Train Cost: nan
Epoch: 6    Train Cost: nan
Epoch: 7    Train Cost: nan
Epoch: 8    Train Cost: nan
Epoch: 9    Train Cost: nan
