In [2]:
# Select the GPU this notebook runs on. `setup_one_gpu` is a project helper;
# judging by its name and the "Picking GPU 3" output it pins the process to
# that single device -- TODO confirm against general_tools.
from general_tools.notebook.gpu_utils import setup_one_gpu
GPU = 3  # NOTE(review): hard-coded device index; adjust for the machine at hand.
setup_one_gpu(GPU)

Picking GPU 3


In [3]:
import tensorflow as tf
import numpy as np
from toy_seq_data import ToySequenceData

In [4]:
def deep_lstm(n_layers, n_hidden, dropout_prob=None):
    '''Builds a stack of BasicLSTMCell layers wrapped in a single MultiRNNCell.
    Input:
        n_layers: (int) number of stacked LSTM layers.
        n_hidden: (int) number of units of every layer.
        dropout_prob: accepted but currently unused. The DropoutWrapper step
            (output_keep_prob = 1.0 - dropout_prob) is disabled, so this value
            has no effect on the returned cell.
    Returns: the MultiRNNCell (tuple states) holding the n_layers cells.
    '''
    # A separate cell object is instantiated for every layer.
    layers = [tf.nn.rnn_cell.BasicLSTMCell(n_hidden, state_is_tuple=True)
              for _ in range(n_layers)]
    return tf.nn.rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

In [5]:
def length_of_sequence(sequence):
    '''Counts the non-padded steps of every sequence in a batch.
    Input: (Tensor) batch size x max length x features.
    Returns: (int32 Tensor) one length per sequence of the batch.
    Precondition: every sequence shorter than max length is padded with zeros.
    Note: a step counts as used iff at least one of its features is non-zero,
    so a genuine all-zero step is indistinguishable from padding.'''
    # Largest absolute feature value per step: zero only for all-zero steps.
    max_abs_per_step = tf.reduce_max(tf.abs(sequence), 2)
    # sign() maps every used step to 1 and every padded step to 0.
    is_used = tf.sign(max_abs_per_step)
    n_used = tf.reduce_sum(is_used, 1)
    return tf.cast(n_used, tf.int32)

In [6]:
def last_relevant_rnn_output(output, length):
    '''Returns, for a batch of dynamic_rnn outputs, the output at the last non-padded step of each sequence.
    Input:
        output: (Tensor) batch size x max length x output size. The output size must be statically known.
        length: (int32 Tensor) batch size, the number of non-padded steps of each sequence.
    Returns: (Tensor) batch size x output size.
    Notes:
    In numpy this would just be output[range(batch_size), length - 1], but we need the indexing
    to be part of the compute graph.
    Works like this: we flatten the output tensor to shape: frames in all examples x output size.
    Then we construct an index into that by creating a tensor with the start indices for each example
    tf.range(0, batch_size) * max_length and add the offset of the last used step (length - 1) to it.
    tf.gather() then performs the actual indexing.
    A sequence of length 0 is mapped to its own step 0 instead of offset -1, which would otherwise
    address the last frame of the previous example (or index -1 for the first example).
    '''
    batch_size = tf.shape(output)[0]
    max_length = tf.shape(output)[1]
    out_size = int(output.get_shape()[2])
    # Clamp so that empty sequences never index outside their own example.
    last_step = tf.maximum(length - 1, 0)
    index = tf.range(0, batch_size) * max_length + last_step
    flat = tf.reshape(output, [-1, out_size])
    relevant = tf.gather(flat, index)
    return relevant

In [7]:
# Model and data hyper-parameters shared by the cells below.
n_hidden = 64        # units per LSTM layer (passed to deep_lstm)
n_layers = 1         # number of stacked LSTM layers (passed to deep_lstm)
max_steps = 20       # padded sequence length: placeholder shape and max_seq_len of the toy data
step_feat_size = 1   # number of features per sequence step

n_classes = 2        # width of the target placeholder and of the output layer
batch_size = 10      # examples requested from trainset.next() per training step

In [8]:
# Graph inputs: zero-padded sequences, their targets (n_classes values per example;
# the argmax over them is used as the class when computing accuracy) and a dropout probability.
input_sequence = tf.placeholder(tf.float32, [None, max_steps, step_feat_size])
target = tf.placeholder(tf.float32, [None, n_classes])
dropout_prob = tf.placeholder(tf.float32)

# NOTE(review): deep_lstm has its DropoutWrapper commented out, so dropout_prob
# is passed along but does not influence the graph.
deep_lstm_model = deep_lstm(n_layers, n_hidden, dropout_prob)

# Per-example number of non-padded steps, derived from the zero padding.
len_seq = length_of_sequence(input_sequence)

rnn_outputs, final_state = tf.nn.dynamic_rnn(deep_lstm_model, input_sequence, dtype=tf.float32,
                                             sequence_length=len_seq,
                                            )

# tf.nn.dynamic_rnn returns the output activations and the last hidden state.
# The output is of size batch_size x max_steps x n_hidden,
# with the trailing steps being zero vectors for sequences shorter than the maximum length.

# The final_state contains the last state (batch_size x n_hidden) of each layer of the stacked-deep-rnn.

In [9]:
from tflearn.layers.core import fully_connected
from tf_lab.point_clouds.encoders_decoders import decoder_with_fc_only

In [8]:
# hidden_states = []
# for i in range(n_layers):
#     hidden_states.append(final_state[i].h)

# if len(hidden_states) > 1:
#     joint_last_state = tf.concat_v2(hidden_states, axis=1)
# else:
#     joint_last_state = hidden_states[0]

# logits = decoder_with_fc_only(joint_last_state, layer_sizes=[100, n_classes])

In [10]:
# Classify each sequence from the RNN output at its last non-padded step,
# using a single fully connected layer with n_classes outputs.
rnn_out = last_relevant_rnn_output(rnn_outputs, len_seq)
logits = fully_connected(rnn_out, n_classes)
# Alternative head (disabled): an FC-only decoder with a hidden layer of 100 units.
# logits = decoder_with_fc_only(rnn_out, layer_sizes=[100, n_classes])

In [11]:
def cost(output, target):
    '''Mean softmax cross entropy between predicted logits and target distributions.
    Input:
        output: (Tensor) batch size x n_classes, unnormalized logits.
        target: (Tensor) batch size x n_classes, the target class distribution of each example.
    Returns: (scalar Tensor) the cross entropy averaged over the batch.
    '''
    # Named arguments: the positional order of this op is easy to swap, and
    # TensorFlow >= 1.0 refuses positional calls altogether.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=output)
    return tf.reduce_mean(cross_entropy)

def optimizer(loss, learning_rate=0.003):
    '''Creates an Adam optimizer and returns its op minimizing ``loss``.
    Input:
        loss: (Tensor) the quantity to minimize.
        learning_rate: (float) step size handed to AdamOptimizer.
    Returns: the op produced by AdamOptimizer.minimize().
    '''
    adam = tf.train.AdamOptimizer(learning_rate)
    train_op = adam.minimize(loss)
    return train_op

In [12]:
# Training objective (mean cross entropy of the logits) and the Adam op that minimizes it.
problem_loss = cost(logits, target)
problem_opt = optimizer(problem_loss)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [13]:
# Session configuration with the GPU `allow_growth` option switched on;
# it is passed to tf.Session() by the training cell.
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True

In [14]:
# Accuracy: fraction of examples whose arg-max logit matches the arg-max of the target.
prediction = tf.argmax(logits, axis=1)     # predicted class index per example
target_ = tf.argmax(target, axis=1)        # ground-truth class index per example
correct_pred = tf.equal(prediction, target_)
avg_accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [15]:
# Toy train/test sets with sequences of at most max_steps steps.
# NOTE(review): ToySequenceData is a project class; presumably it zero-pads shorter
# sequences, as length_of_sequence() requires -- confirm in toy_seq_data.
trainset = ToySequenceData(n_samples=1000, max_seq_len=max_steps)
testset = ToySequenceData(n_samples=500, max_seq_len=max_steps)

In [33]:
init = tf.global_variables_initializer()
display_step = 100  # NOTE(review): not used anywhere in this cell.

# The session stays open on purpose: later cells keep evaluating with `sess`.
sess = tf.Session(config=gpu_config)
sess.run(init)
for epoch in range(10):
    # 100 mini-batch updates per epoch.
    for _step in range(100):
        batch_x, batch_y, _ = trainset.next(batch_size)
        feed_dict = {input_sequence: batch_x, target: batch_y, dropout_prob: 0}
        sess.run([problem_opt], feed_dict=feed_dict)

    # Loss and accuracy of the LAST mini-batch of the epoch only (feed_dict is
    # reused), hence the coarse accuracy values. print() with a single argument
    # behaves the same on Python 2 and Python 3.
    print(sess.run([problem_loss, avg_accuracy], feed_dict=feed_dict))


[0.68899345, 0.70000005]
[0.67014313, 0.70000005]
[0.72360677, 0.5]
[0.66229856, 0.5]
[0.45957518, 0.70000005]
[0.44110298, 0.89999998]
[0.31584129, 0.80000001]
[0.6290729, 0.60000002]
[0.34987733, 0.80000001]
[0.14505611, 0.89999998]


In [38]:
# Loss and accuracy on a single batch of 500 test examples (the size testset was created with).
batch_x, batch_y, _ = testset.next(500)
feed_dict = {input_sequence: batch_x, target: batch_y, dropout_prob: 0}
# print() with a single argument behaves the same on Python 2 and Python 3.
print(sess.run([problem_loss, avg_accuracy], feed_dict=feed_dict))

[0.2364198, 0.89600003]


In [84]:
# Graph nodes for loading a pre-trained word embedding into a frozen variable.
vocab_size = int(4e5)    # 400000 rows, the number of words the GloVe loader reports
embedding_dim = 100      # length of each word vector

# The variable starts as all zeros and is non-trainable; the actual values are
# fed through the placeholder and copied in by running the `embedding_init` assign op.
word_embedding = tf.Variable(tf.constant(0.0, shape=[vocab_size, embedding_dim]), trainable=False, name="word_embedding")
embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])
embedding_init = word_embedding.assign(embedding_placeholder)

In [105]:

# Copy the pre-trained matrix into `word_embedding` by running the assign op.
# NOTE(review): this rebinds `sess` to a fresh session created without gpu_config;
# the previously trained session is neither closed nor reachable afterwards.
sess = tf.Session()
# NOTE(review): `lala` is not defined anywhere in the visible notebook. Presumably it is
# the (vocab_size x embedding_dim) matrix built from the GloVe dictionary by a cell
# that was run out of order -- confirm and give it a descriptive name.
sess.run(embedding_init, feed_dict={embedding_placeholder: lala})

array([[-0.049107  ,  1.08360004, -0.96898001, ..., -0.35431999,
         0.46728   ,  0.34469   ],
       [-0.59596997, -0.022     ,  0.55533999, ...,  0.042286  ,
        -0.3973    ,  0.42844   ],
       [ 0.21253   , -0.094895  ,  0.53437001, ...,  0.29701   ,
        -0.12437   ,  0.20121001],
       ..., 
       [ 1.0632    , -0.028459  , -0.46533999, ...,  0.039003  ,
        -1.03390002, -0.52419001],
       [ 0.31733   , -0.20597   ,  0.13283999, ...,  0.65559   ,
        -0.70892   ,  0.13138001],
       [-1.00580001, -0.53103   , -0.15195   , ...,  0.20653   ,
         0.26804   , -0.95627999]], dtype=float32)

In [85]:
def load_glove_pretrained_model(glove_file):
    '''Reads a GloVe text file into a dictionary of word embeddings.
    Input: (string) path of a text file holding one entry per line: the word,
        followed by the whitespace-separated components of its vector.
    Returns: (dict) mapping each word to a 1-D np.float32 array.
    Notes: blank lines are skipped. If a word occurs more than once, its last entry wins.
    Raises: ValueError if a vector component cannot be parsed as a float.
    '''
    # Single-argument print() calls behave identically on Python 2 and Python 3.
    print("Loading glove model.")
    embedding = dict()
    with open(glove_file, 'r') as f_in:
        for line in f_in:
            s_line = line.split()
            if not s_line:
                # Blank (or whitespace-only) line: nothing to index, and
                # s_line[0] would raise IndexError.
                continue
            word = s_line[0]
            w_embedding = np.array([float(val) for val in s_line[1:]], dtype=np.float32)
            embedding[word] = w_embedding
    # Same text the comma-separated print statement produced (note the double space).
    print("Done. %d  words loaded!" % len(embedding))
    return embedding

# Load the 100-dimensional GloVe 6B vectors into a word -> vector dictionary.
# NOTE(review): absolute, machine-specific path; point it at a local copy of
# glove.6B.100d.txt when running elsewhere.
pretrained_emb_file = '/orions4-zfs/projects/lins2/Panos_Space/DATA/Language/glove.6B/glove.6B.100d.txt'
word_dict = load_glove_pretrained_model(pretrained_emb_file)

Loading glove model.
Done. 400000  words loaded!


In [102]:
def embedding_dictionary_to_matrix(in_dict):
    '''Stacks the embedding vectors of a dictionary into a 2-D matrix.
    Input: (dict) mapping words to equally sized 1-D arrays.
    Returns: (np.ndarray) number of words x embedding size. Row i holds the vector of the
        i-th key in the dictionary's iteration order, i.e. it pairs with list(in_dict.keys())[i].
    '''
    # Materialize the values first: on Python 3 dict.values() is a view object,
    # which np.array() would wrap as a 0-d object array instead of stacking the rows.
    return np.array(list(in_dict.values()))
