In [None]:
# Import tensorflow
import tensorflow as tf
# Import numpy
import numpy as np

## Practice of <a href='https://www.tensorflow.org/api_docs/python/tf/constant'>constants</a>

In [None]:
# Build a tiny graph under the 'constants' name scope: two integer constants
# and the op that multiplies them.
with tf.name_scope('constants'):
    # The four values -1, 1, 1, -1 arranged as a 2x2 matrix.
    a = tf.constant([-1,1, 1, -1], shape = [2,2], name='a')
    # The two values 0, 1 arranged as a 2x1 column.
    b = tf.constant([0,1], shape = [2,1], name = 'b')
    # Matrix product a @ b, registered in the graph under the name 'multiply'.
    c = tf.matmul(a, b, name = 'multiply')
# Prints the tensor object `c` itself; nothing has been run in a session yet.
print(c)

In [None]:
# Why can't you get the expected [1, -1] from print(c)?
# What if we run the matrix multiplication with tf.Session?
with tf.Session() as sess:
    # sess.run evaluates `c` and returns its value, which is then printed.
    print(sess.run(c))

In [None]:
# Generate the computation graph
# The session's graph is written to `logs_dir` so that TensorBoard can render it.
logs_dir = './computation_graphs'
with tf.Session() as sess:
    log_writer = tf.summary.FileWriter(logs_dir, sess.graph)
    try:
        sess.run(c)
    finally:
        # Always close the writer, even if sess.run raises, so the event file
        # is flushed and the file handle is not leaked.
        log_writer.close()
# run tensorboard --logdir="./computation_graphs" to visualize the computation graph.

A1: Compute `np.matmul(np.array([[1,2],[3,4],[5,6]]), np.array([[1],[2]])) + np.array([1,0,1])` with TensorFlow.

## Practice of <a href='https://www.tensorflow.org/api_docs/python/tf/Variable'>variables</a>


In [None]:
# M is a 3x2 variable whose initial value is drawn from a normal distribution
# with mean 0 and standard deviation 1.
M = tf.Variable(tf.random_normal([3,2], mean=0, stddev=1), name = 'M')
# Op that stores M * 2 back into M; every run of this op doubles M again.
m_times_two = M.assign(M * 2)
# Initializer op; it is kept in a name because a later cell runs it again.
init_m = tf.global_variables_initializer()

with tf.Session() as sess:
    # Variables hold no value until the initializer has been run in this session.
    sess.run(init_m)
    print('M = %s\n' % M.eval())
    # First doubling: M now holds twice its initial value.
    sess.run(m_times_two)
    print('M * 2 = %s\n' %  M.eval())
    # Second doubling of the stored value, hence four times the initial value.
    sess.run(m_times_two)
    print('M * 4 = %s\n' % M.eval())

In [None]:
# Create a placeholder of type 32-bit float with shape [2, 1]; its value is
# supplied at run time through the feed dict.
v = tf.placeholder(tf.float32, shape = [2, 1], name = 'v')
# Matrix product of the variable M (defined in the previous cell) with v.
m_times_v = tf.matmul(M, v, name = 'M_v')
with tf.Session() as sess:
    # Running the initializer in this new session gives M a fresh random value.
    sess.run(init_m)
    print('M = %s\n' % M.eval())
    # The same op is evaluated twice, each time with a different value fed for v.
    print('M * [2,1] = %s \n' % sess.run(m_times_v, {v: [[2],[1]]}))
    print('M * [1,2] = %s \n' % sess.run(m_times_v, {v: [[1],[2]]}))

A2: Randomly generate a matrix W and a vector b, then compute W * x + b for each x, where the x's are column vectors taken from a set of randomly generated vectors.

In [None]:
# Example of word embedding lookup.
# x takes a 1-D list of int32 ids of any length (shape [None]).
# NOTE(review): the placeholder is bound to Python name `x` but given the graph
# name 'v', the same name as the earlier placeholder `v` - confirm this is intended.
x = tf.placeholder(tf.int32, shape = [None], name = 'v')
# Embedding table with 10 rows of 5 values each, initialised uniformly between -1.0 and 1.0.
embeddings = tf.Variable(tf.random_uniform([10, 5], -1.0, 1.0), name = 'embed')
# Selects the rows of `embeddings` indexed by the ids fed into x.
embed_seq = tf.nn.embedding_lookup(embeddings, x)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('embed = %s\n' % embeddings.eval())
    print('sequence 1 \n')
    # Sum the looked-up rows over axis 0 and reshape the result to a 5x1 column.
    print(sess.run(tf.reshape(tf.reduce_sum(embed_seq, 0), [5,1]), {x: np.array([1,2,3])}))
    print('sequence 2 \n')
    # Print the looked-up rows for ids 1 and 3 without any reduction.
    print(sess.run(embed_seq, {x: np.array([1,3])}))

## Save and restore computation graphs.

In [None]:
import tensorflow as tf
import numpy as np

# Start from an empty default graph so the ops and variables below are the only ones in it.
tf.reset_default_graph()
# Batch of id sequences: any number of rows, exactly 6 ids per row.
input_x = tf.placeholder(tf.int32, shape = [None, 6], name = 'v')
# Embedding table with 10 rows of 6 values each, initialised uniformly between -1.0 and 1.0.
embeddings = tf.get_variable('embed', initializer = tf.random_uniform([10, 6], -1.0, 1.0))
embed_seq = tf.nn.embedding_lookup(embeddings, input_x)

# Average the looked-up embeddings over axis 1 (the 6 positions of each row).
mean_vec = tf.reduce_mean(embed_seq, 1)

# The saver is created after the variable above; the next cell reuses it to restore.
saver = tf.train.Saver()
file_path ='./save_restore_test'
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The same op is run on a batch of 4 rows and on a batch of 1 row of random ids in [0, 10).
    print(sess.run(mean_vec, {input_x: np.random.randint(0,10,[4, 6])}))
    print(sess.run(mean_vec, {input_x: np.random.randint(0,10,[1, 6])}))
    # Write the session's variable values to files prefixed with `file_path`.
    saver.save(sess, file_path)

A3: Replace `input_x = tf.placeholder(tf.int32, shape = [None, 6], name = 'v')` with `input_x = tf.placeholder(tf.int32, shape = [4, 6], name = 'v')` and see what happens. Why?

In [None]:

# Restore the variables saved by the previous cell into a fresh session. No
# initializer is run here: the values come from the checkpoint at `file_path`.
with tf.Session() as sess:
    saver.restore(sess, file_path)
    # NOTE(review): ids are drawn from [0, 9) here but from [0, 10) in the saving
    # cell, so id 9 is never sampled - confirm whether that is intended.
    print(sess.run([embeddings, mean_vec], {input_x: np.random.randint(0,9,[1, 6])}))


### Word Embeddings

A4: Use the following code or the Gensim library to load pre-trained word embeddings, and compute the means of word sequences.

-  https://www.tensorflow.org/tutorials/representation/word2vec
-  http://projector.tensorflow.org/
-  https://radimrehurek.com/gensim/models/word2vec.html
-  https://nlp.stanford.edu/projects/glove/


In [None]:
def load_GloVe(file_path):
    """
        Load word embeddings in the format of GloVe.

        Every non-blank line is expected to look like ``word v1 v2 ... vd``,
        with fields separated by single spaces. The file is read as UTF-8.

        Args:
            file_path: path to the GloVe text file.

        Returns:
            A tuple ``(vocab, embedding)``. ``vocab`` maps each word to the row
            index of its vector, and ``embedding`` is a float32 array with one
            row per vector line. If a word occurs more than once, every
            occurrence keeps its row and ``vocab`` points at the last one.
    """
    vocab = dict()
    embd = []
    # Explicit UTF-8 so that non-ASCII tokens load the same way on every platform.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate lazily instead of materialising the whole file with readlines().
        for line in file:
            stripped = line.strip()
            if not stripped:
                # A blank line (e.g. at the end of the file) carries no vector.
                continue
            row = stripped.split(' ')
            # Index by row count, not by vocabulary size, so that indices stay
            # aligned with the rows of `embd` even when a word is repeated.
            vocab[row[0]] = len(embd)
            embd.append(row[1:])
        print('Loaded GloVe!')
    embedding = np.asarray(embd, dtype=np.float32)
    return vocab,embedding

def load_Word2Vec(file_path):
    """
        Load word embeddings in the format of Word2Vec.

        The first line is a header ``num_words word_dim``. Every following
        non-blank line is ``word v1 v2 ... vd``, with fields separated by
        single spaces. The file is read as UTF-8.

        Args:
            file_path: path to the word2vec text file.

        Returns:
            A tuple ``(vocab, embedding)``. ``vocab`` maps each word to the row
            index of its vector, and ``embedding`` is a float32 array with one
            row per vector line.

        Raises:
            AssertionError: if a vector does not have ``word_dim`` components,
                or if the number of vectors differs from ``num_words``.
            ValueError: if the header does not contain two integers.
    """
    vocab = dict()
    embd = []
    # Explicit UTF-8 so that non-ASCII tokens load the same way on every platform.
    with open(file_path, 'r', encoding='utf-8') as file:
        head_line = file.readline()
        head_line_tokens = head_line.split()
        if len(head_line_tokens) < 2:
            raise ValueError('Malformed word2vec header: %r' % head_line)
        num_words = int(head_line_tokens[0])
        word_dim = int(head_line_tokens[1])
        # Iterate lazily instead of materialising the whole file with readlines().
        for line in file:
            stripped = line.strip()
            if not stripped:
                # A blank line (e.g. at the end of the file) carries no vector.
                continue
            row = stripped.split(' ')
            # Check the length of this vector against the header. The number of
            # rows read so far says nothing about the vector dimension.
            assert len(row) - 1 == word_dim, (
                'Expected %d dimensions for %r, got %d' % (word_dim, row[0], len(row) - 1))
            # Index by row count so indices stay aligned with the rows of `embd`.
            vocab[row[0]] = len(embd)
            embd.append(row[1:])
        assert len(embd) == num_words, (
            'Header declares %d words but %d vectors were read' % (num_words, len(embd)))
        print('Loaded word2vec!')
    embedding = np.asarray(embd, dtype=np.float32)
    return vocab,embedding

### Recurrent Neural Networks

More details can be found in the lecture notes: https://docs.google.com/document/d/1_ZqzBqFMV8YmdC2PmaTXOB9O1BZ14yLNTQd2_Bkff9Y/edit.

Some tricks for RNNs: http://ruder.io/deep-learning-nlp-best-practices/index.html#lstmtricks

In [None]:
import random

def generate_inputs(batch_size, max_length, embedding_dim):
    """
        Generate a random batch of zero-padded sequences and their lengths.

        Each sequence gets a length drawn uniformly from 1..max_length. Its
        first `length` steps hold uniform random values from np.random.rand,
        and the remaining steps are set to zero.

        Args:
            batch_size: number of sequences in the batch.
            max_length: number of time steps allocated per sequence.
            embedding_dim: number of values per time step.

        Returns:
            A tuple ``(input_sens, sens_length)``. ``input_sens`` is a float
            array of shape [batch_size, max_length, embedding_dim], and
            ``sens_length`` is an int32 array of shape [batch_size]. When
            ``max_length`` is 0 all lengths are 0.
    """
    input_sens = np.random.rand(batch_size, max_length, embedding_dim)
    sens_length = np.zeros(shape=[batch_size], dtype=np.int32)
    # `>= 1` rather than `> 1`: with a single time step every sequence has
    # length 1, not 0. Only max_length == 0 leaves the lengths at zero.
    if max_length >= 1:
        for i in range(batch_size):
            sens_m_length = random.randint(1, max_length)
            sens_length[i] = sens_m_length
            # Zero out the padding steps after the end of this sequence.
            input_sens[i, sens_m_length:] = 0.0
    return (input_sens, sens_length)

In [None]:
import tensorflow as tf
import numpy as np

# Sizes for this toy example.
batch_size = 3
max_length = 5
embedding_dim = 2
lstm_cell_size = 2

# Random zero-padded input batch and the length of each sequence.
input_sent, sent_length = generate_inputs(batch_size, max_length, embedding_dim)

tf.reset_default_graph()

# Define a computation graph for an LSTM. It maps a batch of sequences of
# embedding vectors (not word ids: the placeholder is float32 with a trailing
# embedding_dim axis) to a sequence of hidden representations.
with tf.name_scope('lstm_example'):
    # Inputs: [batch_size, max_length, embedding_dim] values and one length per sequence.
    embed_seq_p = tf.placeholder(tf.float32, shape=[batch_size, max_length, embedding_dim])
    sent_length_p = tf.placeholder(tf.int32, shape=[batch_size])
    
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_cell_size,forget_bias=1.0, state_is_tuple=True)
    # All-zero starting state for every sequence in the batch.
    initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
    # time_major=False: the inputs are laid out batch-first, matching embed_seq_p.
    # sequence_length passes each sequence's real length to the RNN.
    hidden_seq, last_states = tf.nn.dynamic_rnn(lstm_cell, embed_seq_p, sequence_length=sent_length_p,
    initial_state = initial_state,time_major=False, dtype=tf.float32)

with tf.Session() as sess:
    # The LSTM cell's weights are variables and must be initialised before use.
    sess.run(tf.global_variables_initializer())
    rep_values = sess.run(hidden_seq, feed_dict={embed_seq_p : input_sent, sent_length_p : sent_length})
    print(rep_values)

In [None]:
import tensorflow as tf
import numpy as np

# Sizes for this toy example (smaller than in the previous cell).
batch_size = 2
max_length = 3
embedding_dim = 2
lstm_cell_size = 2

# Random zero-padded input batch and the length of each sequence.
input_sent, sent_length = generate_inputs(batch_size, max_length, embedding_dim)

tf.reset_default_graph()

# Define a computation graph for a bidirectional LSTM over a batch of
# sequences of embedding vectors.
with tf.name_scope('lstm_example'):
    # Inputs: [batch_size, max_length, embedding_dim] values and one length per sequence.
    embed_seq_p = tf.placeholder(tf.float32, shape=[batch_size, max_length, embedding_dim])
    sent_length_p = tf.placeholder(tf.int32, shape=[batch_size])
    # One cell per direction, each with its own all-zero starting state.
    with tf.variable_scope('forward'):
        fw_lstm = tf.contrib.rnn.BasicLSTMCell(lstm_cell_size,forget_bias=1.0, state_is_tuple=True)
        fw_init_state = fw_lstm.zero_state(batch_size, dtype=tf.float32)

    with tf.variable_scope('backward'):
        bw_lstm = tf.contrib.rnn.BasicLSTMCell(lstm_cell_size,forget_bias=1.0, state_is_tuple=True)
        bw_init_state = bw_lstm.zero_state(batch_size, dtype=tf.float32)
        
    # sequence_length passes each sequence's real length to the RNN.
    outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
        fw_lstm,
        bw_lstm,
        embed_seq_p,
        sequence_length=sent_length_p,
        initial_state_fw=fw_init_state,
        initial_state_bw=bw_init_state, dtype=tf.float32, scope="BiLSTM")
    # Join the elements of `outputs` along axis 2.
    # NOTE(review): presumably these are the forward and backward outputs, giving
    # 2 * lstm_cell_size values per step - confirm against the TensorFlow docs.
    rep = tf.concat(outputs, 2)
    
with tf.Session() as sess:
    # Both cells' weights are variables and must be initialised before use.
    sess.run(tf.global_variables_initializer())
    rep_values = sess.run(rep, feed_dict={embed_seq_p : input_sent, sent_length_p : sent_length})
    print(rep_values)

A5: How do you apply a unidirectional LSTM and a bidirectional LSTM to construct sentence representations?

A6: Tune the models developed for Q1 and Q2 based on the tips in https://medium.com/@jonathan_hui/improve-deep-learning-models-performance-network-tuning-part-6-29bf90df6d2d and http://ruder.io/deep-learning-nlp-best-practices/index.html#bestpractices
