In [62]:
import tensorflow as tf
import numpy as np
from utilities import my_callbacks
from utilities import data_helper
import optparse
import sys
import math

np.set_printoptions(threshold=np.nan)

In [63]:
vocab = data_helper.load_all(filelist="final_data/wsj.all")
print(vocab)

Using features: None
{'X': 162058, 'S': 52415, '-': 2449404, 'O': 30440}
Total vocabulary size in the whole dataset: 4
['-', 'O', 'S', 'X', '0']


In [64]:
print("loading entity-gird for pos and neg documents...")

X_train_1, X_train_0, E = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.train", 
        perm_num = 20, maxlen=2000, window_size=6, vocab_list=vocab, emb_size=100)

X_dev_1, X_dev_0, E    = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.dev", 
        perm_num = 20, maxlen=2000, window_size=6, E = E, vocab_list=vocab, emb_size=100)

X_test_1, X_test_0, E    = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.test", 
        perm_num = 20, maxlen=2000, window_size=6, E = E, vocab_list=vocab, emb_size=100)


loading entity-gird for pos and neg documents...


In [65]:
num_train = len(X_train_1)
num_dev   = len(X_dev_1)
num_test  = len(X_test_1)
#assign Y value
y_train_1 = [1] * num_train 
y_dev_1 = [1] * num_dev 
y_test_1 = [1] * num_test 

print('.....................................')
print("Num of traing pairs: " + str(num_train))
print("Num of dev pairs: " + str(num_dev))
print("Num of test pairs: " + str(num_test))
#print("Num of permutation in train: " + str(opts.p_num)) 
#print("The maximum in length for CNN: " + str(opts.maxlen))
print('.....................................')

.....................................
Num of traing pairs: 23744
Num of dev pairs: 2678
Num of test pairs: 20411
.....................................


In [66]:
#randomly shuffle the training data
np.random.seed(113)
np.random.shuffle(X_train_1)
np.random.seed(113)
np.random.shuffle(X_train_0)

In [74]:
def forward_propagation(X_positive, X_negative, vocab, E, print_ = False):
    """
    Implements forward propagation of Neural coherence model
    
    Arguments:
    X_positive -- A Placeholder for positive document
    X_negative -- A Placeholder for negative document
    vocab -- Vocabulary list of entire entity grid list
    E -- initialized values for embedding matrix
    print_ -- Whether size of the variables to be printed
    
    Returns: 
    out_positive -- Coherence Score for positive document
    out_negative -- Coherence Score for negative document
    parameters -- a dictionary of tensors containing trainable parameters
    
    """
    
    ## Placeholders
    #X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #opts.maxlen=2000
    #X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #opts.maxlen=2000
    

    ## First Layer of NN: Transform each grammatical role in the grid into distributed representation - a real valued vector
    
    
    #Shared embedding matrix
    #W_embedding = tf.get_variable("W_embedding", [len(vocab), 100], initializer = tf.contrib.layers.xavier_initializer()) #embedding matrix 
    E = np.float32(E) # DataType of E is float64, which is not in list of allowed values in conv1D. Allowed DataType: float16, float32
    W_embedding = tf.get_variable("W_embedding", initializer = E) #embedding matrix 
   
    
    #Look up layer
    
    #for positive document
    embedding_positive = tf.nn.embedding_lookup(W_embedding, X_positive)
    
    #for negative document
    embedding_negative = tf.nn.embedding_lookup(W_embedding, X_negative)


    ## Second Layer of NN: Convolution Layer
    
    
    #shared filter and bias
    w_size = 6       #filter_size
    emb_size = 100   #embedding_size 
    nb_filter = 150  #num_filters 

    filter_shape = [w_size, emb_size, nb_filter]

    #W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 0)) #filter for covolution layer 1
    W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 100)) #filter for covolution layer 1
    b_conv_layer_1 =  tf.get_variable("b_conv_layer_1", shape=[nb_filter], initializer = tf.constant_initializer(0.0))  #bias for convolution layer 1

    
       
    #1D Convolution for positive document
    conv_layer_1_positive = tf.nn.conv1d(embedding_positive, W_conv_layer_1, stride=1, padding="VALID")  #embedding and W_conv_layer_1 both are 3D matrix
    conv_layer_1_with_bias_positive = tf.nn.bias_add(conv_layer_1_positive, b_conv_layer_1)    
    h_conv_layer_1_positive = tf.nn.relu(conv_layer_1_with_bias_positive, name="relu_conv_layer_1_positive") # Apply nonlinearity
    
    
    #1D Convolution for negative document
    conv_layer_1_negative = tf.nn.conv1d(embedding_negative, W_conv_layer_1, stride=1, padding="VALID")  #embedding and W_conv_layer_1 both are 3D matrix
    conv_layer_1_with_bias_negative = tf.nn.bias_add(conv_layer_1_negative, b_conv_layer_1)    
    h_conv_layer_1_negative = tf.nn.relu(conv_layer_1_with_bias_negative, name="relu_conv_layer_1_negative") # Apply nonlinearity

    

    ## Third Layer of NN: Pooling Layer
    
    
    #1D Pooling for positive document
    m_layer_1_positive = tf.nn.pool(h_conv_layer_1_positive, window_shape = [6], strides = [6], pooling_type = 'MAX', padding="VALID")

    #1D Pooling for negative document
    m_layer_1_negative = tf.nn.pool(h_conv_layer_1_negative, window_shape = [6], strides = [6], pooling_type = 'MAX', padding="VALID")



    ## Fourth Layer of NN: Fully Connected Layer
    
    #Dropout Early [As Dat Used]
    
    #for positive document
    #drop_out_early_positive = tf.nn.dropout(m_layer_1_positive, keep_prob=0.5)
    
    #for negative document
    #drop_out_early_negative = tf.nn.dropout(m_layer_1_negative, keep_prob=0.5)
    
    
    #Flatten
    
    #for positive document
    flatten_positive = tf.contrib.layers.flatten(m_layer_1_positive)
    #flatten_positive = tf.contrib.layers.flatten(drop_out_early_positive)
    
    #for negative document
    flatten_negative = tf.contrib.layers.flatten(m_layer_1_negative)
    #flatten_negative = tf.contrib.layers.flatten(drop_out_early_negative)
    

    #Dropout
    
    #for positive document
    drop_out_positive = tf.nn.dropout(flatten_positive, keep_prob=0.5, seed=100)
    
    #for negative document
    drop_out_negative = tf.nn.dropout(flatten_negative, keep_prob=0.5, seed=100)

    
    
    # Coherence Scoring
    v_fc_layer = tf.get_variable("v_fc_layer", shape = [49800, 1], initializer = tf.contrib.layers.xavier_initializer(seed = 100)) #Weight matrix for final layer
    b_fc_layer =  tf.get_variable("b_fc_layer", shape=[1], initializer = tf.constant_initializer(0.0))  #bias for final layer

    
    
    #for positive document
    #out_positive = tf.contrib.layers.fully_connected(drop_out_positive, num_outputs = 1, activation_fn=None)
    #out_positive = tf.sigmoid(out_positive)
    out_positive = tf.add(tf.matmul(drop_out_positive, v_fc_layer), b_fc_layer)
    
    #for negative document
    #out_negative = tf.contrib.layers.fully_connected(drop_out_negative, num_outputs = 1, activation_fn=None)
    #out_negative = tf.sigmoid(out_negative)
    out_negative = tf.add(tf.matmul(drop_out_negative, v_fc_layer), b_fc_layer)
    
    
    parameters = {"W_embedding": W_embedding,
                  "W_conv_layer_1": W_conv_layer_1,
                  "b_conv_layer_1": b_conv_layer_1,
                  "v_fc_layer": v_fc_layer,
                  "b_fc_layer": b_fc_layer}
    
    
    if(print_):
        print("Layer (type)          Output Shape")
        print("_________________________________________")
        print("\nInputLayer:")
        print("X_positive           ",   X_positive.shape)
        print("X_negative           ",   X_negative.shape)
        print("\nEmbedding Layer:")
        print("Embedding Matrix     ",   W_embedding.shape)
        print("Embedding Positive   ",   embedding_positive.shape)
        print("Embedding Negative   ",   embedding_negative.shape)
        print("\nConvolution 1D Layer:")
        print("Filter Shape         ",   W_conv_layer_1.shape)
        print("Conv Positive        ",   h_conv_layer_1_positive.shape)
        print("Conv Negative        ",   h_conv_layer_1_negative.shape)
        print("\nMax Pooling 1D Layer:")
        print("MaxPool Positive     ",   m_layer_1_positive.shape)
        print("MaxPool Negative     ",   m_layer_1_negative.shape)
        print("\nFlatten Layer: ")
        print("Flatten Positive     ",   flatten_positive.shape)
        print("Flatten Negative     ",   flatten_negative.shape)
        print("\nDropout Layer: ")
        print("Dropout Positive     ",   drop_out_positive.shape)
        print("Dropout Negative     ",   drop_out_negative.shape)
        print("\nFully Connected Layer:")
        print("FC Positive          ",   out_positive.shape)
        print("FC Negative          ",   out_negative.shape)
        
        
    
    return out_positive, out_negative, parameters
    

In [75]:
def ranking_loss(pos, neg):
    """
    Implements the ranking objective.
    
    Arguments:
    pos -- score for positive document batch
    neg -- score for negative document batch
    
    Returns:
    Average ranking loss for the batch  
    
    """
    
    loss = tf.maximum(1.0 + neg - pos, 0.0) 
    #print(loss)
    return tf.reduce_mean(loss)

In [76]:
def mini_batches(X, Y, m, mini_batch_size = 32):
    """
    Creates minibatches.
    
    Arguments:
    X -- Positive Documents
    Y -- Negative Documents
    m -- Number of Documents
    mini_batch_size -- Size of each mini batch. 
    
    Returns:
    list of mini batches from the positive and negative documents.
    
    """
        
    mini_batches = []
    
    num_complete_minibatches = math.floor(m/mini_batch_size) 
    
    for k in range(0, num_complete_minibatches):
        mini_batch_X = X[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch_Y = Y[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = X[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch_Y = Y[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches

In [85]:
tf.reset_default_graph()

In [86]:
## Create Placeholders
X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for positive document
X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for negative document

# Forward propagation
score_positive, score_negative, parameters = forward_propagation(X_positive, X_negative, vocab, E, print_=False)

In [87]:
# Cost function:
cost = ranking_loss(score_positive, score_negative)

# Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
#optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
#optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001, decay=0.0, momentum=0.9, epsilon=1e-8).minimize(cost)


## Using keras RMSProp

W_embedding = parameters["W_embedding"]
W_conv_layer_1 = parameters["W_conv_layer_1"]
b_conv_layer_1 = parameters["b_conv_layer_1"]
v_fc_layer = parameters["v_fc_layer"]
b_fc_layer = parameters["b_fc_layer"]
optimizer = tf.keras.optimizers.RMSprop().get_updates(cost, [W_embedding, W_conv_layer_1, b_conv_layer_1, v_fc_layer, b_fc_layer])

In [103]:
init = tf.global_variables_initializer()

num_epochs = 1
minibatch_size = 32
m = num_train

with tf.Session() as sess:

    sess.run(init)

    for epoch in range(num_epochs):

        minibatch_cost = 0.
        num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
        #minibatches = mini_batches(X_train_1, X_train_0, m, minibatch_size)
        minibatches = mini_batches(X_dev_1, X_dev_0, m, minibatch_size)

        for (i, minibatch) in enumerate(minibatches):
            if i == 10:
                break
            (minibatch_X_positive, minibatch_X_negative) = minibatch
            
            _ , temp_cost, pos, neg = sess.run([optimizer, cost, score_positive, score_negative], 
                        feed_dict={X_positive:minibatch_X_positive, 
                                   X_negative:minibatch_X_negative})
            """
            print("Epoch:", epoch, "Minibatch:", i) 
            print("Positive score:")
            print(pos) 
            print("Negative score:")
            print(neg)
            print("ranking loss:", temp_cost)
            
            print("*************** End of a minibatch **********************************")
            """
            print("Iteration ",i, ":  ",temp_cost)
            minibatch_cost += temp_cost / num_minibatches
        
        #print(minibatch_cost)
        print("******************* End of an epoch ******************************")
        wins = tf.greater(score_positive, score_negative)
        number_wins = tf.reduce_mean(tf.cast(wins, "float"))
        
        ties = tf.equal(score_positive, score_negative)
        number_ties = tf.reduce_mean(tf.cast(ties, "float"))

        losses = tf.less(score_positive, score_negative)
        number_losses = tf.reduce_mean(tf.cast(losses, "float"))

        recall = number_wins/(number_wins + number_ties + number_losses)
        
        precision = number_wins/(number_wins+number_losses)

        f1 = 2*precision*recall/(precision+recall)

        accuracy = number_wins/(number_wins + number_ties + number_losses)
        
        
        test_accuracy = accuracy.eval(feed_dict={X_positive:X_test_1, X_negative:X_test_0})
        
        test_f1 = f1.eval({X_positive:X_test_1, X_negative:X_test_0})
        

        print("Train Accuracy:", test_accuracy)
        print("Test Accuracy:", test_f1)
        

Iteration  0 :   0.997883
Iteration  1 :   0.981161
Iteration  2 :   1.00119
Iteration  3 :   0.931452
Iteration  4 :   0.989699
Iteration  5 :   0.995366
Iteration  6 :   0.909427
Iteration  7 :   1.0299
Iteration  8 :   0.924531
Iteration  9 :   0.971643
******************* End of an epoch ******************************
Hello


ResourceExhaustedError: OOM when allocating tensor with shape[20411,2000,100]
	 [[Node: embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@W_embedding"], validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](W_embedding/read, _arg_Placeholder_0_0)]]

Caused by op 'embedding_lookup', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 478, in start
    self.io_loop.start()
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 281, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 232, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 397, in execute_request
    user_expressions, allow_stdin)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-86-cad23cf9c1e7>", line 6, in <module>
    score_positive, score_negative, parameters = forward_propagation(X_positive, X_negative, vocab, E, print_=False)
  File "<ipython-input-74-314e7ae23068>", line 36, in forward_propagation
    embedding_positive = tf.nn.embedding_lookup(W_embedding, X_positive)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/embedding_ops.py", line 328, in embedding_lookup
    transform_fn=None)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/embedding_ops.py", line 150, in _embedding_lookup_and_transform
    result = _clip(_gather(params[0], ids, name=name), ids, max_norm)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/embedding_ops.py", line 54, in _gather
    return array_ops.gather(params, ids, name=name)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 2486, in gather
    params, indices, validate_indices=validate_indices, name=name)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1834, in gather
    validate_indices=validate_indices, name=name)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/tasnim/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[20411,2000,100]
	 [[Node: embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@W_embedding"], validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](W_embedding/read, _arg_Placeholder_0_0)]]
