In [85]:
import tensorflow as tf
import numpy as np
from utilities import my_callbacks
from utilities import data_helper
import optparse
import sys
import math

# Print arrays in full instead of summarising them with "...".
# `threshold=np.nan` is rejected by current NumPy releases (the threshold must be a
# real number), so use the largest integer as the "never summarise" sentinel.
np.set_printoptions(threshold=sys.maxsize)

In [86]:
# Build the symbol vocabulary from the file list covering the whole WSJ dataset.
# In the recorded run the returned list printed as ['-', 'O', 'S', 'X', '0'].
# NOTE(review): the semantics of each symbol are defined by data_helper.load_all,
# which is not visible here; confirm there before relying on the ordering.
vocab = data_helper.load_all(filelist="final_data/wsj.all")
print(vocab)

Using features: None
{'S': 52415, '-': 2449404, 'O': 30440, 'X': 162058}
Total vocabulary size in the whole dataset: 4
['-', 'O', 'S', 'X', '0']


In [87]:
print("loading entity-gird for pos and neg documents...")

# Training split. No embedding matrix is passed in, so the loader supplies the
# one it returns as `E` (later used to initialise the embedding variable).
X_train_1, X_train_0, E = data_helper.load_and_numberize_Egrid_with_Feats(
    "final_data/wsj.train",
    perm_num=20,
    maxlen=2000,
    window_size=6,
    vocab_list=vocab,
    emb_size=100,
)

# Development split, loaded with the `E` returned for the training split.
X_dev_1, X_dev_0, E = data_helper.load_and_numberize_Egrid_with_Feats(
    "final_data/wsj.dev",
    perm_num=20,
    maxlen=2000,
    window_size=6,
    E=E,
    vocab_list=vocab,
    emb_size=100,
)

# Test split, loaded with the `E` returned by the previous call.
X_test_1, X_test_0, E = data_helper.load_and_numberize_Egrid_with_Feats(
    "final_data/wsj.test",
    perm_num=20,
    maxlen=2000,
    window_size=6,
    E=E,
    vocab_list=vocab,
    emb_size=100,
)


loading entity-gird for pos and neg documents...


In [88]:
# Number of positive/negative document pairs in each split.
num_train, num_dev, num_test = len(X_train_1), len(X_dev_1), len(X_test_1)

# One label of 1 per pair (these lists are not referenced by the visible cells below).
y_train_1 = [1 for _ in range(num_train)]
y_dev_1 = [1 for _ in range(num_dev)]
y_test_1 = [1 for _ in range(num_test)]

# Summary banner; a single print call emits the same five lines.
#print("Num of permutation in train: " + str(opts.p_num))
#print("The maximum in length for CNN: " + str(opts.maxlen))
print(
    '.....................................',
    "Num of traing pairs: " + str(num_train),
    "Num of dev pairs: " + str(num_dev),
    "Num of test pairs: " + str(num_test),
    '.....................................',
    sep="\n",
)

.....................................
Num of traing pairs: 23744
Num of dev pairs: 2678
Num of test pairs: 20411
.....................................


In [89]:
# Randomly shuffle the training data while keeping each positive document
# aligned with its negative counterpart.
#
# A single seeded permutation is drawn and applied to both arrays, so the
# pairing is explicit rather than relying on two separately re-seeded
# in-place shuffles happening to draw the same sequence.
assert len(X_train_1) == len(X_train_0), \
    "positive and negative training sets must contain the same number of rows"

np.random.seed(113)
shuffle_order = np.random.permutation(len(X_train_1))
X_train_1 = np.asarray(X_train_1)[shuffle_order]
X_train_0 = np.asarray(X_train_0)[shuffle_order]

In [111]:
def forward_propagation(X_positive, X_negative, vocab, E, print_ = False, keep_prob = 0.5):
    """
    Implements forward propagation of the neural coherence model.

    Both documents go through the same stack with shared weights:
    embedding lookup -> 1D convolution + ReLU -> 1D max pooling -> flatten
    -> dropout -> linear scoring layer.

    Arguments:
    X_positive -- int32 tensor/placeholder of symbol ids for the positive documents,
                  shape (batch, sequence_length); sequence_length must be static
    X_negative -- int32 tensor/placeholder of symbol ids for the negative documents,
                  same shape as X_positive
    vocab -- vocabulary list of the entity grid (kept for interface compatibility;
             not read here, the embedding table size comes from E)
    E -- initial values for the embedding matrix, shape (num_symbols, embedding_size)
    print_ -- whether to print the static shape of every layer
    keep_prob -- probability of keeping a unit in the dropout layer. May be a Python
                 float or a scalar tensor/placeholder, so callers can feed 1.0 to
                 disable dropout at evaluation time. Defaults to 0.5.

    Returns:
    out_positive -- coherence score for the positive documents, shape (batch, 1)
    out_negative -- coherence score for the negative documents, shape (batch, 1)
    parameters -- a dictionary of tensors containing the trainable parameters

    Raises:
    ValueError -- if the flattened feature size cannot be determined statically
                  (i.e. the sequence length of the inputs is unknown)
    """

    # Hyper-parameters of the convolution / pooling stack
    w_size = 6        # filter width (number of consecutive grid cells per filter)
    nb_filter = 150   # number of convolution filters
    pool_size = 6     # max-pooling window; the stride equals the window


    ## First Layer of NN: Transform each grammatical role in the grid into
    ## a distributed representation - a real valued vector

    # E usually arrives as float64, which conv1d does not accept; cast to float32.
    E = tf.convert_to_tensor(E, tf.float32)
    # Derive the embedding width from E so the filter shape always matches it.
    emb_size = int(E.shape[1])

    # Shared embedding matrix, initialised from E
    W_embedding = tf.get_variable("W_embedding", initializer = E)

    # Look up layer (same table for both documents)
    embedding_positive = tf.nn.embedding_lookup(W_embedding, X_positive)
    embedding_negative = tf.nn.embedding_lookup(W_embedding, X_negative)


    ## Second Layer of NN: Convolution Layer

    # Shared filter and bias
    filter_shape = [w_size, emb_size, nb_filter]
    W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 2018))
    b_conv_layer_1 = tf.get_variable("b_conv_layer_1", shape = [nb_filter], initializer = tf.constant_initializer(0.0))

    # 1D convolution for positive document
    conv_layer_1_positive = tf.nn.conv1d(embedding_positive, W_conv_layer_1, stride=1, padding="VALID")
    conv_layer_1_with_bias_positive = tf.nn.bias_add(conv_layer_1_positive, b_conv_layer_1)
    h_conv_layer_1_positive = tf.nn.relu(conv_layer_1_with_bias_positive, name="relu_conv_layer_1_positive")

    # 1D convolution for negative document
    conv_layer_1_negative = tf.nn.conv1d(embedding_negative, W_conv_layer_1, stride=1, padding="VALID")
    conv_layer_1_with_bias_negative = tf.nn.bias_add(conv_layer_1_negative, b_conv_layer_1)
    h_conv_layer_1_negative = tf.nn.relu(conv_layer_1_with_bias_negative, name="relu_conv_layer_1_negative")


    ## Third Layer of NN: Pooling Layer

    # Non-overlapping 1D max pooling
    m_layer_1_positive = tf.nn.pool(h_conv_layer_1_positive, window_shape = [pool_size], strides = [pool_size], pooling_type = 'MAX', padding="VALID")
    m_layer_1_negative = tf.nn.pool(h_conv_layer_1_negative, window_shape = [pool_size], strides = [pool_size], pooling_type = 'MAX', padding="VALID")


    ## Fourth Layer of NN: Fully Connected Layer

    # Flatten
    flatten_positive = tf.contrib.layers.flatten(m_layer_1_positive)
    flatten_negative = tf.contrib.layers.flatten(m_layer_1_negative)

    # Dropout (both ops are given the same op-level seed)
    drop_out_positive = tf.nn.dropout(flatten_positive, keep_prob=keep_prob, seed=2018)
    drop_out_negative = tf.nn.dropout(flatten_negative, keep_prob=keep_prob, seed=2018)

    # Coherence scoring. The input width is read from the flattened tensor
    # (49800 for 2000-long inputs) rather than hard-coded, so other sequence
    # lengths or filter settings work without editing this function.
    fc_input_dim = flatten_positive.get_shape().as_list()[1]
    if fc_input_dim is None:
        raise ValueError("The sequence length of the inputs must be statically known "
                         "to size the final scoring layer.")

    v_fc_layer = tf.get_variable("v_fc_layer", shape = [fc_input_dim, 1], initializer = tf.contrib.layers.xavier_initializer(seed = 2018))
    b_fc_layer = tf.get_variable("b_fc_layer", shape = [1], initializer = tf.constant_initializer(0.0))

    # Linear score, no activation
    out_positive = tf.add(tf.matmul(drop_out_positive, v_fc_layer), b_fc_layer)
    out_negative = tf.add(tf.matmul(drop_out_negative, v_fc_layer), b_fc_layer)


    parameters = {"W_embedding": W_embedding,
                  "W_conv_layer_1": W_conv_layer_1,
                  "b_conv_layer_1": b_conv_layer_1,
                  "v_fc_layer": v_fc_layer,
                  "b_fc_layer": b_fc_layer}


    if(print_):
        print("Layer (type)          Output Shape")
        print("_________________________________________")
        print("\nInputLayer:")
        print("X_positive           ",   X_positive.shape)
        print("X_negative           ",   X_negative.shape)
        print("\nEmbedding Layer:")
        print("Embedding Matrix     ",   W_embedding.shape)
        print("Embedding Positive   ",   embedding_positive.shape)
        print("Embedding Negative   ",   embedding_negative.shape)
        print("\nConvolution 1D Layer:")
        print("Filter Shape         ",   W_conv_layer_1.shape)
        print("Conv Positive        ",   h_conv_layer_1_positive.shape)
        print("Conv Negative        ",   h_conv_layer_1_negative.shape)
        print("\nMax Pooling 1D Layer:")
        print("MaxPool Positive     ",   m_layer_1_positive.shape)
        print("MaxPool Negative     ",   m_layer_1_negative.shape)
        print("\nFlatten Layer: ")
        print("Flatten Positive     ",   flatten_positive.shape)
        print("Flatten Negative     ",   flatten_negative.shape)
        print("\nDropout Layer: ")
        print("Dropout Positive     ",   drop_out_positive.shape)
        print("Dropout Negative     ",   drop_out_negative.shape)
        print("\nFully Connected Layer:")
        print("FC Positive          ",   out_positive.shape)
        print("FC Negative          ",   out_negative.shape)

    return out_positive, out_negative, parameters
    

In [112]:
def ranking_loss(pos, neg):
    """
    Pairwise margin ranking objective.

    Arguments:
    pos -- scores of the positive documents in the batch
    neg -- scores of the negative documents in the batch

    Returns:
    Scalar tensor: the batch mean of max(1 + neg - pos, 0).
    """
    # How far each pair falls short of scoring the positive document
    # at least 1.0 above the negative one.
    margin_violation = 1.0 + neg - pos
    hinge = tf.maximum(margin_violation, 0.0)
    return tf.reduce_mean(hinge)

In [113]:
def mini_batches(X, Y, mini_batch_size = 32):
    """
    Splits two row-aligned arrays into consecutive mini batches.

    Arguments:
    X -- positive documents, one per row
    Y -- negative documents, one per row, aligned with X
    mini_batch_size -- number of rows per mini batch (positive integer)

    Returns:
    A list of (X_batch, Y_batch) tuples in the original row order. Every batch
    holds mini_batch_size rows except possibly the last, which holds the
    remainder. The list is empty when X is empty.

    Raises:
    ValueError -- if mini_batch_size is smaller than 1, or if X and Y do not
                  have the same number of rows (the pairs would be misaligned)
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer, got %r" % (mini_batch_size,))

    m = len(X)
    if len(Y) != m:
        raise ValueError("X and Y must have the same number of rows, got %d and %d" % (m, len(Y)))

    # Stepping the start index by the batch size yields the complete batches
    # and the shorter trailing batch with a single slice expression.
    return [(X[start : start + mini_batch_size], Y[start : start + mini_batch_size])
            for start in range(0, m, mini_batch_size)]

In [114]:
# Start from an empty default graph. forward_propagation creates its variables
# under fixed names with tf.get_variable, so re-running the graph-building cell
# without this reset would collide with the variables left by the previous run.
tf.reset_default_graph()

In [115]:
## Create Placeholders
# Each placeholder takes a batch of int32 id sequences of fixed length 2000,
# which matches the maxlen=2000 passed to the data loader; the batch size is left open.
X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for positive document
X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for negative document

# Forward propagation: builds the shared-weight scoring graph for both inputs and,
# with print_=True, prints the static shape of every layer.
score_positive, score_negative, parameters = forward_propagation(X_positive, X_negative, vocab, E, print_=True)

Layer (type)          Output Shape
_________________________________________

InputLayer:
X_positive            (?, 2000)
X_negative            (?, 2000)

Embedding Layer:
Embedding Matrix      (5, 100)
Embedding Positive    (?, 2000, 100)
Embedding Negative    (?, 2000, 100)

Convolution 1D Layer:
Filter Shape          (6, 100, 150)
Conv Positive         (?, 1995, 150)
Conv Negative         (?, 1995, 150)

Max Pooling 1D Layer:
MaxPool Positive      (?, 332, 150)
MaxPool Negative      (?, 332, 150)

Flatten Layer: 
Flatten Positive      (?, 49800)
Flatten Negative      (?, 49800)

Dropout Layer: 
Dropout Positive      (?, 49800)
Dropout Negative      (?, 49800)

Fully Connected Layer:
FC Positive           (?, 1)
FC Negative           (?, 1)


In [116]:
# Cost function: mean pairwise ranking loss between the two score tensors.
cost = ranking_loss(score_positive, score_negative)

# Trainable tensors, taken from the dictionary returned by forward_propagation.
(W_embedding,
 W_conv_layer_1,
 b_conv_layer_1,
 v_fc_layer,
 b_fc_layer) = (parameters[key] for key in ("W_embedding",
                                             "W_conv_layer_1",
                                             "b_conv_layer_1",
                                             "v_fc_layer",
                                             "b_fc_layer"))

# Backpropagation: Keras RMSprop with its default hyper-parameters.
# `optimizer` is whatever get_updates returns; the training loop passes it to
# sess.run alongside `cost` to apply one update step per mini batch.
# Alternatives that were tried previously:
#   tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
#   tf.train.RMSPropOptimizer(learning_rate=0.001, decay=0.0, momentum=0.9, epsilon=1e-8).minimize(cost)
optimizer = tf.keras.optimizers.RMSprop().get_updates(
    cost,
    [W_embedding, W_conv_layer_1, b_conv_layer_1, v_fc_layer, b_fc_layer],
)

In [117]:
"""
X_train_1 = X_train_1[:100, :]
X_train_0 = X_train_0[:100, :]
X_test_1 = X_test_1[:100, :]
X_test_0 = X_test_0[:100, :]


num_train = len(X_train_1)
num_test  = len(X_test_1)


print('.....................................')
print("Num of traing pairs: " + str(num_train))
print("Num of test pairs: " + str(num_test))
"""

'\nX_train_1 = X_train_1[:100, :]\nX_train_0 = X_train_0[:100, :]\nX_test_1 = X_test_1[:100, :]\nX_test_0 = X_test_0[:100, :]\n\n\nnum_train = len(X_train_1)\nnum_test  = len(X_test_1)\n\n\nprint(\'.....................................\')\nprint("Num of traing pairs: " + str(num_train))\nprint("Num of test pairs: " + str(num_test))\n'

In [118]:
init = tf.global_variables_initializer()

num_epochs = 1
minibatch_size = 32
m = len(X_train_1)   # number of training pairs
p = len(X_test_1)    # number of test pairs

# Evaluation ops are built once, outside the epoch loop, so the graph does not
# grow with every epoch. Each op counts, within one fed batch, the pairs where
# the positive document scores higher than / equal to / lower than the negative.
wins = tf.greater(score_positive, score_negative)
number_wins = tf.reduce_sum(tf.cast(wins, tf.float32))

ties = tf.equal(score_positive, score_negative)
number_ties = tf.reduce_sum(tf.cast(ties, tf.float32))

losses = tf.less(score_positive, score_negative)
number_losses = tf.reduce_sum(tf.cast(losses, tf.float32))

wins_count = 0
ties_count = 0
losses_count = 0

with tf.Session() as sess:

    sess.run(init)

    # ------------------------------ Training ------------------------------
    for epoch in range(num_epochs):

        minibatches = mini_batches(X_train_1, X_train_0, minibatch_size)
        # Take the divisor from the list actually iterated; the previous
        # int(m / minibatch_size) with m = 4 was 0 and made the epoch cost inf.
        num_minibatches = len(minibatches)
        minibatch_cost = 0.

        for (i, minibatch) in enumerate(minibatches):
            (minibatch_X_positive, minibatch_X_negative) = minibatch

            _ , temp_cost = sess.run([optimizer, cost],
                        feed_dict={X_positive:minibatch_X_positive,
                                X_negative:minibatch_X_negative})

            print("Iteration ",i, ":  ",temp_cost)
            # Unweighted mean of the per-batch mean losses.
            minibatch_cost += temp_cost / num_minibatches

        print("Epoch ", epoch, " average cost: ", minibatch_cost)
        print("******************* End of an epoch ******************************")

    print("******************* End of Training ******************************")

    # ----------------------------- Evaluation -----------------------------
    # Runs once, after all epochs, so the counters are not accumulated per epoch.
    # NOTE(review): the graph built above applies dropout with keep_prob=0.5 on
    # every run, so these test scores are computed with dropout still active.
    # Disabling it requires a graph whose dropout rate can be fed at run time.
    minibatches = mini_batches(X_test_1, X_test_0, minibatch_size)

    for (i, minibatch) in enumerate(minibatches):

        (minibatch_X_positive, minibatch_X_negative) = minibatch

        num_wins, num_ties, num_losses = sess.run([number_wins, number_ties, number_losses], feed_dict={X_positive:minibatch_X_positive, X_negative:minibatch_X_negative})

        wins_count += int(num_wins)
        ties_count += int(num_ties)
        losses_count += int(num_losses)

print("Wins: ", wins_count)
print("Ties: ", ties_count)
print("losses: ", losses_count)

# Guard every ratio against an empty denominator (e.g. an empty test set, or no
# decided pairs) instead of dividing by zero.
total_pairs = wins_count + ties_count + losses_count
decided_pairs = wins_count + losses_count

# Recall and accuracy share one formula here: wins over all pairs.
recall = wins_count / total_pairs if total_pairs else 0.0

precision = wins_count / decided_pairs if decided_pairs else 0.0

f1 = 2*precision*recall/(precision+recall) if (precision + recall) else 0.0

accuracy = wins_count / total_pairs if total_pairs else 0.0

print("Test Accuracy:", accuracy)
print("Test F1 Score:", f1)
        

Iteration  0 :   1.00031




Iteration  1 :   1.00034
Iteration  2 :   0.999933
Iteration  3 :   0.996796
Iteration  4 :   0.99645
Iteration  5 :   0.995236
Iteration  6 :   0.989544
Iteration  7 :   0.996985
Iteration  8 :   0.997288
Iteration  9 :   0.980418
Iteration  10 :   0.969542
Iteration  11 :   0.973834
Iteration  12 :   0.978689
Iteration  13 :   0.956851
Iteration  14 :   0.915783
Iteration  15 :   0.915817
Iteration  16 :   0.925021
Iteration  17 :   0.925157
Iteration  18 :   0.905358
Iteration  19 :   0.896002
Iteration  20 :   0.848196
Iteration  21 :   0.846935
Iteration  22 :   0.827453
Iteration  23 :   0.801131
Iteration  24 :   0.719103
Iteration  25 :   0.707709
Iteration  26 :   0.787961
Iteration  27 :   0.740446
Iteration  28 :   0.731502
Iteration  29 :   0.660232
Iteration  30 :   0.597832
Iteration  31 :   0.493742
Iteration  32 :   0.495878
Iteration  33 :   0.557759
Iteration  34 :   0.548353
Iteration  35 :   0.561471
Iteration  36 :   0.678467
Iteration  37 :   0.427315
Iteration  3

Iteration  299 :   0.443947
Iteration  300 :   0.16748
Iteration  301 :   0.395949
Iteration  302 :   0.130437
Iteration  303 :   0.37054
Iteration  304 :   0.17084
Iteration  305 :   0.52354
Iteration  306 :   0.322959
Iteration  307 :   0.117723
Iteration  308 :   0.261638
Iteration  309 :   0.235293
Iteration  310 :   0.412146
Iteration  311 :   0.160367
Iteration  312 :   0.196979
Iteration  313 :   0.0355089
Iteration  314 :   0.32905
Iteration  315 :   0.271852
Iteration  316 :   0.142261
Iteration  317 :   0.327174
Iteration  318 :   0.134738
Iteration  319 :   0.16805
Iteration  320 :   0.239435
Iteration  321 :   0.248101
Iteration  322 :   0.0883594
Iteration  323 :   0.189641
Iteration  324 :   0.353189
Iteration  325 :   0.322678
Iteration  326 :   0.0704272
Iteration  327 :   0.331277
Iteration  328 :   0.103396
Iteration  329 :   0.273302
Iteration  330 :   0.231561
Iteration  331 :   0.342465
Iteration  332 :   0.200452
Iteration  333 :   0.105461
Iteration  334 :   0.09

Iteration  591 :   0.249045
Iteration  592 :   0.126978
Iteration  593 :   0.245895
Iteration  594 :   0.270346
Iteration  595 :   0.149601
Iteration  596 :   0.184495
Iteration  597 :   0.29692
Iteration  598 :   0.0524721
Iteration  599 :   0.0498137
Iteration  600 :   0.00268316
Iteration  601 :   0.359878
Iteration  602 :   0.0284119
Iteration  603 :   0.512002
Iteration  604 :   0.00976515
Iteration  605 :   0.0944705
Iteration  606 :   0.0818596
Iteration  607 :   0.142551
Iteration  608 :   0.23253
Iteration  609 :   0.314334
Iteration  610 :   0.329693
Iteration  611 :   0.218377
Iteration  612 :   0.189505
Iteration  613 :   0.283545
Iteration  614 :   0.231134
Iteration  615 :   0.137554
Iteration  616 :   0.273521
Iteration  617 :   0.00856686
Iteration  618 :   0.0104585
Iteration  619 :   0.133027
Iteration  620 :   0.105552
Iteration  621 :   0.128106
Iteration  622 :   0.186766
Iteration  623 :   0.146196
Iteration  624 :   0.417335
Iteration  625 :   0.152648
Iteration 