In [26]:
import tensorflow as tf
import numpy as np
from utilities import my_callbacks
from utilities import data_helper
import optparse
import sys
import math

np.set_printoptions(threshold=np.nan)

In [2]:
vocab = data_helper.load_all(filelist="final_data/wsj.all")
print(vocab)

Using features: None
{'X': 162058, 'O': 30440, '-': 2449404, 'S': 52415}
Total vocabulary size in the whole dataset: 4
['-', 'O', 'S', 'X', '0']


In [3]:
print("loading entity-gird for pos and neg documents...")

X_train_1, X_train_0, E = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.train", 
        perm_num = 20, maxlen=2000, window_size=6, vocab_list=vocab, emb_size=100)

X_dev_1, X_dev_0, E    = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.dev", 
        perm_num = 20, maxlen=2000, window_size=6, E = E, vocab_list=vocab, emb_size=100)

X_test_1, X_test_0, E    = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.test", 
        perm_num = 20, maxlen=2000, window_size=6, E = E, vocab_list=vocab, emb_size=100)


loading entity-gird for pos and neg documents...


In [5]:
E

array([[  7.03970985e-03,  -8.52192802e-03,   7.89863525e-03,
         -1.27012905e-03,  -7.44644532e-03,   1.51715749e-03,
          6.80941839e-03,  -1.29758898e-03,   3.91821121e-03,
          3.69276194e-03,   4.01296731e-03,   5.59388526e-03,
          2.85498737e-03,   9.22052336e-03,  -7.83070229e-03,
          5.92212678e-03,   6.65160166e-03,  -4.67983274e-03,
          6.73370778e-03,   6.42538131e-04,   3.38151282e-04,
         -8.02824575e-03,   8.37737989e-03,   3.33316984e-03,
         -6.50441043e-03,  -5.64616975e-03,  -6.42494388e-04,
         -1.28217516e-03,   7.78708962e-03,  -5.54801458e-03,
          1.78038732e-03,  -4.45596859e-03,   5.14443672e-04,
         -4.81285778e-03,   5.78972608e-04,  -3.75718510e-03,
          8.83244960e-04,  -5.15886991e-03,  -8.11523952e-03,
         -6.21067241e-03,  -6.99429344e-03,   7.88893673e-03,
         -3.98495804e-03,  -4.54271061e-03,  -9.87040510e-03,
          1.96026909e-03,   5.88701763e-03,   1.97242136e-03,
        

In [6]:
num_train = len(X_train_1)
num_dev   = len(X_dev_1)
num_test  = len(X_test_1)
#assign Y value
y_train_1 = [1] * num_train 
y_dev_1 = [1] * num_dev 
y_test_1 = [1] * num_test 

print('.....................................')
print("Num of traing pairs: " + str(num_train))
print("Num of dev pairs: " + str(num_dev))
print("Num of test pairs: " + str(num_test))
#print("Num of permutation in train: " + str(opts.p_num)) 
#print("The maximum in length for CNN: " + str(opts.maxlen))
print('.....................................')

.....................................
Num of traing pairs: 23744
Num of dev pairs: 2678
Num of test pairs: 20411
.....................................


In [7]:
#randomly shuffle the training data
np.random.seed(113)
np.random.shuffle(X_train_1)
np.random.seed(113)
np.random.shuffle(X_train_0)

In [9]:
def forward_propagation(X_positive, X_negative, vocab, E, print_ = False):
    """
    Implements forward propagation of Neural coherence model
    
    Arguments:
    X_positive -- A Placeholder for positive document
    X_negative -- A Placeholder for negative document
    vocab -- Vocabulary list of entire entity grid list
    E -- initialized values for embedding matrix
    print_ -- Whether size of the variables to be printed
    
    Returns: 
    out_positive: Coherence Score for positive document
    out_negative: Coherence Score for negative document
    
    """
    
    ## Placeholders
    #X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #opts.maxlen=2000
    #X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #opts.maxlen=2000
    

    ## First Layer of NN: Transform each grammatical role in the grid into distributed representation - a real valued vector
    
    
    #Shared embedding matrix
    #W_embedding = tf.get_variable("W_embedding", [len(vocab), 100], initializer = tf.contrib.layers.xavier_initializer()) #embedding matrix 
    E = np.float32(E) # DataType of E is float64, which is not in list of allowed values in conv1D. Allowed DataType: float16, float32
    W_embedding = tf.get_variable("W_embedding", initializer = E) #embedding matrix 
   
    
    #Look up layer
    
    #for positive document
    embedding_positive = tf.nn.embedding_lookup(W_embedding, X_positive)
    
    #for negative document
    embedding_negative = tf.nn.embedding_lookup(W_embedding, X_negative)


    ## Second Layer of NN: Convolution Layer
    
    
    #shared filter and bias
    w_size = 6       #filter_size
    emb_size = 100   #embedding_size 
    nb_filter = 150  #num_filters 

    filter_shape = [w_size, emb_size, nb_filter]

    #W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 0)) #filter for covolution layer 1
    W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 100)) #filter for covolution layer 1
    b_conv_layer_1 =  tf.get_variable("b_conv_layer_1", shape=[nb_filter], initializer = tf.constant_initializer(0.0))  #bias for convolution layer 1

    
       
    #1D Convolution for positive document
    conv_layer_1_positive = tf.nn.conv1d(embedding_positive, W_conv_layer_1, stride=1, padding="VALID")  #embedding and W_conv_layer_1 both are 3D matrix
    conv_layer_1_with_bias_positive = tf.nn.bias_add(conv_layer_1_positive, b_conv_layer_1)    
    h_conv_layer_1_positive = tf.nn.relu(conv_layer_1_with_bias_positive, name="relu_conv_layer_1_positive") # Apply nonlinearity
    
    
    #1D Convolution for negative document
    conv_layer_1_negative = tf.nn.conv1d(embedding_negative, W_conv_layer_1, stride=1, padding="VALID")  #embedding and W_conv_layer_1 both are 3D matrix
    conv_layer_1_with_bias_negative = tf.nn.bias_add(conv_layer_1_negative, b_conv_layer_1)    
    h_conv_layer_1_negative = tf.nn.relu(conv_layer_1_with_bias_negative, name="relu_conv_layer_1_negative") # Apply nonlinearity

    

    ## Third Layer of NN: Pooling Layer
    
    
    #1D Pooling for positive document
    m_layer_1_positive = tf.nn.pool(h_conv_layer_1_positive, window_shape = [6], strides = [6], pooling_type = 'MAX', padding="VALID")

    #1D Pooling for negative document
    m_layer_1_negative = tf.nn.pool(h_conv_layer_1_negative, window_shape = [6], strides = [6], pooling_type = 'MAX', padding="VALID")



    ## Fourth Layer of NN: Fully Connected Layer
    
    #Dropout Early [As Dat Used]
    
    #for positive document
    #drop_out_early_positive = tf.nn.dropout(m_layer_1_positive, keep_prob=0.5)
    
    #for negative document
    #drop_out_early_negative = tf.nn.dropout(m_layer_1_negative, keep_prob=0.5)
    
    
    #Flatten
    
    #for positive document
    flatten_positive = tf.contrib.layers.flatten(m_layer_1_positive)
    #flatten_positive = tf.contrib.layers.flatten(drop_out_early_positive)
    
    #for negative document
    flatten_negative = tf.contrib.layers.flatten(m_layer_1_negative)
    #flatten_negative = tf.contrib.layers.flatten(drop_out_early_negative)
    

    #Dropout
    
    #for positive document
    drop_out_positive = tf.nn.dropout(flatten_positive, keep_prob=0.5, seed=100)
    
    #for negative document
    drop_out_negative = tf.nn.dropout(flatten_negative, keep_prob=0.5, seed=100)

    
    
    # Coherence Scoring
    
    #for positive document
    out_positive = tf.contrib.layers.fully_connected(drop_out_positive, num_outputs = 1, activation_fn=None)
    #out_positive = tf.sigmoid(out_positive)
    
    #for negative document
    out_negative = tf.contrib.layers.fully_connected(drop_out_negative, num_outputs = 1, activation_fn=None)
    #out_negative = tf.sigmoid(out_negative)
    
    if(print_):
        print("Layer (type)          Output Shape")
        print("_________________________________________")
        print("\nInputLayer:")
        print("X_positive           ",   X_positive.shape)
        print("X_negative           ",   X_negative.shape)
        print("\nEmbedding Layer:")
        print("Embedding Matrix     ",   W_embedding.shape)
        print("Embedding Positive   ",   embedding_positive.shape)
        print("Embedding Negative   ",   embedding_negative.shape)
        print("\nConvolution 1D Layer:")
        print("Filter Shape         ",   W_conv_layer_1.shape)
        print("Conv Positive        ",   h_conv_layer_1_positive.shape)
        print("Conv Negative        ",   h_conv_layer_1_negative.shape)
        print("\nMax Pooling 1D Layer:")
        print("MaxPool Positive     ",   m_layer_1_positive.shape)
        print("MaxPool Negative     ",   m_layer_1_negative.shape)
        print("\nFlatten Layer: ")
        print("Flatten Positive     ",   flatten_positive.shape)
        print("Flatten Negative     ",   flatten_negative.shape)
        print("\nDropout Layer: ")
        print("Dropout Positive     ",   drop_out_positive.shape)
        print("Dropout Negative     ",   drop_out_negative.shape)
        print("\nFully Connected Layer:")
        print("FC Positive          ",   out_positive.shape)
        print("FC Negative          ",   out_negative.shape)
        
        
    
    return out_positive, out_negative
    

In [10]:
def ranking_loss(pos, neg):
    """
    Implements the ranking objective.
    
    Arguments:
    pos -- score for positive document batch
    neg -- score for negative document batch
    
    Returns:
    Average ranking loss for the batch  
    
    """
    
    loss = tf.maximum(1.0 + neg - pos, 0.0) 
    #print(loss)
    return tf.reduce_mean(loss)

In [11]:
def mini_batches(X, Y, m, mini_batch_size = 32):
    """
    Creates minibatches.
    
    Arguments:
    X -- Positive Documents
    Y -- Negative Documents
    m -- Number of Documents
    mini_batch_size -- Size of each mini batch. 
    
    Returns:
    list of mini batches from the positive and negative documents.
    
    """
        
    mini_batches = []
    
    num_complete_minibatches = math.floor(m/mini_batch_size) 
    
    for k in range(0, num_complete_minibatches):
        mini_batch_X = X[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch_Y = Y[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = X[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch_Y = Y[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches

In [22]:
tf.reset_default_graph()

In [23]:
## Create Placeholders
X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for positive document
X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for negative document

# Forward propagation
score_positive, score_negative = forward_propagation(X_positive, X_negative, vocab, E, print_=True)

Layer (type)          Output Shape
_________________________________________

InputLayer:
X_positive            (?, 2000)
X_negative            (?, 2000)

Embedding Layer:
Embedding Matrix      (5, 100)
Embedding Positive    (?, 2000, 100)
Embedding Negative    (?, 2000, 100)

Convolution 1D Layer:
Filter Shape          (6, 100, 150)
Conv Positive         (?, 1995, 150)
Conv Negative         (?, 1995, 150)

Max Pooling 1D Layer:
MaxPool Positive      (?, 332, 150)
MaxPool Negative      (?, 332, 150)

Flatten Layer: 
Flatten Positive      (?, 49800)
Flatten Negative      (?, 49800)

Dropout Layer: 
Dropout Positive      (?, 49800)
Dropout Negative      (?, 49800)

Fully Connected Layer:
FC Positive           (?, 1)
FC Negative           (?, 1)


In [24]:
# Cost function:
cost = ranking_loss(score_positive, score_negative)

# Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
#optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001, epsilon=1e-8).minimize(cost)

In [25]:
init = tf.global_variables_initializer()

num_epochs = 1
minibatch_size = 32
m = num_train

with tf.Session() as sess:

    sess.run(init)

    for epoch in range(num_epochs):

        minibatch_cost = 0.
        num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
        minibatches = mini_batches(X_train_1, X_train_0, m, minibatch_size)

        for (i, minibatch) in enumerate(minibatches):

            (minibatch_X_positive, minibatch_X_negative) = minibatch
            
            _ , temp_cost, pos, neg = sess.run([optimizer, cost, score_positive, score_negative], 
                        feed_dict={X_positive:minibatch_X_positive, 
                                   X_negative:minibatch_X_negative})
            """
            print("Epoch:", epoch, "Minibatch:", i) 
            print("Positive score:")
            print(pos) 
            print("Negative score:")
            print(neg)
            print("ranking loss:", temp_cost)
            
            print("*************** End of a minibatch **********************************")
            """
            print("Iteration ",i, ":  ",temp_cost)
            minibatch_cost += temp_cost / num_minibatches
        
        #print(minibatch_cost)
        print("******************* End of an epoch ******************************")

Iteration  0 :   1.00089
Iteration  1 :   0.996619
Iteration  2 :   0.99267
Iteration  3 :   0.990049
Iteration  4 :   0.98509
Iteration  5 :   0.981425
Iteration  6 :   0.977038
Iteration  7 :   0.972658
Iteration  8 :   0.967735
Iteration  9 :   0.961783
Iteration  10 :   0.957051
Iteration  11 :   0.950258
Iteration  12 :   0.94398
Iteration  13 :   0.93509
Iteration  14 :   0.92691
Iteration  15 :   0.916664
Iteration  16 :   0.905267
Iteration  17 :   0.889641
Iteration  18 :   0.87271
Iteration  19 :   0.849821
Iteration  20 :   0.822262
Iteration  21 :   0.788423
Iteration  22 :   0.743976
Iteration  23 :   0.689716
Iteration  24 :   0.613655
Iteration  25 :   0.5239
Iteration  26 :   0.401033
Iteration  27 :   0.242984
Iteration  28 :   0.0390434
Iteration  29 :   0.0
Iteration  30 :   0.0
Iteration  31 :   0.0
Iteration  32 :   0.0
Iteration  33 :   0.0
Iteration  34 :   0.0
Iteration  35 :   0.0
Iteration  36 :   0.0
Iteration  37 :   0.0
Iteration  38 :   0.0
Iteration  39 :

Iteration  356 :   0.0
Iteration  357 :   0.0
Iteration  358 :   0.0
Iteration  359 :   0.0
Iteration  360 :   0.0
Iteration  361 :   0.0
Iteration  362 :   0.0
Iteration  363 :   0.0
Iteration  364 :   0.0
Iteration  365 :   0.0
Iteration  366 :   0.0
Iteration  367 :   0.0
Iteration  368 :   0.0
Iteration  369 :   0.0
Iteration  370 :   0.0
Iteration  371 :   0.0
Iteration  372 :   0.0
Iteration  373 :   0.0
Iteration  374 :   0.0
Iteration  375 :   0.0
Iteration  376 :   0.0
Iteration  377 :   0.0
Iteration  378 :   0.0
Iteration  379 :   0.0
Iteration  380 :   0.0
Iteration  381 :   0.0
Iteration  382 :   0.0
Iteration  383 :   0.0
Iteration  384 :   0.0
Iteration  385 :   0.0
Iteration  386 :   0.0
Iteration  387 :   0.0
Iteration  388 :   0.0
Iteration  389 :   0.0
Iteration  390 :   0.0
Iteration  391 :   0.0
Iteration  392 :   0.0
Iteration  393 :   0.0
Iteration  394 :   0.0
Iteration  395 :   0.0
Iteration  396 :   0.0
Iteration  397 :   0.0
Iteration  398 :   0.0
Iteration  

Iteration  713 :   0.0
Iteration  714 :   0.0
Iteration  715 :   0.0
Iteration  716 :   0.0
Iteration  717 :   0.0
Iteration  718 :   0.0
Iteration  719 :   0.0
Iteration  720 :   0.0
Iteration  721 :   0.0
Iteration  722 :   0.0
Iteration  723 :   0.0
Iteration  724 :   0.0
Iteration  725 :   0.0
Iteration  726 :   0.0
Iteration  727 :   0.0
Iteration  728 :   0.0
Iteration  729 :   0.0
Iteration  730 :   0.0
Iteration  731 :   0.0
Iteration  732 :   0.0
Iteration  733 :   0.0
Iteration  734 :   0.0
Iteration  735 :   0.0
Iteration  736 :   0.0
Iteration  737 :   0.0
Iteration  738 :   0.0
Iteration  739 :   0.0
Iteration  740 :   0.0
Iteration  741 :   0.0
******************* End of an epoch ******************************
