In [82]:
import tensorflow as tf
import numpy as np
from utilities import my_callbacks
from utilities import data_helper
import optparse
import sys
import math

In [83]:
vocab = data_helper.load_all(filelist="final_data/wsj.all")
print(vocab)

Using features: None
{'-': 2449404, 'X': 162058, 'S': 52415, 'O': 30440}
Total vocabulary size in the whole dataset: 4
['-', 'O', 'S', 'X', '0']


In [84]:
print("loading entity-gird for pos and neg documents...")

X_train_1, X_train_0, E = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.train", 
        perm_num = 20, maxlen=2000, window_size=6, vocab_list=vocab, emb_size=100)

X_dev_1, X_dev_0, E    = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.dev", 
        perm_num = 20, maxlen=2000, window_size=6, vocab_list=vocab, emb_size=100)

X_test_1, X_test_0, E    = data_helper.load_and_numberize_Egrid_with_Feats("final_data/wsj.dev", 
        perm_num = 20, maxlen=2000, window_size=6, vocab_list=vocab, emb_size=100)


loading entity-gird for pos and neg documents...


In [85]:
num_train = len(X_train_1)
num_dev   = len(X_dev_1)
num_test  = len(X_test_1)
#assign Y value
y_train_1 = [1] * num_train 
y_dev_1 = [1] * num_dev 
y_test_1 = [1] * num_test 

print('.....................................')
print("Num of traing pairs: " + str(num_train))
print("Num of dev pairs: " + str(num_dev))
print("Num of test pairs: " + str(num_test))
#print("Num of permutation in train: " + str(opts.p_num)) 
#print("The maximum in length for CNN: " + str(opts.maxlen))
print('.....................................')

.....................................
Num of traing pairs: 23744
Num of dev pairs: 2678
Num of test pairs: 2678
.....................................


In [86]:
X_train_1.shape

(23744, 2000)

In [87]:
def forward_propagation(X_positive, X_negative, vocab, E):
    """
    Implements forward propagation of Neural coherence model
    
    Args:
    X_positive - A Placeholder for positive document
    X_negative - A Placeholder for negative document
    vocab - Vocabulary list of entire entity grid list
    E - initialized values for embedding matrix
    
    Returns: 
    out_positive: Coherence Score for positive document
    out_negative: Coherence Score for negative document
    
    """
    
    ## Placeholders
    #X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #opts.maxlen=2000
    #X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #opts.maxlen=2000
    

    ## First Layer of NN: Transform each grammatical role in the grid into distributed representation - a real valued vector
    
    
    #Shared embedding matrix
    #W_embedding = tf.get_variable("W_embedding", [len(vocab), 100], initializer = tf.contrib.layers.xavier_initializer()) #embedding matrix 
    E = np.float32(E) # DataType of E is float64, which is not in list of allowed values in conv1D. Allowed DataType: float16, float32
    W_embedding = tf.get_variable("W_embedding", initializer = E) #embedding matrix 
   
    
    #Look up layer
    
    #for positive document
    embedding_positive = tf.nn.embedding_lookup(W_embedding, X_positive)
    
    #for negative document
    embedding_negative = tf.nn.embedding_lookup(W_embedding, X_negative)


    ## Second Layer of NN: Convolution Layer
    
    
    #shared filter and bias
    w_size = 6       #filter_size
    emb_size = 100   #embedding_size 
    nb_filter = 150  #num_filters 

    filter_shape = [w_size, emb_size, nb_filter]

    #W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 0)) #filter for covolution layer 1
    W_conv_layer_1 = tf.get_variable("W_conv_layer_1", shape = filter_shape, initializer = tf.contrib.layers.xavier_initializer(seed = 0)) #filter for covolution layer 1
    b_conv_layer_1 =  tf.get_variable("b_conv_layer_1", shape=[nb_filter], initializer = tf.constant_initializer(0.0))  #bias for convolution layer 1

    
       
    #1D Convolution for positive document
    conv_layer_1_positive = tf.nn.conv1d(embedding_positive, W_conv_layer_1, stride=1, padding="VALID")  #embedding and W_conv_layer_1 both are 3D matrix
    conv_layer_1_with_bias_positive = tf.nn.bias_add(conv_layer_1_positive, b_conv_layer_1)    
    h_conv_layer_1_positive = tf.nn.relu(conv_layer_1_with_bias_positive, name="relu_conv_layer_1_positive") # Apply nonlinearity
    
    
    #1D Convolution for negative document
    conv_layer_1_negative = tf.nn.conv1d(embedding_negative, W_conv_layer_1, stride=1, padding="VALID")  #embedding and W_conv_layer_1 both are 3D matrix
    conv_layer_1_with_bias_negative = tf.nn.bias_add(conv_layer_1_negative, b_conv_layer_1)    
    h_conv_layer_1_negative = tf.nn.relu(conv_layer_1_with_bias_negative, name="relu_conv_layer_1_negative") # Apply nonlinearity

    

    ## Third Layer of NN: Pooling Layer
    
    
    #1D Pooling for positive document
    m_layer_1_positive = tf.nn.pool(h_conv_layer_1_positive, window_shape = [6], strides = [6], pooling_type = 'MAX', padding="VALID")

    #1D Pooling for negative document
    m_layer_1_negative = tf.nn.pool(h_conv_layer_1_negative, window_shape = [6], strides = [6], pooling_type = 'MAX', padding="VALID")



    ## Fourth Layer of NN: Fully Connected Layer
    
    #Dropout Early [As Dat Used]
    
    #for positive document
    #drop_out_early_positive = tf.nn.dropout(m_layer_1_positive, keep_prob=0.5)
    
    #for negative document
    #drop_out_early_negative = tf.nn.dropout(m_layer_1_negative, keep_prob=0.5)
    
    
    #Flatten
    
    #for positive document
    flatten_positive = tf.contrib.layers.flatten(m_layer_1_positive)
    #flatten_positive = tf.contrib.layers.flatten(drop_out_early_positive)
    
    #for negative document
    flatten_negative = tf.contrib.layers.flatten(m_layer_1_negative)
    #flatten_negative = tf.contrib.layers.flatten(drop_out_early_negative)
    

    #Dropout
    
    #for positive document
    drop_out_positive = tf.nn.dropout(flatten_positive, keep_prob=0.5)
    
    #for negative document
    drop_out_negative = tf.nn.dropout(flatten_negative, keep_prob=0.5)

    
    
    # Coherence Scoring
    
    #for positive document
    out_positive = tf.contrib.layers.fully_connected(drop_out_positive, num_outputs = 1, activation_fn=None)
    #out_positive = tf.sigmoid(out_positive)
    
    #for negative document
    out_negative = tf.contrib.layers.fully_connected(drop_out_negative, num_outputs = 1, activation_fn=None)
    #out_negative = tf.sigmoid(out_negative)
    
    return out_positive, out_negative
    

In [88]:
def ranking_loss(pos, neg):
    loss = tf.maximum(1.0 + neg - pos, 0.0) 
    #print(loss)
    return tf.reduce_mean(loss)

In [89]:
def mini_batches(X, Y, m, mini_batch_size = 32):
        
    mini_batches = []
    
    num_complete_minibatches = math.floor(m/mini_batch_size) 
    
    for k in range(0, num_complete_minibatches):
        mini_batch_X = X[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch_Y = Y[k * mini_batch_size : k * mini_batch_size + mini_batch_size, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = X[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch_Y = Y[num_complete_minibatches * mini_batch_size : m, :]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    
    return mini_batches

In [90]:
tf.reset_default_graph()

In [91]:
## Create Placeholders
X_positive = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for positive document
X_negative = tf.placeholder(tf.int32, shape = [None, 2000]) #Placeholder for negative document

# Forward propagation
score_positive, score_negative = forward_propagation(X_positive, X_negative, vocab, E)
    
# Cost function:
cost = ranking_loss(score_positive, score_negative)

# Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
#optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(cost)

In [92]:
init = tf.global_variables_initializer()

num_epochs = 10
minibatch_size = 32
m = num_train

with tf.Session() as sess:

    sess.run(init)

    for epoch in range(num_epochs):

        minibatch_cost = 0.
        num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
        minibatches = mini_batches(X_train_1, X_train_0, m, minibatch_size)

        for (i, minibatch) in enumerate(minibatches):

            (minibatch_X_positive, minibatch_X_negative) = minibatch
            
            _ , temp_cost, pos, neg = sess.run([optimizer, cost, score_positive, score_negative], 
                        feed_dict={X_positive:minibatch_X_positive, 
                                   X_negative:minibatch_X_negative})
            """
            print("Epoch:", epoch, "Minibatch:", i) 
            print("Positive score:")
            print(pos) 
            print("Negative score:")
            print(neg)
            print("ranking loss:", temp_cost)
            
            print("*************** End of a minibatch **********************************")
            """
            print(temp_cost)
            minibatch_cost += temp_cost / num_minibatches
        
        print(minibatch_cost)
        print("******************* End of an epoch ******************************")

1.01745
1.00291
0.99492
0.989602
0.979202
0.967393
0.955515
0.94186
0.928024
0.914875
0.90779
0.88832
0.87543
0.849164
0.823289
0.799827
0.764193
0.723716
0.683784
0.620073
0.538295
0.435788
0.322477
0.170587
0.00896939
0.0
0.00195491
0.0
0.000111954
0.0
0.0
0.0
0.000643511
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.000340635
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0

KeyboardInterrupt: 