In [15]:
from keras.layers import Input, Embedding, Convolution1D, MaxPooling1D, Flatten, Dense, Dropout, merge, concatenate
#from keras.layers import concatenate
from keras.models import Model
from keras.utils import np_utils
from keras import backend as K

import tensorflow as tf
import numpy as np
from utilities import my_callbacks
from utilities import data_helper
import optparse
import sys


# Print arrays in full, never summarised with "...".
# NumPy rejects a NaN threshold (ValueError on current releases); sys.maxsize
# is the documented value for "always use the full repr".
np.set_printoptions(threshold=sys.maxsize)

In [16]:
# Load the vocabulary from the complete WSJ file list and display it.
vocab = data_helper.load_all(
    filelist="final_data/wsj.all",
)
print(vocab)

Using features: None
{'-': 2449404, 'X': 162058, 'S': 52415, 'O': 30440}
Total vocabulary size in the whole dataset: 4
['-', 'O', 'S', 'X', '0']


In [17]:
print("loading entity-gird for pos and neg documents...")

# Settings shared by all three splits, collected once so they cannot drift apart.
_egrid_kwargs = dict(
    perm_num=20,
    maxlen=2000,
    window_size=6,
    vocab_list=vocab,
    emb_size=100,
)

# Each call returns (positive docs, negative docs, E). The train call has no E
# argument; the E it returns is handed to the dev and test calls.
X_train_1, X_train_0, E = data_helper.load_and_numberize_Egrid_with_Feats(
    "final_data/wsj.train", **_egrid_kwargs)

X_dev_1, X_dev_0, E = data_helper.load_and_numberize_Egrid_with_Feats(
    "final_data/wsj.dev", E=E, **_egrid_kwargs)

X_test_1, X_test_0, E = data_helper.load_and_numberize_Egrid_with_Feats(
    "final_data/wsj.test", E=E, **_egrid_kwargs)

loading entity-gird for pos and neg documents...


In [18]:
# Number of (positive, negative) document pairs per split.
num_train, num_dev, num_test = len(X_train_1), len(X_dev_1), len(X_test_1)

# Every pair gets the constant label 1 (one label per pair).
y_train_1 = [1 for _ in range(num_train)]
y_dev_1 = [1 for _ in range(num_dev)]
y_test_1 = [1 for _ in range(num_test)]

# Report the split sizes between two separator rules.
print('.....................................')
for split_name, pair_count in (("traing", num_train), ("dev", num_dev), ("test", num_test)):
    print("Num of " + split_name + " pairs: " + str(pair_count))
print('.....................................')

.....................................
Num of traing pairs: 23744
Num of dev pairs: 2678
Num of test pairs: 20411
.....................................


In [19]:
# Turn the integer labels of each split into 2-class one-hot rows.
y_train_1, y_dev_1, y_test_1 = [
    np_utils.to_categorical(labels, 2)
    for labels in (y_train_1, y_dev_1, y_test_1)
]

# Shuffle both sides of the training pairs in place. The RNG is re-seeded with
# the same value before each shuffle so that both receive the same permutation
# (given equal lengths) and positive/negative documents stay aligned.
for pair_side in (X_train_1, X_train_0):
    np.random.seed(113)
    np.random.shuffle(pair_side)

In [20]:
"""
np.random.seed(113)

E = 0.01 * np.random.uniform( -1.0, 1.0, (len(vocab), 100))
E[len(vocab)-1] = 0
#E
"""

'\nnp.random.seed(113)\n\nE = 0.01 * np.random.uniform( -1.0, 1.0, (len(vocab), 100))\nE[len(vocab)-1] = 0\n#E\n'

In [21]:
def ranking_loss(y_true, y_pred):
    """Pairwise margin ranking loss over two-column score predictions.

    Column 0 of ``y_pred`` is taken as the positive score and column 1 as the
    negative score. Each row contributes ``max(0, 1 + neg - pos)``, and the
    mean over rows is returned.

    ``y_true`` carries no information for this loss: it is multiplied by zero
    and added to the mean, so the result is the mean broadcast to the shape of
    ``y_true``.
    """
    positive_score, negative_score = y_pred[:, 0], y_pred[:, 1]
    # Hinge with margin 1.0: zero once pos exceeds neg by at least the margin.
    hinge = K.maximum(1.0 + negative_score - positive_score, 0.0)
    return K.mean(hinge) + 0 * y_true

In [22]:
# Shared scoring network: maps one document (a length-2000 sequence of integer
# ids) to a single unbounded score.

# Seeded Glorot-uniform initialisers for the convolution filters and for the
# final scoring layer.
filter_init = tf.keras.initializers.glorot_uniform(seed=2018)
v_init = tf.keras.initializers.glorot_uniform(seed=2018)

sent_input = Input(shape=(2000,), dtype='int32', name='sent_input')

# The embedding layer starts from E and yields 100-dimensional vectors.
# E arrives as float64, which the TensorFlow conv1d op does not accept,
# so it is cast to float32 first.
E = np.float32(E)
x = Embedding(
    input_dim=len(vocab),
    output_dim=100,
    weights=[E],
    input_length=2000,
)(sent_input)

# 1-D convolution (150 filters, window of 6, no padding) with ReLU.
x = Convolution1D(
    filters=150,
    kernel_size=6,
    padding='valid',
    activation='relu',
    kernel_initializer=filter_init,
)(x)

# Max-pool over windows of 6, then flatten to one feature vector per document.
x = MaxPooling1D(pool_size=6)(x)
x = Flatten()(x)

# Seeded dropout in front of the scoring layer.
x = Dropout(0.5, seed=2018)(x)

# Latent coherence score: a single linear unit (no activation).
out_x = Dense(1, activation=None, kernel_initializer=v_init)(x)

shared_cnn = Model(sent_input, out_x)

In [23]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
shared_cnn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sent_input (InputLayer)      (None, 2000)              0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 2000, 100)         500       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1995, 150)         90150     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 332, 150)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 49800)             0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 49800)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 49801     
Total para

In [24]:
# Inputs for the positive and the negative document of each pair.
pos_input = Input(shape=(2000,), dtype='int32', name="pos_input")
neg_input = Input(shape=(2000,), dtype='int32', name="neg_input")

# Both inputs go through the same shared_cnn instance, so the two branches
# share every weight.
pos_branch = shared_cnn(pos_input)
neg_branch = shared_cnn(neg_input)

# Join the two scores along the last axis: column 0 is the positive score and
# column 1 the negative score, which is the layout ranking_loss indexes.
# The functional `concatenate` replaces the Keras 1 `merge(..., mode='concat')`
# call, which is deprecated and absent from later Keras 2 releases.
concatenated = concatenate([pos_branch, neg_branch], name="coherence_out")

final_model = Model([pos_input, neg_input], concatenated)

# The loss is attached to the output layer by name.
final_model.compile(loss={'coherence_out': ranking_loss}, optimizer="rmsprop")

# Callback object passed to fit() during training.
histories = my_callbacks.Histories()

  if sys.path[0] == '':
  name=name)


In [25]:
num_epochs = 10


def _pairwise_ranking_metrics(scores):
    """Summarise pairwise predictions of shape (n, 2).

    A row is a win when column 0 > column 1, a tie when the two are equal,
    and a loss otherwise.

    Returns:
        (wins, ties, losses, recall, precision, f1), where
        recall = wins / n, precision = wins / (wins + losses), and f1 is their
        harmonic mean. Any ratio whose denominator is zero is reported as 0.0
        instead of raising ZeroDivisionError.
    """
    scores = np.asarray(scores)
    total = len(scores)
    if total == 0:
        return 0, 0, 0, 0.0, 0.0, 0.0

    pos_scores, neg_scores = scores[:, 0], scores[:, 1]
    win_count = int(np.sum(pos_scores > neg_scores))
    tie_count = int(np.sum(pos_scores == neg_scores))
    loss_count = total - (win_count + tie_count)

    recall_value = win_count / total
    decided = win_count + loss_count
    precision_value = win_count / decided if decided else 0.0
    denom = precision_value + recall_value
    f1_value = 2 * precision_value * recall_value / denom if denom else 0.0
    return win_count, tie_count, loss_count, recall_value, precision_value, f1_value


for ep in range(num_epochs):

    # Train phase: one epoch per fit() call so the model can be evaluated
    # after every epoch. shuffle=False means Keras does not reorder the
    # samples itself.
    final_model.fit([X_train_1, X_train_0], y_train_1, validation_data=None, shuffle=False, epochs=1,
                    verbose=0, batch_size=32, callbacks=[histories])

    # Test phase.
    # NOTE(review): metrics are reported on the test split after every epoch
    # while the dev split is never used here; confirm this is intended.
    y_pred = final_model.predict([X_test_1, X_test_0])
    n = len(y_pred)

    wins, ties, loss, recall, prec, f1 = _pairwise_ranking_metrics(y_pred)

    print("***********Epoch: ",ep,"  ******************")

    print("Wins: ", wins)
    print("Ties: ", ties)
    print("losses: ", loss)

    # Accuracy is wins / n, the same quantity as recall above.
    print(" -Test acc: " + str(recall))
    print(" -Test f1 : " + str(f1))
    





***********Epoch:  0   ******************
Wins:  16582
Ties:  0
losses:  3829
 -Test acc: 0.8124050756944785
 -Test f1 : 0.8124050756944785
***********Epoch:  1   ******************
Wins:  16487
Ties:  0
losses:  3924
 -Test acc: 0.8077507226495517
 -Test f1 : 0.8077507226495518
***********Epoch:  2   ******************
Wins:  16433
Ties:  0
losses:  3978
 -Test acc: 0.8051050903924355
 -Test f1 : 0.8051050903924355
***********Epoch:  3   ******************
Wins:  16350
Ties:  0
losses:  4061
 -Test acc: 0.8010386556268678
 -Test f1 : 0.8010386556268678
***********Epoch:  4   ******************
Wins:  16366
Ties:  0
losses:  4045
 -Test acc: 0.8018225466660134
 -Test f1 : 0.8018225466660133
***********Epoch:  5   ******************
Wins:  16397
Ties:  0
losses:  4014
 -Test acc: 0.8033413355543579
 -Test f1 : 0.8033413355543579
***********Epoch:  6   ******************
Wins:  16286
Ties:  0
losses:  4125
 -Test acc: 0.7979030914702856
 -Test f1 : 0.7979030914702856
***********Epoch:  7

In [26]:
#for (i, loss) in enumerate(histories.losses):
    #print("Iteration ",i, ":  ",loss)