In [1]:
import numpy as np
np.random.seed(1337) 
import os 
from keras import backend as K
from keras.models import Model, Sequential
from keras.layers import Concatenate, Embedding, Input, Dropout, Dense, Activation, Bidirectional, GlobalMaxPooling1D
from keras.layers import Convolution1D, LSTM, GRU, CuDNNGRU, CuDNNLSTM, Permute, Lambda, Flatten
from keras import optimizers
from keras.engine.topology import Layer
import tensorflow as tf
from keras import regularizers
from keras.models import load_model


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Data and Scorer path settings
# Relative paths are resolved against the notebook's working directory.

data_npy_path = './data/data_all.npy'
scorer_perl_script = "./corpus/SemEval2010_task8_scorer-v1.2/semeval2010_task8_scorer-v1.2.pl"
train_answer_key = "./files/train_answer_keys.txt"
val_answer_key = "./files/val_answer_keys.txt"
test_answer_key = "./files/test_answer_keys.txt"

# Scratch files: both are overwritten each time get_PRF1_semeval runs.
proposed_ans = "./model/proposed_ans.txt"
scorer_output = "./model/scorer_output.txt"

# Perl interpreter used when os.name == 'nt' (other platforms call plain "perl").
# The install location is machine-specific, so it can be overridden with the
# PERL_EXE environment variable; the previous hard-coded path stays the default.
perl_exe_path = os.environ.get("PERL_EXE", "C:/Dwimperl/perl/bin/perl.exe") # for windows


In [3]:
# Data Loading

# The file is unpacked into six heterogeneous Python objects (split tuples,
# embedding matrix, label maps), which NumPy can only store as a pickled object
# array. NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True is passed
# and raises ValueError otherwise.
# SECURITY: unpickling can execute arbitrary code -- only load a data_all.npy
# that you generated yourself or obtained from a trusted source.
train_set, val_set, test_set, embedding, label_to_int, int_to_label = np.load(data_npy_path, allow_pickle=True)

train_x, train_y = train_set
val_x, val_y = val_set
test_x, test_y = test_set

# Aliases of the un-shuffled training arrays. The training loop rebinds
# train_x / train_y to shuffled copies, while these names keep the original order.
train_x_copy = train_x
train_y_copy = train_y

max_sentence_len = train_x.shape[1]
n_out = len(label_to_int)

print("train_x.shape", train_x.shape)
print("train_y.shape", train_y.shape)
print("val_x.shape", val_x.shape)
print("val_y.shape", val_y.shape)
print("test_x.shape", test_x.shape)
print("test_y.shape", test_y.shape)
print("embedding.shape", embedding.shape)
print("len(label_to_int)", len(label_to_int))
print("len(int_to_label)", len(int_to_label))
print("max_sentence_len", max_sentence_len)
print("n_out", n_out)
print()


train_x.shape (7208, 101)
train_y.shape (7208,)
val_x.shape (792, 101)
val_y.shape (792,)
test_x.shape (2717, 101)
test_y.shape (2717,)
embedding.shape (25656, 300)
len(label_to_int) 19
len(int_to_label) 19
max_sentence_len 101
n_out 19



In [4]:
def get_mask_entities(x, e1_start=2409, e1_end=2408, e2_start=2411, e2_end=2410):
    ''' 1 for entity words, 0 otherwise

    Builds a mask with the same shape and dtype as ``x`` in which the positions
    strictly between an entity's opening and closing marker token are 1 and
    every other position (including the markers themselves) is 0.

    Args:
        x: 2-D array of token ids, one sentence per row.
        e1_start, e1_end: ids of the opening / closing marker of entity 1.
        e2_start, e2_end: ids of the opening / closing marker of entity 2.
            The defaults are the ids that were previously hard-coded here
            (e1s 2409, e1e 2408, e2s 2411, e2e 2410).

    Returns:
        np.ndarray shaped like ``x`` holding 0/1 values.

    Notes:
        * Scanning of a row stops at the first ``e2_end`` marker; for the other
          markers the last occurrence seen before that point wins.
        * An entity is only marked when BOTH of its markers were found.
          Previously a missing opening marker was indistinguishable from an
          opening marker at index 0, so positions 1..end-1 were marked by
          mistake.
    '''

    ret = np.zeros_like(x)

    for i in range(x.shape[0]):
        # [start_index, end_index]; None means "marker not seen in this row".
        e1 = [None, None]
        e2 = [None, None]
        for j in range(x.shape[1]):
            tok = x[i][j]
            if tok == e1_start:
                e1[0] = j
            elif tok == e1_end:
                e1[1] = j
            elif tok == e2_start:
                e2[0] = j
            elif tok == e2_end:
                e2[1] = j
                break  # nothing of interest is expected after the last marker

        for start, end in (e1, e2):
            if start is not None and end is not None:
                # Exclusive of both markers; an empty slice when end <= start + 1.
                ret[i, start + 1:end] = 1

    return ret



In [5]:
# Entity masks for every split, computed from the token-id matrices.
train_x_mask, val_x_mask, test_x_mask = (
    get_mask_entities(_split) for _split in (train_x, val_x, test_x)
)
# Alias that keeps pointing at the un-shuffled training mask once the training
# loop rebinds train_x_mask to a shuffled copy.
train_x_mask_copy = train_x_mask

for _mask_name, _mask in (
    ("train_x_mask", train_x_mask),
    ("train_x_mask_copy", train_x_mask_copy),
    ("val_x_mask", val_x_mask),
    ("test_x_mask", test_x_mask),
):
    print(_mask_name + ".shape", _mask.shape)


train_x_mask.shape (7208, 101)
train_x_mask_copy.shape (7208, 101)
val_x_mask.shape (792, 101)
test_x_mask.shape (2717, 101)


In [6]:
# Parameters

units = 64
batch_size = 128 #10
dropout_emb = 0.64
dropout_model = 0.32
dropout_pen = 0.32
l2_val = 0.00001
learning_rate = 1.0
activation_fn = 'tanh'
nb_epoch = 256
adadelta = optimizers.Adadelta(lr=learning_rate, decay=0.0)
save_model = True
es_epoch_stop = 20

# One L2 regularizer object per weight family, all sharing the same strength.
embeddings_reg = regularizers.l2(l2_val)
kernel_reg = regularizers.l2(l2_val)
recurrent_reg = regularizers.l2(l2_val)

# Echo the configuration so it is recorded next to the training log.
for _param_name, _param_value in (
    ("units", units),
    ("batch_size", batch_size),
    ("dropout_emb", dropout_emb),
    ("dropout_model", dropout_model),
    ("dropout_pen", dropout_pen),
    ("l2_val", l2_val),
    ("learning_rate", learning_rate),
    ("activation_fn", activation_fn),
    ("nb_epoch", nb_epoch),
    ("adadelta", adadelta),
    ("save_model", save_model),
    ("es_epoch_stop", es_epoch_stop),
):
    print(_param_name, _param_value)
print()


units 64
batch_size 128
dropout_emb 0.64
dropout_model 0.32
dropout_pen 0.32
l2_val 1e-05
learning_rate 1.0
activation_fn tanh
nb_epoch 256
adadelta <keras.optimizers.Adadelta object at 0x000001C3D499D470>
save_model True
es_epoch_stop 20



In [7]:


# class AttnLayer(Layer):
#     # Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification
#     # input: ?, 102, 200
    
#     def __init__(self, **kwargs):
#         super(AttnLayer, self).__init__(**kwargs)
    
#     def build(self, input_shape):
#         # Create a trainable weight variable for this layer.
#         self.W = self.add_weight(name='W', shape=(input_shape[-1], 1), initializer='glorot_uniform', trainable=True, regularizer = kernel_reg)        
#         super(AttnLayer, self).build(input_shape)  # Be sure to call this somewhere!

#     def call(self, x):
#         M = K.tanh(x) # ? 101 600
#         alpha_bar = tf.matmul(M, self.W)  # ? 101 1
#         alpha = K.permute_dimensions(alpha_bar, pattern=[0, 2, 1]) # ? 1 101

#         alpha = K.softmax(alpha) # ? 1 101
#         r_bar = tf.matmul(alpha, M) # ? 1 600
#         r = K.squeeze(r_bar, axis=-2) # ? 600
#         r = K.tanh(r)
#         return r
    
#     def compute_output_shape(self, input_shape):
#         return (input_shape[0], input_shape[-1])
    

    

# class MaskSumPoolingLayer(Layer):
#     # input_shape [(None, 101, 600), (None, 101)]
#     # input:  (?, 101, 600) (?, 101)
#     # output: (?, 600)
    
#     def __init__(self, **kwargs):
#         super(MaskSumPoolingLayer, self).__init__(**kwargs)
    
#     def build(self, input_shape):
#         super(MaskSumPoolingLayer, self).build(input_shape)  # Be sure to call this somewhere!

#     def call(self, x):
#         x_1_float32 = K.cast(x[1], dtype='float32')
#         #print("x_1_float32", x_1_float32)
#         x_0 = K.permute_dimensions(x[0], pattern=[2, 0, 1])
#         #print("x_0", x_0)
#         x_0 = tf.multiply(x_0, x_1_float32)
#         #print("x_0", x_0)
#         x_0 = K.permute_dimensions(x_0, pattern=[1, 2, 0])
#         #print("x_0", x_0)
#         x_0 = K.sum(x_0, axis=-2)
#         #print("x_0", x_0)
#         return x_0
    
#     def compute_output_shape(self, input_shape):
#         # input_shape [(None, 101, 600), (None, 101)]
#         return (input_shape[0][0], input_shape[0][-1])    



In [8]:

class MaskMaxPoolingLayer(Layer):
    """Max-pool a sequence over its time axis after multiplying it by a mask.

    The layer is called on a two-element list ``[features, mask]``:
        features -- (batch, steps, channels), e.g. (?, 101, 600)
        mask     -- (batch, steps),           e.g. (?, 101)
    and returns a (batch, channels) tensor,   e.g. (?, 600).

    NOTE(review): steps whose mask value is 0 are zeroed, not excluded, so they
    still take part in the max as zeros. With a 0/1 mask that has at least one
    0 in a row, every pooled value for that row is therefore >= 0 -- confirm
    this is the intended behaviour.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        # The layer adds no weights; the base class only needs to be told it is built.
        super().build(input_shape)

    def call(self, x):
        features = x[0]
        mask = K.cast(x[1], dtype='float32')
        # Give the mask a trailing axis of size 1 so it broadcasts over channels:
        # (batch, steps, 1) * (batch, steps, channels).
        masked = tf.multiply(features, K.expand_dims(mask, axis=-1))
        # Reduce over the steps axis (second to last).
        return K.max(masked, axis=-2)

    def compute_output_shape(self, input_shape):
        # input_shape is [features_shape, mask_shape]; only the former matters.
        features_shape = input_shape[0]
        return (features_shape[0], features_shape[-1])
    

In [9]:
# Model
# Two inputs per sentence: the token ids and a same-length mask (the training
# cell feeds the arrays produced by get_mask_entities for the second input).

words_input = Input(shape=(max_sentence_len,), dtype='int32', name='words_input')
words_input_mask = Input(shape=(max_sentence_len,), dtype='int32', name='words_input_mask')

# Embedding table initialised from the loaded `embedding` matrix, fine-tuned
# during training and L2-regularised.
words = Embedding(embedding.shape[0], embedding.shape[1], weights=[embedding], trainable=True, embeddings_regularizer=embeddings_reg, name="words_Embedding")(words_input)
words = Dropout(dropout_emb)(words)

# Width-3 convolution; padding='same' with stride 1 keeps the sequence length.
output = Convolution1D(filters=256, kernel_size=3, activation=activation_fn, padding='same', strides=1, kernel_regularizer=kernel_reg)(words)
output = Dropout(dropout_model)(output)

# Bidirectional GRU returning every time step; forward and backward states are
# concatenated, so each step carries 2*units features.
output = Bidirectional( CuDNNGRU(units, return_sequences=True, recurrent_regularizer=recurrent_reg), merge_mode='concat') (output)
output_h = Activation('tanh') (output)

# Sentence representation 1: max over all time steps.
# Sentence representation 2: max over time after applying the entity mask.
output1 = GlobalMaxPooling1D()(output_h) 
output2 = MaskMaxPoolingLayer()([output_h, words_input_mask]) 

# Sentence representation 3: attention-weighted sum of the time steps.
#   Dense(1)   -> one score per step            (batch, steps, 1)
#   Permute    -> scores laid out along a row   (batch, 1, steps)
#   softmax    -> attention weights; presumably normalised over the last
#                 axis (the steps) -- TODO confirm against the Keras version in use
#   matmul     -> weights x step features       (batch, 1, 2*units)
#   Flatten    -> drop the singleton axis       (batch, 2*units)
output = Dense(1, kernel_regularizer=kernel_reg)(output_h)
output = Permute((2, 1))(output)
output = Activation('softmax', name="attn_softmax")(output)
output = Lambda(lambda x: tf.matmul(x[0], x[1])) ([output, output_h])
output3 = Flatten() (output)

# Join the three sentence representations and regularise before the classifier.
output = Concatenate()([output1, output2, output3])
output = Dropout(dropout_pen)(output)

# Classifier head: one hidden ReLU layer, then a softmax over the n_out labels.
output = Dense(300, kernel_regularizer=kernel_reg, activation='relu')(output)
output = Dense(n_out, kernel_regularizer=kernel_reg)(output)
output = Activation('softmax')(output)

model = Model(inputs=[words_input, words_input_mask], outputs=[output])
# The sparse loss takes integer class ids as targets rather than one-hot rows.
model.compile(loss='sparse_categorical_crossentropy', optimizer=adadelta, metrics=['accuracy'])
model.summary(line_length=120)



________________________________________________________________________________________________________________________
Layer (type)                           Output Shape               Param #       Connected to                            
words_input (InputLayer)               (None, 101)                0                                                     
________________________________________________________________________________________________________________________
words_Embedding (Embedding)            (None, 101, 300)           7696800       words_input[0][0]                       
________________________________________________________________________________________________________________________
dropout_1 (Dropout)                    (None, 101, 300)           0             words_Embedding[0][0]                   
________________________________________________________________________________________________________________________
conv1d_1 (Conv1D)               

In [10]:

# Evaluation functions 

def get_precision(test_y_pred, test_y, label):
    """Fraction of the items predicted as ``label`` whose reference value agrees.

    Args:
        test_y_pred: sequence of predicted labels.
        test_y: sequence of reference labels, indexed in parallel.
        label: the label whose precision is wanted.

    Returns:
        float in [0, 1]; 0.0 when no prediction of ``label`` is correct
        (which includes the case where ``label`` is never predicted).

    Swapping the first two arguments yields the recall of ``label``.
    """
    predicted = 0
    hits = 0

    for idx, guess in enumerate(test_y_pred):
        if guess != label:
            continue
        predicted += 1
        if guess == test_y[idx]:
            hits += 1

    # Checking the hit count also protects against dividing by zero.
    if hits == 0:
        return 0.0
    return float(hits) / float(predicted)
        

def get_macro_f1(test_y_pred, test_y, n_out):
    """Unweighted mean of the per-label F1 scores for labels 1 .. n_out-1.

    Label 0 is left out of the average (presumably the catch-all class --
    confirm against the label map).

    Args:
        test_y_pred: sequence of predicted labels.
        test_y: sequence of reference labels, indexed in parallel.
        n_out: number of labels; labels are the integers 0 .. n_out-1.

    Returns:
        float macro-averaged F1.

    Raises:
        ZeroDivisionError: if ``n_out`` <= 1, because no label is averaged.
    """
    per_label_f1 = []
    for label in range(1, n_out):
        prec = get_precision(test_y_pred, test_y, label)
        # Recall is precision with the roles of prediction and reference swapped.
        recall = get_precision(test_y, test_y_pred, label)
        denom = float(prec + recall)
        if denom == float(0):
            per_label_f1.append(0)
        else:
            per_label_f1.append(float(2 * prec * recall / denom))

    return float(sum(per_label_f1)) / float(len(per_label_f1))
    

def get_accuracy(test_y_pred, test_y):
    """Fraction of positions at which prediction and reference agree.

    Both arguments are converted to NumPy arrays first. Without that, comparing
    two plain lists or tuples with ``==`` produces a single bool instead of an
    element-wise result, and the reported accuracy is wrong.

    Args:
        test_y_pred: array-like of predicted labels.
        test_y: array-like of reference labels with the same length.

    Returns:
        float in [0, 1].

    Raises:
        ZeroDivisionError: if ``test_y`` is empty.
    """
    pred = np.asarray(test_y_pred)
    gold = np.asarray(test_y)
    acc = float(np.sum(pred == gold)) / float(len(gold))
    return acc
    
    
def predict_classes(prediction):
    """Return, for each row of scores, the index of the highest-scoring class.

    Args:
        prediction: array exposing ``argmax`` (e.g. the output of
            ``model.predict``), with the class scores along the last axis.

    Returns:
        Array of class indices with the last axis removed.
    """
    class_ids = prediction.argmax(axis=-1)
    return class_ids


def get_PRF1_semeval(y_pred, answer_key):
    """Score predictions with the external Perl scorer and parse its report.

    Steps:
      1. Write ``y_pred`` to ``proposed_ans``, one "<1-based index><TAB><label>"
         line per prediction, mapping class ids through ``int_to_label``.
      2. Run ``scorer_perl_script`` on that file and ``answer_key`` with the
         scorer's standard output captured in ``scorer_output``.
      3. Read accuracy, precision, recall and macro F1 from the tail of the
         report.

    Relies on the module-level settings ``proposed_ans``, ``scorer_output``,
    ``scorer_perl_script``, ``perl_exe_path`` and ``int_to_label``.

    Args:
        y_pred: sequence of predicted class ids.
        answer_key: path of the gold answer-key file handed to the scorer.

    Returns:
        Tuple ``(acc, P, R, macro_f1)``; each is the scorer's percentage
        divided by 100.

    Raises:
        RuntimeError: if the scorer report is too short to contain the expected
            summary lines (for example because the scorer failed to run).
    """
    # Local import so this cell does not depend on edits to the import cell.
    import subprocess

    with open(proposed_ans, 'w') as f_out:
        for i in range(len(y_pred)):
            f_out.write(str(i+1) + "\t" + int_to_label[y_pred[i]] + "\n" )

    # On Windows use the configured interpreter, elsewhere whatever "perl" is on PATH.
    perl = perl_exe_path if os.name == 'nt' else "perl"
    # An argument list avoids shell quoting problems (e.g. paths with spaces);
    # redirecting stdout to the file replaces the former "> scorer_output".
    with open(scorer_output, 'w') as f_scores:
        subprocess.run([perl, scorer_perl_script, proposed_ans, answer_key], stdout=f_scores)

    with open(scorer_output, 'r') as f_in:
        lines = f_in.readlines()

    # Only the non-blank lines among the last 30 are needed for the summary.
    lines = [ ln for ln in lines[-30:] if ln.strip() != '' ]
    if len(lines) < 17:
        raise RuntimeError(
            "Scorer output in %s has only %d non-empty line(s); expected at least 17. "
            "Check that the Perl scorer ran successfully." % (scorer_output, len(lines))
        )

    # Accuracy: last token of the 17th line from the end, minus its trailing '%'.
    acc = float(lines[-17].strip().split()[-1][:-1]) / 100.0
    # Second-to-last line: tokens 2, 5 and 8 are read as P, R and F1 percentages.
    PRF1 = lines[-2].strip().split()
    P = float(PRF1[2][:-1]) / 100.0
    R = float(PRF1[5][:-1]) / 100.0
    macro_f1 = float(PRF1[8][:-1]) / 100.0

    return (acc, P, R, macro_f1)




In [11]:


# In[15]:

# Training loop: one Keras epoch per iteration, followed by scoring of the
# train / val / test splits with the external scorer, best-model checkpointing
# and early stopping driven by the validation macro F1.
print("Start training... \n")

# Uncomment to run a quick smoke test on the first 200 examples of each split.
# train_x = train_x[:200]
# train_y = train_y[:200]
# train_x_copy = train_x_copy[:200]
# train_y_copy = train_y_copy[:200]
# val_x = val_x[:200]
# val_y = val_y[:200]
# test_x = test_x[:200]
# test_y = test_y[:200]


# Best value seen so far for each split / metric.
train_max_acc = 0
train_max_f1 = 0
val_max_acc = 0
val_max_f1 = 0
test_max_acc = 0
test_max_f1 = 0

# test_f1_final: test macro F1 at the most recent epoch whose validation F1
# matched the running best; test_f1_final_max: largest such value.
test_f1_final = 0
test_f1_final_max = 0

# Number of epochs started since the validation F1 last matched its best value.
es_epoch = 0 

for epoch in range(nb_epoch):
    print("Epoch: ", epoch+1, "/", nb_epoch)
    es_epoch += 1
    # Stop once more than es_epoch_stop epochs passed without a new best val F1.
    if es_epoch > es_epoch_stop:
        print("Early Stopping...") 
        break
    
    # Shuffle inputs, masks and labels with the same permutation so they stay
    # aligned. Indexing creates new arrays, so the *_copy names are unaffected.
    index = np.arange(len(train_x))
    np.random.shuffle(index)
    train_x = train_x[index]
    train_x_mask = train_x_mask[index]
    train_y = train_y[index]
    
    # Input lists for fit and predict
    # The un-shuffled training copy is used for scoring; presumably because the
    # answer-key file is in the original example order -- confirm.
    train_input_list = [train_x, train_x_mask]
    train_input_list_copy = [train_x_copy, train_x_mask_copy]
    val_input_list = [val_x, val_x_mask]
    test_input_list = [test_x, test_x_mask]
    #train_input_list = [train_x]
    #train_input_list_copy = [train_x_copy]
    #val_input_list = [val_x]
    #test_input_list = [test_x]

    
    # A single pass over the shuffled training data.
    model.fit(train_input_list, train_y, batch_size=batch_size, verbose=1, epochs=1) 
    
    train_y_pred = predict_classes(model.predict(train_input_list_copy, verbose=1))
    val_y_pred = predict_classes(model.predict(val_input_list, verbose=1))
    test_y_pred = predict_classes(model.predict(test_input_list, verbose=1))
    
    # Each result is (acc, P, R, macro_f1); every call overwrites the shared
    # proposed_ans / scorer_output files.
    train_PRF1 = get_PRF1_semeval(train_y_pred, train_answer_key)
    val_PRF1 = get_PRF1_semeval(val_y_pred, val_answer_key)
    test_PRF1 = get_PRF1_semeval(test_y_pred, test_answer_key)

    train_max_f1 = max(train_max_f1, train_PRF1[3])
    val_max_f1 = max(val_max_f1, val_PRF1[3])
    test_max_f1 = max(test_max_f1, test_PRF1[3])

    train_max_acc = max(train_max_acc, train_PRF1[0])
    val_max_acc = max(val_max_acc, val_PRF1[0])
    test_max_acc = max(test_max_acc, test_PRF1[0])
    
    # True when this epoch's validation F1 is the running best (a new best or a
    # tie): record the matching test F1, checkpoint, and reset the patience counter.
    if val_max_f1 == val_PRF1[3]: 
        test_f1_final = test_PRF1[3]
        test_f1_final_max = max(test_f1_final, test_f1_final_max)
        if save_model : 
            model_file_name = "model.keras"
            model.save('./model/' + model_file_name)
            print("Model saved", model_file_name)
        es_epoch = 0
    
    print("Train Accuracy: %.4f (max: %.4f)" % (train_PRF1[0], train_max_acc))
    print("Val   Accuracy: %.4f (max: %.4f)" % (val_PRF1[0], val_max_acc))
    print("Test  Accuracy: %.4f (max: %.4f)" % (test_PRF1[0], test_max_acc))
    
    print("Train Macro F1 Semeval Official: %.4f (max: %.4f)" % (train_PRF1[3], train_max_f1))
    print("Val   Macro F1 Semeval Official: %.4f (max: %.4f)" % (val_PRF1[3], val_max_f1))
    print("Test  Macro F1 Semeval Official: %.4f (max: %.4f)" % (test_PRF1[3], test_max_f1))
    # test_PRF1[1:] is the (P, R, macro_f1) triple.
    print("Test P %.4f | R %.4f | macro_F1: %.4f" % test_PRF1[1:] )
    print("Test test_max_f1_final: %.4f (max: %.4f)" % (test_f1_final, test_f1_final_max) )
        
    print()



Start training... 

Epoch:  1 / 256
Epoch 1/1
Model saved model.keras
Train Accuracy: 0.3430 (max: 0.3430)
Val   Accuracy: 0.3510 (max: 0.3510)
Test  Accuracy: 0.3419 (max: 0.3419)
Train Macro F1 Semeval Official: 0.2704 (max: 0.2704)
Val   Macro F1 Semeval Official: 0.2704 (max: 0.2704)
Test  Macro F1 Semeval Official: 0.2630 (max: 0.2630)
Test P 0.3215 | R 0.3258 | macro_F1: 0.2630
Test test_max_f1_final: 0.2630 (max: 0.2630)

Epoch:  2 / 256
Epoch 1/1
Model saved model.keras
Train Accuracy: 0.4452 (max: 0.4452)
Val   Accuracy: 0.4280 (max: 0.4280)
Test  Accuracy: 0.4428 (max: 0.4428)
Train Macro F1 Semeval Official: 0.4818 (max: 0.4818)
Val   Macro F1 Semeval Official: 0.4618 (max: 0.4618)
Test  Macro F1 Semeval Official: 0.4767 (max: 0.4767)
Test P 0.4724 | R 0.5319 | macro_F1: 0.4767
Test test_max_f1_final: 0.4767 (max: 0.4767)

Epoch:  3 / 256
Epoch 1/1
Model saved model.keras
Train Accuracy: 0.5165 (max: 0.5165)
Val   Accuracy: 0.4949 (max: 0.4949)
Test  Accuracy: 0.5083 (max: 0

In [12]:


# # In[ ]:

# from keras_visualize_activations_master import read_activations

# def create_heatmap_file(model, layer_name, out_file_path="./model/test_heatmap.txt", batch_size=500): 
    
#     print("Loading data...")
    
#     data_npy_path = './data/data_all.npy'
#     train_file = "./files/train_attn_sp.txt"
#     val_file = "./files/val_attn_sp.txt"
#     test_file = "./files/test_attn_sp.txt"

#     train_set, val_set, test_set, embedding, label_to_int, int_to_label = np.load(data_npy_path)
#     train_x, train_y = train_set
#     val_x, val_y = val_set
#     test_x, test_y = test_set
#     train_x_copy = train_x
#     train_y_copy = train_y
#     max_sentence_len = train_x.shape[1]
#     n_out = len(label_to_int)
    
#     l2_val = 0.00001
#     embeddings_reg = regularizers.l2(l2_val)
#     kernel_reg = regularizers.l2(l2_val)
#     recurrent_reg = regularizers.l2(l2_val)

#     # # Getting activations 
    
#     print("Getting activations...")

#     def get_activation(data, model, batch_size=100):
#         ret = None
#         for i in range(0, data.shape[0], batch_size): 
#             start = i 
#             endd = min(i+batch_size, data.shape[0]) 
#             act = read_activations.get_activations(model, data[start:endd], layer_name=layer_name, print_shape_only=True)
#             if i == 0: 
#                 ret = act[0] 
#             else: 
#                 ret = np.concatenate((ret, act[0]))
#         return ret 

#     test_act = get_activation(test_x, model, batch_size)
#     print("Activatons shape test_act.shape", test_act.shape)

#     def softmax(x):
#         e_x = np.exp(x - np.max(x))
#         return e_x / e_x.sum(axis=0)    
    
#     print("Creating heatmap file...")
    
#     def write_heatmap_file(act, in_file_name, out_file_path):
#         f = open(in_file_name, 'r')
#         lines = f.readlines()
#         f.close()

#         num_labels = [ l.strip().split()[:2] for l in lines ]
#         lines = [ l.strip().split()[2:] for l in lines ]

#         print("act", act.shape)
#         act = np.sum(act, axis=-2)
#         print("act", act.shape)

#         for j in range(act.shape[0]):
#             act[j] = softmax(act[j])
            
#         print("np.sum(act[0])", np.sum(act[0]))

#         f = open(out_file_path, "w")

#         for i in range(len(lines)): 
#             f.write(str(num_labels[i][1]) + "\n")
#             for j in range(len(lines[i])):
#                 tmp = ( "%.6f " % (act[i][j]*100.0)  )
#                 f.write(lines[i][j] + "\t" + tmp + "\n")
#             f.write("\n")

#         f.close()
        
#         print("Created", out_file_path)
    
#     write_heatmap_file(test_act, test_file, out_file_path)
#     return test_act
    

# model_keras_path = "./model/model.keras"
# model = load_model(model_keras_path, custom_objects={"tf": tf, "MaskMaxPoolingLayer": MaskMaxPoolingLayer})
# model.summary()

# layer_name = "attn_softmax"
# act = create_heatmap_file(model, layer_name, out_file_path="./model/test_heatmap.txt")


