In [47]:
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras import layers
import pickle
from time import gmtime, strftime

In [48]:
MAX_LEN = 97
POS_MIN = -100
POS_EMBED_LEN = 200
base_dir = './data/'

In [54]:
word_embed = pickle.load(open(base_dir + 'word_embed', 'rb'))
word_embed = np.transpose(word_embed, )
PAD_ID = word_embed.shape[0]-1
relation_count = 19
print(word_embed.shape, PAD_ID)

(22549, 50) 22548


In [50]:
print(word_embed.shape)

(22549, 50)


In [55]:
# load dataset from tfrecord

def pad_fixed_length(words):
    words = tf.pad(words,tf.constant([[0, MAX_LEN-words.shape[0]]]), constant_values=PAD_ID)
    return words

def processing(raw):
    features = tf.io.parse_single_example(
        raw,
        features={
            'idxs': tf.io.FixedLenFeature([2], tf.int64),
            'label': tf.io.FixedLenFeature([1], tf.int64),
            'lexical': tf.io.FixedLenFeature([8], tf.int64),
            'words': tf.io.VarLenFeature(tf.int64)
        }
    )
    idxs = tf.cast(features['idxs'], tf.int32)
    pos1 = tf.range(0, MAX_LEN, 1, dtype=tf.int32) - idxs[0]
    pos2 = tf.range(0, MAX_LEN, 1, dtype=tf.int32) - idxs[1]
    pos1 = pos1 - POS_MIN
    pos2 = pos2 - POS_MIN
    label = tf.squeeze(tf.one_hot(features['label'], depth=relation_count), axis=0)
    lexical = tf.cast(features['lexical'], tf.int32)
    words = tf.cast(tf.sparse.to_dense(features['words']), tf.int32)
    words = tf.py_function(pad_fixed_length, [words], Tout=tf.int32)
    return pos1, pos2, label, lexical, words

train_ds = tf.data.TFRecordDataset(filenames = [base_dir + 'train.tfrecords']).map(processing).shuffle(2000).batch(128)
test_ds = tf.data.TFRecordDataset(filenames = [base_dir + 'test.tfrecords']).map(processing).shuffle(2000).batch(128)

In [56]:
next(iter(train_ds))[2].shape

TensorShape([128, 19])

In [57]:
class Network(keras.Model):
    def __init__(self, word_embed, pos_dim, word_dim, filters_num, conv_size, output_dim, drop_rate):
        super(Network, self).__init__()
        self.pos_dim = pos_dim
        self.word_dim = word_dim
        self.drop_rate = drop_rate
        self.word_embed = tf.Variable(word_embed, dtype=tf.float32)
        self.pos1_embed = tf.Variable(tf.random.uniform([POS_EMBED_LEN, pos_dim],minval=0,maxval=1), dtype=tf.float32)
        self.pos2_embed = tf.Variable(tf.random.uniform([POS_EMBED_LEN, pos_dim],minval=0,maxval=1), dtype=tf.float32)
        
        self.conv1 = layers.Conv1D(filters_num, conv_size, padding='same', activation=tf.nn.relu, bias_initializer=keras.initializers.constant(0.1))
        self.pool1 = layers.MaxPool1D(MAX_LEN, padding='same')
        self.conv2 = layers.Conv1D(filters_num, 4, padding='same', activation=tf.nn.relu, bias_initializer=keras.initializers.constant(0.1))
        self.pool2 = layers.MaxPool1D(MAX_LEN, padding='same')
        self.conv3 = layers.Conv1D(filters_num, 5, padding='same', activation=tf.nn.relu, bias_initializer=keras.initializers.constant(0.1))
        self.pool3 = layers.MaxPool1D(MAX_LEN, padding='same')
        
        self.dense1 = layers.Dense(output_dim, kernel_regularizer= keras.regularizers.l2(0.01), bias_initializer=keras.initializers.constant(0.1))
        
        
#     def build(self, input_shape):
#         super(Network, self).build(input_shape)
    
    def call(self, inputs, training):
        pos1 = inputs[0]
        pos2 = inputs[1]
        lexical = inputs[2]
        words = inputs[3]
        pf1 = tf.nn.embedding_lookup(self.pos1_embed, pos1)
        pf2 = tf.nn.embedding_lookup(self.pos2_embed, pos2)
        wf = tf.nn.embedding_lookup(self.word_embed, words)
        lexical = tf.nn.embedding_lookup(self.word_embed, lexical)
        lexical = tf.reshape(lexical, [-1, 8 * self.word_dim])
        wf = tf.concat([pf1, pf2, wf], axis=2)
        if training:
            wf = tf.nn.dropout(wf, self.drop_rate)
        wf1 = self.conv1(wf)
        wf1 = self.pool1(wf1)
        wf2 = self.conv1(wf)
        wf2 = self.pool1(wf2)
        wf3 = self.conv1(wf)
        wf3 = self.pool1(wf3)
        wf = tf.concat([wf1, wf2, wf3], axis=2)
        sentence = tf.squeeze(wf, axis=1)
        sentence = tf.concat([lexical, sentence], axis=1)
        if training:
            sentence = tf.nn.dropout(sentence, self.drop_rate)
        output = self.dense1(sentence)
        
        return output
        
    

In [61]:
network = Network(word_embed, 5, 50, 100, 3, relation_count, 0.5)

In [62]:
optimizer = keras.optimizers.Adam(learning_rate=1e-3)
criterion = keras.losses.CategoricalCrossentropy(from_logits=True)

In [63]:
# l2_param = 0.1

def train_func(dataset):
    global best_acc
    loss_count = 0
    correct_count = 0
    step_count = 0
    sample_count = 0
    for step, (pos1, pos2, label, lexical, words) in enumerate(dataset):
        with tf.GradientTape() as tape:
            output = network([pos1, pos2, lexical, words], training=True)
#             print(output.shape, label.shape)
            loss = criterion(label, output)
            loss += tf.reduce_sum(network.losses)
        grads = tape.gradient(loss, network.trainable_variables)
        optimizer.apply_gradients(zip(grads, network.trainable_variables))
#         print(tf.argmax(output, axis=1).shape, tf.argmax(label, axis=1).shape)
        correct = tf.reduce_sum(tf.cast(tf.argmax(output, axis=1) == tf.argmax(label, axis=1), dtype=tf.int32))
        
        sample_count += label.shape[0]
        step_count += 1
        correct_count += correct 
        loss_count += loss
    accuracy = correct_count / sample_count
    avg_loss = loss_count / step_count
    return avg_loss.numpy(), accuracy.numpy(), network.losses

def evaluate_func(dataset):
    loss_count = 0
    correct_count = 0
    step_count = 0
    sample_count = 0
    for step, (pos1, pos2, label, lexical, words) in enumerate(dataset):
        output = network([pos1, pos2, lexical, words], training=False)
        loss = criterion(label, output)
        loss += tf.reduce_sum(network.losses)
        correct = tf.reduce_sum(tf.cast(tf.argmax(output, axis=1) == tf.argmax(label, axis=1), dtype=tf.int32))
        sample_count += label.shape[0]
        step_count += 1
        correct_count += correct 
        loss_count += loss
    accuracy = correct_count / sample_count
    avg_loss = loss_count / step_count
    return avg_loss.numpy(), accuracy.numpy()

def predict_func(dataset):
    correct_count = 0
    sample_count = 0
    y_pred = []
    y_true = []
    for step, (pos1, pos2, label, lexical, words) in enumerate(dataset):
        output = network([pos1, pos2, lexical, words], training=False)
        pred_batch = tf.argmax(output, axis=1)
        lb_batch = tf.argmax(label, axis=1)
        correct = tf.reduce_sum(tf.cast(pred_batch == lb_batch, dtype=tf.int32))
        sample_count += label.shape[0]
        correct_count += correct
        y_pred.extend(pred_batch)
        y_true.extend(lb_batch)
    accuracy = correct_count / sample_count
    
    return y_pred, y_true, accuracy


In [66]:
best_acc = 0
save_dir = './saved/'
for epoch in range(80):
    train_avg_loss, train_accuracy, reg_loss = train_func(train_ds)
    test_avg_loss, test_accuracy = evaluate_func(test_ds)
    if test_accuracy > best_acc:
        network.save_weights(save_dir + 'bestckpt')
        best_acc = test_accuracy
#     print(reg_loss)
    print('training loss is {0:.4f}, accuracy is {1:.4f} test loss is {2:.4f}, accuracy is {3:.4f}'.format(train_avg_loss,\
                                                            train_accuracy, test_avg_loss, test_accuracy))
print('best accuracy is {}'.format(best_acc))
        

training loss is 0.3559, accuracy is 0.9415 test loss is 0.9231, accuracy is 0.7722
training loss is 0.3443, accuracy is 0.9466 test loss is 0.9248, accuracy is 0.7725
training loss is 0.3438, accuracy is 0.9395 test loss is 0.9207, accuracy is 0.7667
training loss is 0.3456, accuracy is 0.9433 test loss is 0.9131, accuracy is 0.7681
training loss is 0.3341, accuracy is 0.9456 test loss is 0.9342, accuracy is 0.7674
training loss is 0.3298, accuracy is 0.9426 test loss is 0.9367, accuracy is 0.7593
training loss is 0.3218, accuracy is 0.9475 test loss is 0.9219, accuracy is 0.7637
training loss is 0.3194, accuracy is 0.9496 test loss is 0.9321, accuracy is 0.7637
training loss is 0.3015, accuracy is 0.9543 test loss is 0.9369, accuracy is 0.7667
training loss is 0.3048, accuracy is 0.9481 test loss is 0.9362, accuracy is 0.7689
best accuracy is 0.7725432462274567


In [149]:
network.load_weights(save_dir + 'bestckpt19')
y_true, y_pred, accuracy = predict_func(test_ds)
y_true = [item.numpy() for item in y_true]
y_pred = [item.numpy() for item in y_pred]
print(accuracy.numpy())

0.7725432462274567


In [150]:
import pickle
id2rel = pickle.load(open(base_dir + 'lb2rel.dict', 'rb'))
unique_relations = pickle.load(open(base_dir + 'unique_relations', 'rb'))
y_true = [id2rel[item] for item in y_true]
y_pred = [id2rel[item] for item in y_pred]

In [151]:
import re
ptn = re.compile(r'\(e1,e2\)|\(e2,e1\)')
unique_relations = list(set([re.sub(ptn, '', item) for item in unique_relations]))
print(unique_relations)
y_true = [re.sub(ptn, '', item) for item in y_true]
y_pred = [re.sub(ptn, '', item) for item in y_pred]

['Member-Collection', 'Message-Topic', 'Instrument-Agency', 'Entity-Origin', 'Other', 'Content-Container', 'Component-Whole', 'Product-Producer', 'Entity-Destination', 'Cause-Effect']


In [154]:
from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred, labels = [item for item in unique_relations if item !='Other']))

                    precision    recall  f1-score   support

 Member-Collection       0.85      0.82      0.84       242
     Message-Topic       0.89      0.79      0.83       294
 Instrument-Agency       0.71      0.70      0.71       158
     Entity-Origin       0.84      0.83      0.83       262
 Content-Container       0.84      0.81      0.82       201
   Component-Whole       0.80      0.79      0.80       316
  Product-Producer       0.75      0.81      0.78       215
Entity-Destination       0.91      0.84      0.87       318
      Cause-Effect       0.93      0.90      0.91       340

         micro avg       0.85      0.82      0.83      2346
         macro avg       0.84      0.81      0.82      2346
      weighted avg       0.85      0.82      0.83      2346

