In [None]:
import pickle
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import *
from keras import Model
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
import keras.metrics
import tensorflow as tf
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
from torchtext import data
import pandas as pd
from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset, DataLoader
from collections import Counter
from sklearn.utils import class_weight
from sklearn.metrics import precision_recall_fscore_support

In [None]:
def read_data(filename):
  """Load and return the object stored in a pickle file.

  Args:
    filename: Path of the pickle file to read.

  Returns:
    The object that was pickled into ``filename``.

  Raises:
    OSError: If the file cannot be opened.
    Exception: Whatever ``pickle.load`` raises for a malformed payload.
  """
  # SECURITY: unpickling can execute arbitrary code; only load trusted files.
  # The context manager closes the handle even when unpickling fails.
  with open(filename, 'rb') as dfile:
    return pickle.load(dfile)

In [None]:
# Each pickled split unpacks into three objects: model inputs, targets passed
# to fit / argmax-ed for evaluation, and the labels used for class weighting.
(
    (X_train, Y_train, labels_train),
    (X_val, Y_val, labels_val),
    (X_test, Y_test, labels_test),
) = map(
    read_data,
    ('TimeBank/data_train_tb', 'TimeBank/data_val_tb', 'TimeBank/data_test_tb'),
)

In [None]:
# Pickled vocabulary bundle; positions 0, 1 and 2 are unpacked below into the
# POS, dependency and word vocabularies used to fit the tokenizers.
unique_tokens = read_data('TimeBank/unique_tokens_tb')

In [None]:
# Tokenizer / embedding configuration.
MAX_NB_WORDS = 5_000        # passed to every Tokenizer as num_words
MAX_SEQUENCE_LENGTH = 140   # pad_sequences maxlen and Embedding input_length
EMBEDDING_DIM = 300         # width of the word-embedding matrix

# Split fractions; not referenced by any of the visible cells.
TEST_SIZE = VAL_SIZE = 0.15

In [None]:
# Positions 0, 1, 2 of the bundle hold the POS, dependency and word vocabularies.
unique_pos, unique_deps, unique_words = (unique_tokens[slot] for slot in range(3))

In [None]:
# One tokenizer per feature stream: 1 = POS tags, 2 = words, 3 = dependencies.
tokenizer1 = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer2 = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer3 = Tokenizer(num_words=MAX_NB_WORDS)

# Build each vocabulary from its own token list.
tokenizer1.fit_on_texts(unique_pos)
tokenizer2.fit_on_texts(unique_words)
tokenizer3.fit_on_texts(unique_deps)

# token -> integer id maps, used below to fill the embedding matrices.
word_index1, word_index2, word_index3 = (
    tok.word_index for tok in (tokenizer1, tokenizer2, tokenizer3)
)

In [None]:
# Training split: encode each stream with its matching tokenizer
# (X_train[0] -> POS, X_train[2] -> words, X_train[1] -> dependencies),
# then bring every sequence to MAX_SEQUENCE_LENGTH.
seq1 = tokenizer1.texts_to_sequences(X_train[0])
seq2 = tokenizer2.texts_to_sequences(X_train[2])
seq3 = tokenizer3.texts_to_sequences(X_train[1])

seq11, seq12, seq13 = (
    pad_sequences(encoded, maxlen=MAX_SEQUENCE_LENGTH)
    for encoded in (seq1, seq2, seq3)
)

In [None]:
# Validation split: same encoding as the training split
# (X_val[0] -> POS, X_val[2] -> words, X_val[1] -> dependencies).
seq1 = tokenizer1.texts_to_sequences(X_val[0])
seq2 = tokenizer2.texts_to_sequences(X_val[2])
seq3 = tokenizer3.texts_to_sequences(X_val[1])

seq11_val, seq12_val, seq13_val = (
    pad_sequences(encoded, maxlen=MAX_SEQUENCE_LENGTH)
    for encoded in (seq1, seq2, seq3)
)

In [None]:
# Test split: same encoding as the training split
# (X_test[0] -> POS, X_test[2] -> words, X_test[1] -> dependencies).
seq1 = tokenizer1.texts_to_sequences(X_test[0])
seq2 = tokenizer2.texts_to_sequences(X_test[2])
seq3 = tokenizer3.texts_to_sequences(X_test[1])

seq11_test, seq12_test, seq13_test = (
    pad_sequences(encoded, maxlen=MAX_SEQUENCE_LENGTH)
    for encoded in (seq1, seq2, seq3)
)

In [None]:
# Pickled lookup tables for POS-tag and dependency-label vectors; both are
# queried with .get(token) when the embedding matrices are filled.
pos_vec, dep_vec = map(read_data, ('pos.vector', 'deps.vector'))

In [None]:
# Map each lower-cased GloVe token to its raw text line ("token v1 v2 ...").
# The line is only turned into floats later, for words present in the
# vocabulary, so no per-line float parsing is done here.
word_vec = {}
# NOTE(review): this value is an int while every other value is a str line;
# the consumer calls .split() on values, so this entry must never be looked up
# -- confirm intent.
word_vec['PADDING'] = 300
# Decode explicitly as UTF-8 (the encoding the GloVe text files are
# distributed in) instead of relying on the platform default; the context
# manager closes the file even if reading fails.
with open('glove.42B.300d.txt', encoding='utf-8') as f:
    for line in f:
        head = line.split(None, 1)  # only the first token is needed here
        if not head:
            continue  # blank line: nothing to index
        word = head[0]
        word_vec[word.lower()] = line

In [None]:
# POS-tag embedding table, 28 columns wide: one row per tokenizer id plus one
# extra row. Tags with no entry in pos_vec keep an all-zero row.
embedding_matrix1 = np.zeros(shape=(len(word_index1) + 1, 28))
for word, i in word_index1.items():
    embedding_vector = pos_vec.get(word)
    if embedding_vector is None:
        continue
    # Stored value is a text line; the first field is dropped, the rest are floats.
    embedding_matrix1[i] = np.asarray(embedding_vector.split()[1:], dtype='float32')

In [None]:
# Word embedding table, EMBEDDING_DIM columns wide: one row per tokenizer id
# plus one extra row. Words with no entry in word_vec keep an all-zero row.
embedding_matrix2 = np.zeros(shape=(len(word_index2) + 1, EMBEDDING_DIM))
for word, i in word_index2.items():
    embedding_vector = word_vec.get(word)
    if embedding_vector is None:
        continue
    # Stored value is the raw text line; drop the leading token, parse the rest.
    embedding_matrix2[i] = np.asarray(embedding_vector.split()[1:], dtype='float32')

In [None]:
# Dependency-label embedding table. Its width is the length of the 'PADDING'
# vector in dep_vec; labels with no entry in dep_vec keep an all-zero row.
embedding_matrix3 = np.zeros(shape=(len(word_index3) + 1, len(dep_vec['PADDING'])))
for word, i in word_index3.items():
    embedding_vector = dep_vec.get(word)
    if embedding_vector is None:
        continue
    # Unlike the POS/word tables, the stored value is converted directly.
    embedding_matrix3[i] = np.asarray(embedding_vector, dtype='float32')

In [None]:
# The GloVe line table is only needed to fill embedding_matrix2; drop the name
# so the dictionary can be garbage-collected.
del word_vec

In [None]:
def get_class_weights(training_labels):
    """Return one 'balanced' class weight per relation label, in fixed order.

    Weights are computed with scikit-learn's ``compute_class_weight`` over the
    labels that occur in ``training_labels`` and then laid out in the order of
    ``labelset`` below.

    Args:
        training_labels: Sequence of label names, one per training example.

    Returns:
        A list of 14 weights aligned with ``labelset``. Labels that never
        occur in ``training_labels`` get weight 0.
    """
    present = np.unique(training_labels)
    # Keyword arguments: recent scikit-learn releases make `classes` and `y`
    # keyword-only, so the positional form raises a TypeError there.
    balanced = class_weight.compute_class_weight(
        class_weight='balanced', classes=present, y=training_labels
    )
    weight_by_label = dict(zip(present.tolist(), balanced))

    labelset = [ 'BEFORE', 'AFTER', 'SIMULTANEOUS', 'IBEFORE', 'IAFTER', 'IS_INCLUDED', 'INCLUDES', 'IDENTITY', 'BEGUN_BY', 'ENDED_BY',
    'BEGINS','ENDS','DURING','DURING_INV']

    # An explicit lookup replaces the former bare `except:`, which would also
    # have hidden unrelated errors.
    return [weight_by_label.get(label, 0) for label in labelset]



# Model

In [None]:
def defineModel(l1,l2,l3,l4,d1,out,d):
    """Build the three-stream BiLSTM classifier (uncompiled).

    Word, POS and dependency id sequences are embedded with frozen
    pre-trained tables, each passed through its own bidirectional LSTM,
    concatenated per time step, summarised by another bidirectional LSTM and
    classified by a ReLU dense layer followed by a softmax layer.

    Args:
        l1: Units of the POS-stream LSTM.
        l2: Units of the dependency-stream LSTM.
        l3: Not used by the body; kept so existing callers keep working.
        l4: Units of the word-stream LSTM and of the final LSTM.
        d1: Units of the hidden dense layer.
        out: Number of output classes (softmax width).
        d: Dropout for the LSTMs; the word-stream LSTM uses ``d + 0.1``.

    Returns:
        A Keras ``Model`` with inputs ordered ``[POS, dependency, word]``.
    """
    # The embedding layers are created in the original order (word, POS, dep):
    # they get auto-generated names, which may depend on creation order, and
    # saved weights are loaded with by_name=True.
    # Output widths come from the weight matrices rather than the literals
    # 28 / 77, so a table of a different width no longer fails at weight loading.
    embedding_layer1 = Embedding(len(word_index2) + 1,EMBEDDING_DIM,weights=[embedding_matrix2],input_length=MAX_SEQUENCE_LENGTH,trainable=False)
    embedding_layer2 = Embedding(len(word_index1) + 1,embedding_matrix1.shape[1],weights=[embedding_matrix1],input_length=MAX_SEQUENCE_LENGTH,trainable=False)
    embedding_layer3 = Embedding(len(word_index3) + 1,embedding_matrix3.shape[1],weights=[embedding_matrix3],input_length=MAX_SEQUENCE_LENGTH,trainable=False)

    # Word ids.
    wi = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32',name="inp1")
    wi2 = embedding_layer1(wi)

    # POS-tag ids.
    pi_sen = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32',name="inp22")
    pi2_sen = embedding_layer2(pi_sen)

    # Dependency-label ids.
    di_sen = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32',name="inp33")
    di2_sen = embedding_layer3(di_sen)

    # Per-stream encoders keep the time axis so they can be concatenated step by step.
    lstm1_sen = Bidirectional(LSTM(l1, activation='tanh', dropout=d, return_sequences=True), name = 'bid1temp_sen')(pi2_sen)  #  pos  features
    lstm2_sen = Bidirectional(LSTM(l2, activation='tanh', dropout=d, return_sequences=True), name= 'bid2temp_sen')(di2_sen)   #  dep features
    lstm3 = Bidirectional(LSTM(l4, activation='tanh', dropout=d+0.1, return_sequences=True), name = 'bid3')(wi2)  #  word features

    hid_sen = concatenate([lstm1_sen, lstm2_sen, lstm3])

    # Final encoder returns only its last state (no return_sequences).
    lstm5 = Bidirectional(LSTM(l4, activation='tanh', dropout=d), name = 'bid3templstm2_sen')(hid_sen)

    yii = Dense(d1, activation='relu', name='dense1')(lstm5)
    yi = Dense(out, activation="softmax", name='dense2')(yii)
    model = Model(inputs=[pi_sen,di_sen,wi],outputs=yi)
    return model


# Train

In [None]:
from sklearn.metrics import precision_recall_fscore_support
# Empty module-level accumulators; none of the visible cells append to them.
macro_avg, accuracy = [], []
class_clink, class_clinkr = [], []
checkpoints, nodeslist = [], []

def trainModel():
    """Build, compile and fit the model, then restore the best checkpoint.

    Uses the module-level encoded splits (``seq11``/``seq13``/``seq12`` and
    their ``_val`` counterparts), ``Y_train``/``Y_val`` as targets and
    ``labels_train`` to derive per-class weights. Training runs for at most
    50 epochs and stops early after 20 epochs without ``val_accuracy``
    improvement. Only the best weights by ``val_accuracy`` are checkpointed.

    Returns:
        The trained Keras model with the best checkpointed weights loaded.
    """
    num_classes = 14

    epochs = 50
    batchsize = 64
    lr = 0.005
    file1 = 'TimeBank/chkpt/'

    # Input order must match Model(inputs=[POS, dependency, word]) in defineModel.
    training_data = [seq11, seq13, seq12]
    val_data = [seq11_val, seq13_val, seq12_val]

    # Per-class weights belong in fit(class_weight=...). compile(loss_weights=...)
    # weights the losses of different model *outputs*, so a 14-entry list on
    # this single-output model does not weight the classes.
    # Index i is assumed to be the i-th label of get_class_weights' labelset,
    # the same order format_report uses for the one-hot targets.
    weights = get_class_weights(labels_train)
    class_weight_by_index = dict(enumerate(weights))

    # LSTM sizes (l1..l4), dense size (d1) and dropout (d).
    l1, l2, l3, l4, d1, d = 32, 32, 64, 64, 32, 0.30
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    checkpoint_filepath = file1 + 'model'
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,save_weights_only=True,monitor='val_accuracy',mode='max',save_best_only=True)
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=20)

    model = defineModel(l1,l2,l3,l4,d1,num_classes,d)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    model.fit(
        x=training_data,
        y=Y_train,
        epochs=epochs,
        batch_size=batchsize,
        validation_data=(val_data, Y_val),
        class_weight=class_weight_by_index,
        callbacks=[callback, model_checkpoint_callback],
        verbose=0,
    )
    # Return the best epoch rather than the last one.
    model.load_weights(checkpoint_filepath)
    return model

In [None]:
# Train from scratch; `model` comes back with the best checkpointed weights loaded.
model = trainModel()

In [None]:
# Continue training the already-compiled `model` for up to `epochs` more epochs.
batchsize = 64
lr = 0.005
epochs = 50

training_data, y_train, training_labels, val_data, y_val, val_labels = [seq11, seq13, seq12] , Y_train, labels_train, [seq11_val, seq13_val, seq12_val], Y_val, labels_val
weights = get_class_weights(training_labels)

file1 = 'TimeBank/chkpt/'
checkpoint_filepath = file1 + 'model'
# NOTE(review): this optimizer is never attached -- the model is not recompiled
# in this cell, so it keeps the optimizer it was compiled with and `lr` has no
# effect. Recompile here if a fresh optimizer is intended.
optimizer =  tf.keras.optimizers.Adam(learning_rate=lr)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,save_weights_only=True,monitor='val_accuracy',mode='max',save_best_only=True)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=20)
# The class weights were previously computed and then dropped; pass them to
# fit so they are applied. Index i is assumed to be the i-th label of
# get_class_weights' labelset.
model.fit(
    x=training_data,
    y=y_train,
    epochs=epochs,
    batch_size=batchsize,
    validation_data=(val_data, y_val),
    class_weight=dict(enumerate(weights)),
    callbacks=[callback, model_checkpoint_callback],
    verbose=0,
)

# Best Model Results

In [None]:
def format_report(report, scores):
  """Print a per-class precision / recall / F1 table in percent.

  Args:
    report: Mapping with the string keys '0'..'13'; each value maps
      'precision', 'recall' and 'f1-score' to a fraction.
    scores: Indexable whose first three items are the micro-averaged
      precision, recall and F1 as fractions.

  Returns:
    None. The table is written to standard output.
  """
  metric_names = ('precision', 'recall', 'f1-score')

  def cell(value):
    # Right-align in a 10-character column.
    return '{0:>10}'.format(value)

  def percent(fraction):
    # Fraction -> percentage rounded to one decimal, as a table cell.
    return cell(round(fraction*100.0, 1))

  # (report key, row label padded to the label column) in class-index order.
  rows = (
    ('0',  "       Before "),
    ('1',  "        After "),
    ('2',  " Simultaneous "),
    ('3',  "      IBefore "),
    ('4',  "       IAfter "),
    ('5',  "  Is Included "),
    ('6',  "     Includes "),
    ('7',  "     Identity "),
    ('8',  "     Begun by "),
    ('9',  "     Ended by "),
    ('10', "       Begins "),
    ('11', "         Ends "),
    ('12', "       During "),
    ('13', "   During inv "),
  )

  print("              " + ' '.join(cell(name) for name in metric_names))
  for key, label in rows:
    entry = report[key]
    print(label + ' '.join(percent(entry[name]) for name in metric_names))
  print("")
  print("    micro avg " + ' '.join(percent(scores[k]) for k in (0, 1, 2)))

In [None]:
# Evaluate the saved best model on the test split.
# Input order matches Model(inputs=[POS, dependency, word]).
data_test = [seq11_test, seq13_test, seq12_test]

# Rebuild the architecture and load weights from file. This rebinds `model`,
# so the results below come from 'TimeBank/model_tb.h5', not from the model
# trained in the cells above.
model = defineModel(32, 32, 64, 64, 32, 14, 0.3)
model.load_weights('TimeBank/model_tb.h5', by_name=True)

# Highest-probability class per example vs. the arg-max of the targets.
classes = model.predict(x=data_test).argmax(axis=-1)
y_test_classes = Y_test.argmax(1)
y_pred_classes = classes

# All 14 class ids are listed so classes absent from the test set still get a row.
report = classification_report(
    y_true=y_test_classes,
    y_pred=y_pred_classes,
    labels=list(range(14)),
    digits=3,
    output_dict=True,
    zero_division=0,
)
scores = precision_recall_fscore_support(
    y_true=y_test_classes,
    y_pred=y_pred_classes,
    labels=list(range(14)),
    average='micro',
)
format_report(report, scores)

               precision     recall   f1-score
       Before       35.5       38.6       37.0
        After       51.6       61.1       55.9
 Simultaneous       31.2       29.4       30.3
      IBefore        0.0        0.0        0.0
       IAfter        0.0        0.0        0.0
  Is Included       50.0       36.4       42.1
     Includes       18.8       21.4       20.0
     Identity       34.1       46.7       39.4
     Begun by        0.0        0.0        0.0
     Ended by      100.0       22.2       36.4
       Begins        0.0        0.0        0.0
         Ends        0.0        0.0        0.0
       During        0.0        0.0        0.0
   During inv        0.0        0.0        0.0

    micro avg       40.3       40.3       40.3
