In [1]:
# import all required modeuls
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
from keras_contrib.layers import CRF
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import pickle
from seqeval.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

#check version of tensorflow and keras
print(tf.__version__)
print(keras.__version__)

Using TensorFlow backend.


1.15.0
2.3.1


In [2]:
#define ner_tags
tag_values = ['O', 'B-E', 'I-E', 'PAD']
tag_dict = {tag: num for num, tag in enumerate(tag_values)}
print(tag_dict)
n_tags = len(tag_values); n_tags  

{'O': 0, 'B-E': 1, 'I-E': 2, 'PAD': 3}


4

In [3]:
# define constants
word_embedding_size = 768
MAX_LEN = 32
nr_epoches = 15

In [4]:
# import the bert word embeddings, target labels(y) from pickle 
infile = open('y_train_file_v2','rb')
y_train = pickle.load(infile)
infile.close()
infile = open('train_embeddings_file_v2','rb')
train_embeddings = pickle.load(infile)
infile.close()

infile = open('valid_embeddings_file_v2','rb')
valid_embeddings = pickle.load(infile)
infile.close()
infile = open('y_valid_file_v2','rb')
y_valid = pickle.load(infile)
infile.close()

infile = open('test_embeddings_file_v2','rb')
test_embeddings = pickle.load(infile)
infile.close()
infile = open('y_test_file_v2','rb')
y_test = pickle.load(infile)
infile.close()

In [5]:
# define the Bi-LSTM + CRF model with BERT word embeddings
input = Input(shape=(MAX_LEN,word_embedding_size))
model = Bidirectional(LSTM(units=int(MAX_LEN/4), 
                           return_sequences= True,
                           dropout=0.5,
                          recurrent_dropout=0.5))(input)
model = TimeDistributed(Dense(n_tags, activation="softmax"))(model)  
crf = CRF(n_tags)
model = crf(model)
model = Model(input, model)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [6]:
# compile the model with required optimizer, loss function and evaluation metrices
model.compile(optimizer='adam', loss=crf.loss_function, metrics=[crf.accuracy, 'accuracy'])
model.summary()



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 768)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 32, 16)            49728     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 32, 4)             68        
_________________________________________________________________
crf_1 (CRF)                  (None, 32, 4)             44        
Total params: 49,840
Trainable params: 49,840
Non-trainable params: 0
_________________________________________________________________


In [7]:
# train the model with training data and validate with validation data
history = model.fit(np.asarray(train_embeddings), np.asarray(y_train), batch_size=32, epochs=nr_epoches,
                    validation_data = (np.asarray(valid_embeddings), np.asarray(y_valid)), verbose=2)


Train on 406 samples, validate on 46 samples
Epoch 1/15
 - 2s - loss: 1.7821 - crf_viterbi_accuracy: 0.3996 - accuracy: 0.2036 - val_loss: 1.7214 - val_crf_viterbi_accuracy: 0.4389 - val_accuracy: 0.4368
Epoch 2/15
 - 1s - loss: 1.7005 - crf_viterbi_accuracy: 0.4329 - accuracy: 0.2036 - val_loss: 1.6534 - val_crf_viterbi_accuracy: 0.4199 - val_accuracy: 0.4164
Epoch 3/15
 - 1s - loss: 1.6302 - crf_viterbi_accuracy: 0.4312 - accuracy: 0.2036 - val_loss: 1.5849 - val_crf_viterbi_accuracy: 0.4339 - val_accuracy: 0.4334
Epoch 4/15
 - 1s - loss: 1.5624 - crf_viterbi_accuracy: 0.4378 - accuracy: 0.2036 - val_loss: 1.5145 - val_crf_viterbi_accuracy: 0.4353 - val_accuracy: 0.4361
Epoch 5/15
 - 1s - loss: 1.4906 - crf_viterbi_accuracy: 0.4344 - accuracy: 0.2036 - val_loss: 1.4396 - val_crf_viterbi_accuracy: 0.4307 - val_accuracy: 0.4307
Epoch 6/15
 - 1s - loss: 1.4156 - crf_viterbi_accuracy: 0.3977 - accuracy: 0.2036 - val_loss: 1.3604 - val_crf_viterbi_accuracy: 0.4288 - val_accuracy: 0.4280


In [12]:
# function to convert numebrs to NER_TAGs
key_dict = {num: key for key, num in tag_dict.items()}

def pred_to_ner_tag(pred_list):
    ner_tags_list = []
    for pred in pred_list:
        ner_tags = []
        for value in pred:
            num = np.argmax(value)
            ner_tags.append(key_dict[num])
        ner_tags_list.append(ner_tags)
    return ner_tags_list

In [13]:
# calculate and print evaluation metrices for validation dataset
print('----------------------------------    Validation Datset    ----------------------------------')
valid_pred = model.predict(np.asarray(valid_embeddings), verbose=1)
pred_labels = pred_to_ner_tag(valid_pred)
valid_labels = pred_to_ner_tag(y_valid)
print("F-score: {:.1%}".format(f1_score(valid_labels, pred_labels)))
print("Accuracy: {:.1%}".format(accuracy_score(valid_labels, pred_labels)))
print(classification_report(valid_labels, pred_labels))

----------------------------------    Validation Datset    ----------------------------------
F-score: 34.3%
Accuracy: 92.8%
           precision    recall  f1-score   support

        E       0.00      0.00      0.00        46
      PAD       0.36      0.67      0.47        46

micro avg       0.35      0.34      0.34        92
macro avg       0.18      0.34      0.23        92



In [14]:
# print NER_TAGs of one sample for visualization
print(pred_labels[0])
print(valid_labels[0])

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-E', 'I-E', 'I-E']


In [15]:
# calculate and print evaluation metrices for testing dataset
print('----------------------------------    Testing Datset    ----------------------------------')
test_pred = model.predict(np.asarray(test_embeddings), verbose=1)
pred_labels = pred_to_ner_tag(test_pred)
test_labels = pred_to_ner_tag(y_test)
print("F1-score: {:.1%}".format(f1_score(test_labels, pred_labels)))
print("Accuracy: {:.1%}".format(accuracy_score(test_labels, pred_labels)))
print(classification_report(test_labels, pred_labels))

----------------------------------    Testing Datset    ----------------------------------
F1-score: 37.1%
Accuracy: 93.5%
           precision    recall  f1-score   support

      PAD       0.38      0.74      0.50       114
        E       0.00      0.00      0.00       114

micro avg       0.37      0.37      0.37       228
macro avg       0.19      0.37      0.25       228

