In [96]:
import sys
import os
import re
import string
import json
import urllib.request
import numpy as np

from tqdm import tqdm

import torch
torch.manual_seed(42)
np.random.seed(42)
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader, TensorDataset

from torchcrf import CRF
from sklearn.metrics import f1_score

torch.manual_seed(1)
device = torch.device("cpu")

In [97]:
BATCH_SIZE = 16

In [98]:
with open('Data/labels.json') as f:
    labels = json.load(f)

In [99]:
# invert the labels
labels_inv = {v: k for k, v in labels.items()}

In [100]:
def read_file(filename):
    with open(filename, 'r') as file:
        text = file.readlines()
    return text

In [101]:
train_data = read_file('Data/train.txt')
val_data = read_file('Data/dev.txt')
test_data = read_file("Data/test.txt")
test_data2 = read_file("Data/test_2.txt")

In [102]:
embeddings = {}
emb_dim = 50
with open('glove.6B/glove.6B.50d.txt','r') as f:
  for line in f:
    values = line.split()
    word = values[0]
    vector = np.asarray(values[1:],'float32')
    embeddings[word]=vector

In [103]:
NUMERIC_KEY = "<numeric>"
UNK_KEY = "<unk>"
PAD_KEY = "<pad>"
CONC_KEY = "<conc>"
TILDA_KEY = "<tilda>"
TILDA_NUM_KEY = "<til_num>"
SPEED_KEY = "<speed>"

ADDITIONAL_KEYS = [NUMERIC_KEY, UNK_KEY, PAD_KEY, CONC_KEY, TILDA_KEY, TILDA_NUM_KEY, SPEED_KEY]

In [104]:
for k in ADDITIONAL_KEYS:
    embeddings[k] = np.random.normal(scale=0.6, size=(emb_dim, ))

In [105]:
vocab_keys = []
vocab_keys.append(NUMERIC_KEY)
vocab_keys.append(UNK_KEY)
vocab_keys.append(PAD_KEY)
vocab_keys.append(CONC_KEY)
vocab_keys.append(TILDA_KEY)
vocab_keys.append(TILDA_NUM_KEY)
vocab_keys.append(SPEED_KEY)
vocab = {k: v for v, k in enumerate(vocab_keys)}

In [106]:
len(vocab)

7

In [107]:
def build_train_vocab(data):
    vocab = {}
    num_words = 0
    for line in data:
        split_line = line.split("\t")
        if len(split_line) == 2:
            word = split_line[0]
            word = word.lower()
            if word not in vocab:
                vocab[word] = 1
    return vocab

In [108]:
# train_data = train_data + val_data

In [109]:
train_vocab = build_train_vocab(train_data)
# extend the vocab with the train_vocab
idx = len(vocab)
for word in train_vocab:
    if word not in vocab:
        vocab[word] = idx
        idx += 1

In [110]:
len(vocab)

7404

In [111]:
# dump vocab
with open('vocab.json', 'w') as fp:
    json.dump(vocab, fp)

In [112]:
def check_if_conc(word):
    # check if word is a concentration
    if re.match(r'[a-zA-Z]*\/[a-zA-Z]*', word):
        return True
    elif word == "%":
        return True
    return False

In [113]:
def check_numeric(word):
    word = word.replace(",", "")
    word = word.replace("-", "", 1)
    word = word.replace(".", "", 1)
    if word.isdigit():
        return True
    return False

In [114]:
def check_if_speed(word):
    if "xg" in word:
        return True
    elif "rpm" in word:
        return True
    return False

In [115]:
def get_vector(word):       
    if check_if_speed(word):
        return embeddings[SPEED_KEY]
    elif check_numeric(word):
        return embeddings[NUMERIC_KEY]
    elif word in embeddings:
        return embeddings[word]
    else:
        return embeddings[UNK_KEY]

In [116]:
def get_idx_inference(word, vocab):
    if check_if_speed(word):
        return vocab[SPEED_KEY]
    elif check_numeric(word):
        return vocab[NUMERIC_KEY]
    elif word in vocab:
        return vocab[word]
    else:
        return vocab[UNK_KEY]

In [117]:
matrix_len = len(vocab)
weights_matrix = np.zeros((matrix_len, emb_dim))

for i, word in enumerate(vocab):
    weights_matrix[i] = get_vector(word)

In [118]:
weights_matrix = torch.from_numpy(weights_matrix).float()

In [119]:
def get_data(data):
    sent_labels = []
    all_labels = []
    sent_idx = []
    all_idx = []
    for line in (data):
        split_line = line.split("\t")
        if len(split_line) == 2:
            word = split_line[0]
            tag = split_line[1]
            tag = tag.replace("\n", "")
            word = word.lower()
            sent_idx.append(get_idx_inference(word, vocab))
            tag_idx = labels[tag]
            sent_labels.append(tag_idx)
        elif line=="\n":
            sent_idx = np.array(sent_idx)
            sent_labels = np.array(sent_labels)
            all_idx.append(sent_idx)
            all_labels.append(sent_labels)
            sent_idx = []
            sent_labels = []
        else:
            print(line)
    return np.asarray(all_idx, dtype=object), np.asarray(all_labels, dtype=object)

In [120]:
trainX, trainY = get_data(train_data)
valX, valY = get_data(val_data)
test1X, test1Y = get_data(test_data)
test2X, test2Y = get_data(test_data2)

In [121]:
from sklearn.utils import shuffle
test1X, test1Y = shuffle(test1X, test1Y, random_state=42)
test2X, test2Y = shuffle(test2X, test2Y, random_state=42)

In [122]:
# extend trainX with test1X and test2X
trainX = np.concatenate((trainX, test1X[:2000], test2X[:2000]), axis=0)
trainY = np.concatenate((trainY, test1Y[:2000], test2Y[:2000]), axis=0)

In [123]:
testX = test1X
testY = test1Y

In [124]:
trainData = []
valData = []
testData = []
for i in range(len(trainX)):
    trainData.append((trainX[i], trainY[i]))
for i in range(len(valX)):
    valData.append((valX[i], valY[i]))
for i in range(len(testX)):
    testData.append((testX[i], testY[i]))
trainData = np.array(trainData, dtype=object)
valData = np.array(valData, dtype=object)
testData = np.array(testData, dtype=object)

In [125]:
def custom_collate(data):
    
    batch_size = len(data)
    
    max_len = -1
    for i in range(batch_size):
        if len(data[i][0]) > max_len:
            max_len = len(data[i][0])
    
    seq_lengths = []
    for i in range(batch_size):
        seq_lengths.append(len(data[i][0]))
    
    padded_data = []
    padded_labels = []
    mask = []
    for i in range(batch_size):
        padded_data.append(np.pad(data[i][0], (0, max_len-len(data[i][0])), 'constant', constant_values=(vocab["<pad>"])))
        padded_labels.append(np.pad(data[i][1], (0, max_len-len(data[i][1])), 'constant', constant_values=["37"]))
        mask.append(np.pad(np.ones(len(data[i][0])), (0, max_len-len(data[i][0])), 'constant', constant_values=0).astype(bool))
    
    padded_data = torch.from_numpy(np.array(padded_data))
    padded_labels = torch.from_numpy(np.array(padded_labels))
    mask = torch.from_numpy(np.array(mask))

    return [padded_data, padded_labels, seq_lengths, mask]

In [126]:
trainDataLoader = DataLoader(trainData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)
valDataLoader = DataLoader(valData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)
testDataLoader = DataLoader(testData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)

In [127]:
for batch in trainDataLoader:
    X, y, seq_lens, mask = batch

In [128]:
class BiLSTMCRF(nn.Module):
    def __init__(self, weights_matrix, hidden_dim, tagset_size):
        super(BiLSTMCRF, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding.from_pretrained(weights_matrix, freeze=False)
        embedding_dim = weights_matrix.shape[1]
        # self.embedding = nn.Embedding(len(vocab), embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        self.dropout_layer = nn.Dropout(p=0.5)
        self.hidden2tag = nn.Linear(hidden_dim*2, tagset_size)
        self.crf = CRF(tagset_size, batch_first=True)
        

    def forward(self, sentence, labels, mask):
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout_layer(lstm_out)
        emissions = self.hidden2tag(lstm_out)
        return -self.crf(emissions, labels, mask=mask)

    def predict(self, sentence, mask):
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout_layer(lstm_out)
        scores = self.hidden2tag(lstm_out)
        return self.crf.decode(scores, mask=mask)

In [129]:
def train_one_epoch(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for batch in tqdm(iterator):
        optimizer.zero_grad()
        X, y, seq_lens, mask = batch
        loss = model(X, y, mask)
        predictions = model.predict(X, mask)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

In [130]:
def get_scores(preds, gold):
    flatten_preds = []
    flatten_gold = []
    for i in range(len(preds)):
        for j in range(len(preds[i])):
            flatten_preds.append(preds[i][j])
            flatten_gold.append(gold[i][j])
    idx = np.where(np.array(flatten_gold) != 0)[0]
    micro_f1 =  f1_score(np.array(flatten_preds)[idx], np.array(flatten_gold)[idx], average='micro')
    macro_f1 =  f1_score(np.array(flatten_preds)[idx], np.array(flatten_gold)[idx], average='macro')
    return micro_f1, macro_f1

In [131]:
def train_model(model,epochs):
    loss_function = nn.CrossEntropyLoss(ignore_index=37)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_f1s = []
    val_f1s = []

    best_epoch = 0
    best_val_f1 = 0

    for epoch in (range(epochs)):
        print("Training Epoch {}".format(epoch))
        training_loss = train_one_epoch(model, trainDataLoader, optimizer, loss_function)
        print("Training Loss: {}".format(training_loss))

        model.eval()

        train_preds = []
        for batch in trainDataLoader:
            X, y, seq_lens, mask = batch
            predictions = model.predict(X, mask)
            train_preds.extend(predictions)
        train_preds = np.array(train_preds, dtype=object)

        train_micro_f1, train_macro_f1 = get_scores(train_preds, trainY)

        val_preds = []
        for batch in testDataLoader:
            X, y, seq_lens, mask = batch
            predictions = model.predict(X, mask)
            val_preds.extend(predictions)
        val_preds = np.array(val_preds, dtype=object)

        val_micro_f1, val_macro_f1 = get_scores(val_preds, testY)

        print("Training Micro F1: {}".format(train_micro_f1))
        print("Training Macro F1: {}".format(train_macro_f1))
        print("Validation Micro F1: {}".format(val_micro_f1))
        print("Validation Macro F1: {}".format(val_macro_f1))

        train_f1 = (train_micro_f1 + train_macro_f1) / 2
        val_f1 = (val_micro_f1 + val_macro_f1) / 2

        train_f1s.append(train_f1)
        val_f1s.append(val_f1)

        if val_f1 > best_val_f1:
            print("New Best Model at Epoch {}".format(epoch))
            print("Validation Micro F1: {}".format(val_micro_f1))
            print("Validation Macro F1: {}".format(val_macro_f1))
            best_val_f1 = val_f1
            best_epoch = epoch
            torch.save(model.state_dict(), 'best_model_2.pt')
        
        if epoch>=best_epoch + 3:
            break
        
        model.train()

    return model, train_f1s, val_f1s

In [132]:
ner = BiLSTMCRF(weights_matrix, 256, 38)

In [133]:
ner, train_f1s, val_f1s = train_model(ner, 30)

Training Epoch 0


100%|██████████| 700/700 [00:55<00:00, 12.53it/s]


Training Loss: 310.7379181780134
Training Micro F1: 0.6343544163161654
Training Macro F1: 0.41264621055031414
Validation Micro F1: 0.6427086664534698
Validation Macro F1: 0.4123955811075538
New Best Model at Epoch 0
Validation Micro F1: 0.6427086664534698
Validation Macro F1: 0.4123955811075538
Training Epoch 1


100%|██████████| 700/700 [00:59<00:00, 11.74it/s]


Training Loss: 192.33611167907713
Training Micro F1: 0.7005098801682406
Training Macro F1: 0.5253626873677553
Validation Micro F1: 0.6968340262232171
Validation Macro F1: 0.5095334206126828
New Best Model at Epoch 1
Validation Micro F1: 0.6968340262232171
Validation Macro F1: 0.5095334206126828
Training Epoch 2


100%|██████████| 700/700 [00:59<00:00, 11.69it/s]


Training Loss: 150.2544627925328
Training Micro F1: 0.7349714308388223
Training Macro F1: 0.583660107278476
Validation Micro F1: 0.7217780620402943
Validation Macro F1: 0.5604412352900827
New Best Model at Epoch 2
Validation Micro F1: 0.7217780620402943
Validation Macro F1: 0.5604412352900827
Training Epoch 3


100%|██████████| 700/700 [01:00<00:00, 11.59it/s]


Training Loss: 126.44831134251186
Training Micro F1: 0.757489484961511
Training Macro F1: 0.6251558955522054
Validation Micro F1: 0.7350095938599296
Validation Macro F1: 0.5903636483428863
New Best Model at Epoch 3
Validation Micro F1: 0.7350095938599296
Validation Macro F1: 0.5903636483428863
Training Epoch 4


100%|██████████| 700/700 [01:00<00:00, 11.50it/s]


Training Loss: 111.4937015914917
Training Micro F1: 0.7710796762161733
Training Macro F1: 0.6562136489989597
Validation Micro F1: 0.743404221298369
Validation Macro F1: 0.6167652409869026
New Best Model at Epoch 4
Validation Micro F1: 0.743404221298369
Validation Macro F1: 0.6167652409869026
Training Epoch 5


100%|██████████| 700/700 [01:00<00:00, 11.51it/s]


Training Loss: 101.05636154174805
Training Micro F1: 0.7829041345924926
Training Macro F1: 0.6798896351740622
Validation Micro F1: 0.7505996162456027
Validation Macro F1: 0.6312226612308607
New Best Model at Epoch 5
Validation Micro F1: 0.7505996162456027
Validation Macro F1: 0.6312226612308607
Training Epoch 6


100%|██████████| 700/700 [01:00<00:00, 11.56it/s]


Training Loss: 93.42610597882953
Training Micro F1: 0.7941334021109436
Training Macro F1: 0.7005520224246234
Validation Micro F1: 0.7575151902782219
Validation Macro F1: 0.6450174571156831
New Best Model at Epoch 6
Validation Micro F1: 0.7575151902782219
Validation Macro F1: 0.6450174571156831
Training Epoch 7


100%|██████████| 700/700 [01:00<00:00, 11.53it/s]


Training Loss: 87.33163429805211
Training Micro F1: 0.8050650742004603
Training Macro F1: 0.7154195168475109
Validation Micro F1: 0.7634713783178766
Validation Macro F1: 0.6571003001607479
New Best Model at Epoch 7
Validation Micro F1: 0.7634713783178766
Validation Macro F1: 0.6571003001607479
Training Epoch 8


100%|██████████| 700/700 [01:00<00:00, 11.49it/s]


Training Loss: 81.32386232921056
Training Micro F1: 0.8176335211491151
Training Macro F1: 0.7357932569995984
Validation Micro F1: 0.7704669011832427
Validation Macro F1: 0.6775158340258987
New Best Model at Epoch 8
Validation Micro F1: 0.7704669011832427
Validation Macro F1: 0.6775158340258987
Training Epoch 9


100%|██████████| 700/700 [01:01<00:00, 11.46it/s]


Training Loss: 75.95323977606637
Training Micro F1: 0.8325430521387192
Training Macro F1: 0.7551776059283645
Validation Micro F1: 0.7814998401023345
Validation Macro F1: 0.687472727517887
New Best Model at Epoch 9
Validation Micro F1: 0.7814998401023345
Validation Macro F1: 0.687472727517887
Training Epoch 10


100%|██████████| 700/700 [01:01<00:00, 11.41it/s]


Training Loss: 70.33803054264614
Training Micro F1: 0.84638123958416
Training Macro F1: 0.7778536965900456
Validation Micro F1: 0.78621682123441
Validation Macro F1: 0.6969765661699727
New Best Model at Epoch 10
Validation Micro F1: 0.78621682123441
Validation Macro F1: 0.6969765661699727
Training Epoch 11


100%|██████████| 700/700 [01:01<00:00, 11.38it/s]


Training Loss: 64.55386378424508
Training Micro F1: 0.8605071026109039
Training Macro F1: 0.7963374399159747
Validation Micro F1: 0.7931323952670291
Validation Macro F1: 0.7086683563517763
New Best Model at Epoch 11
Validation Micro F1: 0.7931323952670291
Validation Macro F1: 0.7086683563517763
Training Epoch 12


100%|██████████| 700/700 [01:01<00:00, 11.35it/s]


Training Loss: 58.90062255314418
Training Micro F1: 0.871537973176732
Training Macro F1: 0.8128664547524647
Validation Micro F1: 0.7980492484809721
Validation Macro F1: 0.7151359034869739
New Best Model at Epoch 12
Validation Micro F1: 0.7980492484809721
Validation Macro F1: 0.7151359034869739
Training Epoch 13


100%|██████████| 700/700 [01:02<00:00, 11.29it/s]


Training Loss: 53.09438088825771
Training Micro F1: 0.8830350765812237
Training Macro F1: 0.8249241144045971
Validation Micro F1: 0.8042452830188679
Validation Macro F1: 0.7190095642139552
New Best Model at Epoch 13
Validation Micro F1: 0.8042452830188679
Validation Macro F1: 0.7190095642139552
Training Epoch 14


100%|██████████| 700/700 [01:01<00:00, 11.34it/s]


Training Loss: 48.024720241001674
Training Micro F1: 0.8972204586937544
Training Macro F1: 0.8416888678180965
Validation Micro F1: 0.8137591941157659
Validation Macro F1: 0.7368723634871233
New Best Model at Epoch 14
Validation Micro F1: 0.8137591941157659
Validation Macro F1: 0.7368723634871233
Training Epoch 15


100%|██████████| 700/700 [01:02<00:00, 11.25it/s]


Training Loss: 43.42842693192618
Training Micro F1: 0.903122768034283
Training Macro F1: 0.859017804798423
Validation Micro F1: 0.8098017268947874
Validation Macro F1: 0.735441646929043
Training Epoch 16


100%|██████████| 700/700 [01:02<00:00, 11.22it/s]


Training Loss: 39.19116806983948
Training Micro F1: 0.9132509324656773
Training Macro F1: 0.8674082758056034
Validation Micro F1: 0.8171570195075151
Validation Macro F1: 0.7398619472000773
New Best Model at Epoch 16
Validation Micro F1: 0.8171570195075151
Validation Macro F1: 0.7398619472000773
Training Epoch 17


100%|██████████| 700/700 [01:02<00:00, 11.21it/s]


Training Loss: 35.48506118910653
Training Micro F1: 0.918240615824141
Training Macro F1: 0.8668569802369814
Validation Micro F1: 0.8167173009274065
Validation Macro F1: 0.7387254505979396
Training Epoch 18


100%|██████████| 700/700 [01:02<00:00, 11.22it/s]


Training Loss: 32.59913829667228
Training Micro F1: 0.9274759939687325
Training Macro F1: 0.8870442683726275
Validation Micro F1: 0.8239126958746402
Validation Macro F1: 0.7498467862112992
New Best Model at Epoch 18
Validation Micro F1: 0.8239126958746402
Validation Macro F1: 0.7498467862112992
Training Epoch 19


100%|██████████| 700/700 [01:02<00:00, 11.21it/s]


Training Loss: 29.95822246823992
Training Micro F1: 0.9363443377509721
Training Macro F1: 0.8994534274553224
Validation Micro F1: 0.8295091141669332
Validation Macro F1: 0.7587550011212714
New Best Model at Epoch 19
Validation Micro F1: 0.8295091141669332
Validation Macro F1: 0.7587550011212714
Training Epoch 20


100%|██████████| 700/700 [01:02<00:00, 11.22it/s]


Training Loss: 27.782852725982664
Training Micro F1: 0.9366816125704309
Training Macro F1: 0.8981314717286574
Validation Micro F1: 0.8282299328429805
Validation Macro F1: 0.7578757410396254
Training Epoch 21


100%|██████████| 700/700 [01:02<00:00, 11.14it/s]


Training Loss: 25.98751249585833
Training Micro F1: 0.9412645821760178
Training Macro F1: 0.9050213978622158
Validation Micro F1: 0.8289494723377039
Validation Macro F1: 0.7550934412221413
Training Epoch 22


100%|██████████| 700/700 [01:02<00:00, 11.16it/s]


Training Loss: 24.385292972837174
Training Micro F1: 0.9483076739941274
Training Macro F1: 0.9154328775122739
Validation Micro F1: 0.8318676047329709
Validation Macro F1: 0.7553466651805627


In [147]:
# load best model
ner.load_state_dict(torch.load('best_model_2.pt'))

<All keys matched successfully>

In [135]:
torch.save(ner, "model.pt")

In [148]:
ner.eval()

BiLSTMCRF(
  (embedding): Embedding(7404, 50)
  (lstm): LSTM(50, 256, bidirectional=True)
  (dropout_layer): Dropout(p=0.5, inplace=False)
  (hidden2tag): Linear(in_features=512, out_features=38, bias=True)
  (crf): CRF(num_tags=38)
)

In [149]:
from sklearn.metrics import f1_score

In [150]:
# get validation predictions using valDataloader
val_preds = []
for batch in valDataLoader:
    X, y, seq_lens, mask = batch
    predictions = ner.predict(X, mask)
    val_preds.extend(predictions)
val_preds = np.array(val_preds, dtype=object)

In [151]:
flatten_val_preds = []
flatten_valY = []
for i in range(len(val_preds)):
    for j in range(len(val_preds[i])):
        flatten_val_preds.append(val_preds[i][j])
        flatten_valY.append(valY[i][j])

In [152]:
val_preds

array([list([6, 14, 15, 2, 3, 0, 0, 22, 0, 0, 26, 0]),
       list([6, 2, 0, 0, 0, 6, 14, 15, 0, 16, 17, 2, 0, 0, 4, 20, 0, 16, 17, 17, 2, 0, 0, 0]),
       list([6, 21, 2, 3, 3, 0, 2, 3, 0]), ...,
       list([6, 0, 22, 0, 11, 12, 0, 6, 2, 0, 0, 22, 0]),
       list([6, 4, 0, 1, 0, 6, 0, 22, 0]),
       list([6, 0, 18, 19, 0, 11, 12, 0, 0, 0, 4, 6, 0, 6, 0, 2, 0])],
      dtype=object)

In [153]:
# write predictions to file
with open('val_preds.txt', 'w') as f:
    for i in range(len(val_preds)):
        for j in range(len(val_preds[i])):
            f.write(labels_inv[val_preds[i][j]] + '\n')
        f.write("\n")

In [154]:
!python3 eval.py Data/dev.txt val_preds.txt

CLASSIFICATION Report
                 precision    recall  f1_score  true_entities  pred_entities
Reagent           0.786298  0.784423  0.785359         5033.0         5021.0
Action            0.808380  0.795055  0.801662         3640.0         3580.0
Modifier          0.463139  0.512513  0.486576         1998.0         2211.0
Location          0.688480  0.640020  0.663366         1989.0         1849.0
Amount            0.918056  0.769499  0.837239         1718.0         1440.0
Time              0.885714  0.874431  0.880037         1099.0         1085.0
Device            0.574341  0.530454  0.551526          903.0          834.0
Method            0.463080  0.486696  0.474595          902.0          948.0
Concentration     0.746121  0.747175  0.746648          708.0          709.0
Temperature       0.865819  0.914925  0.889695          670.0          708.0
Measure-Type      0.461977  0.565116  0.508368          430.0          526.0
Generic-Measure   0.478992  0.190000  0.272076        

In [155]:
testX, testY = get_data(test_data)
testData = []
for i in range(len(testX)):
    testData.append((testX[i], testY[i]))
testData = np.array(testData, dtype=object)
testDataLoader = DataLoader(testData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)

In [156]:
# get validation predictions using valDataloader
test_preds = []
for batch in testDataLoader:
    X, y, seq_lens, mask = batch
    predictions = ner.predict(X, mask)
    test_preds.extend(predictions)
test_preds = np.array(test_preds, dtype=object)

In [157]:
# write predictions to file
with open('new_test_preds.txt', 'w') as f:
    for i in range(len(test_preds)):
        for j in range(len(test_preds[i])):
            f.write(labels_inv[test_preds[i][j]] + '\n')
        f.write("\n")

In [158]:
!python3 eval.py Data/test.txt new_test_preds.txt

CLASSIFICATION Report
                 precision    recall  f1_score  true_entities  pred_entities
Reagent           0.854554  0.825050  0.839543         6459.0         6236.0
Action            0.859139  0.827670  0.843111         4532.0         4366.0
Modifier          0.553834  0.583835  0.568439         2326.0         2452.0
Amount            0.930435  0.838635  0.882153         2169.0         1955.0
Location          0.760563  0.756000  0.758275         2000.0         1988.0
Time              0.940207  0.909145  0.924415         1695.0         1639.0
Method            0.577120  0.521218  0.547746         1084.0          979.0
Temperature       0.922027  0.917556  0.919786         1031.0         1026.0
Concentration     0.829317  0.836032  0.832661          988.0          996.0
Device            0.750000  0.665541  0.705251          888.0          788.0
Measure-Type      0.555035  0.635389  0.592500          373.0          427.0
Speed             0.892966  0.895706  0.894334        