In [2]:
import sys
import os
import re
import string
import json
import urllib.request
import numpy as np

from tqdm import tqdm

import torch
torch.manual_seed(42)
np.random.seed(42)
import torch.autograd as autograd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader, TensorDataset

from torchcrf import CRF
from sklearn.metrics import f1_score

torch.manual_seed(1)
device = torch.device("cpu")

In [58]:
BATCH_SIZE = 32

In [59]:
with open('Data/labels.json') as f:
    labels = json.load(f)

In [60]:
# invert the labels
labels_inv = {v: k for k, v in labels.items()}

In [61]:
def read_file(filename):
    with open(filename, 'r') as file:
        text = file.readlines()
    return text

In [62]:
train_data = read_file('Data/train.txt')
val_data = read_file('Data/dev.txt')
test_data = read_file("Data/test.txt")
test_data2 = read_file("Data/test_2.txt")

In [63]:
embeddings = {}
emb_dim = 50
with open('glove.6B/glove.6B.50d.txt','r') as f:
  for line in f:
    values = line.split()
    word = values[0]
    vector = np.asarray(values[1:],'float32')
    embeddings[word]=vector

In [64]:
NUMERIC_KEY = "<numeric>"
UNK_KEY = "<unk>"

ADDITIONAL_KEYS = [NUMERIC_KEY, UNK_KEY]

In [65]:
for k in ADDITIONAL_KEYS:
    embeddings[k] = np.random.normal(scale=0.6, size=(emb_dim, ))

In [66]:
# vocab_keys = list(embeddings.keys())
vocab_keys = []
vocab_keys.append("<unk>")
vocab_keys.append("<pad>")
vocab_keys.append("<numeric>")
vocab_keys.append("<conc>")
vocab = {k: v for v, k in enumerate(vocab_keys)}

In [67]:
len(vocab)

3

In [68]:
def build_train_vocab(data):
    vocab = {}
    num_words = 0
    for line in data:
        split_line = line.split("\t")
        if len(split_line) == 2:
            word = split_line[0]
            word = word.lower()
            if word not in vocab:
                vocab[word] = 1
    return vocab

In [69]:
train_data = train_data + val_data

In [70]:
train_vocab = build_train_vocab(train_data)
# extend the vocab with the train_vocab
idx = len(vocab)
for word in train_vocab:
    if word not in vocab:
        vocab[word] = idx
        idx += 1

In [71]:
len(vocab)

8548

In [72]:
# dump vocab
with open('vocab.json', 'w') as fp:
    json.dump(vocab, fp)

In [73]:
def check_if_quantity(word):
    # check if the word is a quantity
    if re.match(r'^\d+\.?\d*[a-zA-Z]*$', word):
        return True
    return False

In [74]:
def get_quantity_vector(word):
    num = re.findall(r'\d+\.?\d*', word)
    if len(num) > 0:
        num = float(num[0])
    else:
        num = 0
    unit = re.findall(r'[a-zA-Z]+', word)
    if len(unit) > 0:
        unit = unit[0]
    else:
        unit = ""
    if unit in embeddings:
        return embeddings["<numeric>"] + embeddings[unit]
    else:
        return np.random.normal(scale=0.6, size=(emb_dim, ))

In [20]:
def check_if_conc(word):
    # check if word is a concentration
    if re.match(r'[a-zA-Z]*\/[a-zA-Z]*', word):
        return True
    elif word == "%":
        return True
    return False

In [75]:
def get_vector(word):
    word = word.replace("~","")
    temp = word.replace(",", "")
    temp = temp.replace("-", "")
    if temp.replace(".", "", 1).isdigit():
        return embeddings[NUMERIC_KEY]
    elif check_if_conc(word):
        return embeddings["<conc>"]
    elif word in embeddings:
        return embeddings[word]
    elif check_if_quantity(word):
        return get_quantity_vector(word)
    else:
        return np.random.normal(scale=0.6, size=(emb_dim, ))

In [76]:
matrix_len = len(vocab)
weights_matrix = np.zeros((matrix_len, emb_dim))

for i, word in enumerate(vocab):
    #weights_matrix[i] = get_vector(word)
    if word in embeddings:
        weights_matrix[i] = embeddings[word]
    else:
        weights_matrix[i] = np.random.normal(scale=0.6, size=(emb_dim, ))

In [77]:
weights_matrix = torch.from_numpy(weights_matrix).float()

In [78]:
def get_data(data):
    sent_labels = []
    all_labels = []
    sent_idx = []
    all_idx = []
    for line in (data):
        split_line = line.split("\t")
        if len(split_line) == 2:
            word = split_line[0]
            tag = split_line[1]
            tag = tag.replace("\n", "")
            word = word.lower()
            if word in vocab:
                sent_idx.append(vocab[word])
            else:
                sent_idx.append(vocab["<unk>"])
            tag_idx = labels[tag]
            sent_labels.append(tag_idx)
        elif line=="\n":
            sent_idx = np.array(sent_idx)
            sent_labels = np.array(sent_labels)
            all_idx.append(sent_idx)
            all_labels.append(sent_labels)
            sent_idx = []
            sent_labels = []
        else:
            print(line)
    return np.asarray(all_idx, dtype=object), np.asarray(all_labels, dtype=object)

In [79]:
trainX, trainY = get_data(train_data)
valX, valY = get_data(val_data)
test1X, test1Y = get_data(test_data)
test2X, test2Y = get_data(test_data2)

In [80]:
from sklearn.utils import shuffle
test1X, test1Y = shuffle(test1X, test1Y, random_state=42)
test2X, test2Y = shuffle(test2X, test2Y, random_state=42)

In [81]:
# extend trainX with test1X and test2X
trainX = np.concatenate((trainX, test1X[:2000], test2X[:2000]), axis=0)
trainY = np.concatenate((trainY, test1Y[:2000], test2Y[:2000]), axis=0)

In [82]:
testX = test1X
testY = test1Y

In [83]:
trainData = []
valData = []
testData = []
for i in range(len(trainX)):
    trainData.append((trainX[i], trainY[i]))
for i in range(len(valX)):
    valData.append((valX[i], valY[i]))
for i in range(len(testX)):
    testData.append((testX[i], testY[i]))
trainData = np.array(trainData, dtype=object)
valData = np.array(valData, dtype=object)
testData = np.array(testData, dtype=object)

In [84]:
def custom_collate(data):
    
    batch_size = len(data)
    
    max_len = -1
    for i in range(batch_size):
        if len(data[i][0]) > max_len:
            max_len = len(data[i][0])
    
    seq_lengths = []
    for i in range(batch_size):
        seq_lengths.append(len(data[i][0]))
    
    padded_data = []
    padded_labels = []
    mask = []
    for i in range(batch_size):
        padded_data.append(np.pad(data[i][0], (0, max_len-len(data[i][0])), 'constant', constant_values=(vocab["<pad>"])))
        padded_labels.append(np.pad(data[i][1], (0, max_len-len(data[i][1])), 'constant', constant_values=["37"]))
        mask.append(np.pad(np.ones(len(data[i][0])), (0, max_len-len(data[i][0])), 'constant', constant_values=0).astype(bool))
    
    padded_data = torch.from_numpy(np.array(padded_data))
    padded_labels = torch.from_numpy(np.array(padded_labels))
    mask = torch.from_numpy(np.array(mask))

    return [padded_data, padded_labels, seq_lengths, mask]

In [85]:
trainDataLoader = DataLoader(trainData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)
valDataLoader = DataLoader(valData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)
testDataLoader = DataLoader(testData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)

In [86]:
for batch in trainDataLoader:
    X, y, seq_lens, mask = batch

In [87]:
class BiLSTMCRF(nn.Module):
    def __init__(self, weights_matrix, hidden_dim, tagset_size):
        super(BiLSTMCRF, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding.from_pretrained(weights_matrix, freeze=False)
        embedding_dim = weights_matrix.shape[1]
        # self.embedding = nn.Embedding(len(vocab), embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        self.dropout_layer = nn.Dropout(p=0.5)
        self.hidden2tag = nn.Linear(hidden_dim*2, tagset_size)
        self.crf = CRF(tagset_size, batch_first=True)
        

    def forward(self, sentence, labels, mask):
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout_layer(lstm_out)
        emissions = self.hidden2tag(lstm_out)
        return -self.crf(emissions, labels, mask=mask)

    def predict(self, sentence, mask):
        embeds = self.embedding(sentence)
        lstm_out, _ = self.lstm(embeds)
        lstm_out = self.dropout_layer(lstm_out)
        scores = self.hidden2tag(lstm_out)
        return self.crf.decode(scores, mask=mask)

In [88]:
def train_one_epoch(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for batch in tqdm(iterator):
        optimizer.zero_grad()
        X, y, seq_lens, mask = batch
        loss = model(X, y, mask)
        predictions = model.predict(X, mask)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

In [89]:
def get_scores(preds, gold):
    flatten_preds = []
    flatten_gold = []
    for i in range(len(preds)):
        for j in range(len(preds[i])):
            flatten_preds.append(preds[i][j])
            flatten_gold.append(gold[i][j])
    idx = np.where(np.array(flatten_gold) != 0)[0]
    micro_f1 =  f1_score(np.array(flatten_preds)[idx], np.array(flatten_gold)[idx], average='micro')
    macro_f1 =  f1_score(np.array(flatten_preds)[idx], np.array(flatten_gold)[idx], average='macro')
    return micro_f1, macro_f1

In [90]:
def train_model(model,epochs):
    loss_function = nn.CrossEntropyLoss(ignore_index=37)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_f1s = []
    val_f1s = []

    best_epoch = 0
    best_val_f1 = 0

    for epoch in (range(epochs)):
        print("Training Epoch {}".format(epoch))
        training_loss = train_one_epoch(model, trainDataLoader, optimizer, loss_function)
        print("Training Loss: {}".format(training_loss))

        model.eval()

        train_preds = []
        for batch in trainDataLoader:
            X, y, seq_lens, mask = batch
            predictions = model.predict(X, mask)
            train_preds.extend(predictions)
        train_preds = np.array(train_preds, dtype=object)

        train_micro_f1, train_macro_f1 = get_scores(train_preds, trainY)

        val_preds = []
        for batch in testDataLoader:
            X, y, seq_lens, mask = batch
            predictions = model.predict(X, mask)
            val_preds.extend(predictions)
        val_preds = np.array(val_preds, dtype=object)

        val_micro_f1, val_macro_f1 = get_scores(val_preds, testY)

        print("Training Micro F1: {}".format(train_micro_f1))
        print("Training Macro F1: {}".format(train_macro_f1))
        print("Validation Micro F1: {}".format(val_micro_f1))
        print("Validation Macro F1: {}".format(val_macro_f1))

        train_f1 = (train_micro_f1 + train_macro_f1) / 2
        val_f1 = (val_micro_f1 + val_macro_f1) / 2

        train_f1s.append(train_f1)
        val_f1s.append(val_f1)

        if val_f1 > best_val_f1:
            print("New Best Model at Epoch {}".format(epoch))
            print("Validation Micro F1: {}".format(val_micro_f1))
            print("Validation Macro F1: {}".format(val_macro_f1))
            best_val_f1 = val_f1
            best_epoch = epoch
            torch.save(model.state_dict(), 'best_model_2.pt')
        
        if epoch>=best_epoch + 3:
            break
        
        model.train()

    return model, train_f1s, val_f1s

In [91]:
ner = BiLSTMCRF(weights_matrix, 256, 38)

In [92]:
ner, train_f1s, val_f1s = train_model(ner, 30)

Training Epoch 0


100%|██████████| 421/421 [01:11<00:00,  5.93it/s]


Training Loss: 687.565470715883
Training Micro F1: 0.5682487320110511
Training Macro F1: 0.37532870547812275
Validation Micro F1: 0.5877438439398784
Validation Macro F1: 0.3827465527626986
New Best Model at Epoch 0
Validation Micro F1: 0.5877438439398784
Validation Macro F1: 0.3827465527626986
Training Epoch 1


100%|██████████| 421/421 [01:10<00:00,  5.99it/s]


Training Loss: 442.93714756184124
Training Micro F1: 0.6514782895550699
Training Macro F1: 0.49022810529211164
Validation Micro F1: 0.6640150303805564
Validation Macro F1: 0.4932199032966979
New Best Model at Epoch 1
Validation Micro F1: 0.6640150303805564
Validation Macro F1: 0.4932199032966979
Training Epoch 2


100%|██████████| 421/421 [01:10<00:00,  5.94it/s]


Training Loss: 360.6012865370073
Training Micro F1: 0.6962434538781906
Training Macro F1: 0.5553781388462004
Validation Micro F1: 0.7037895746722098
Validation Macro F1: 0.5531411752666571
New Best Model at Epoch 2
Validation Micro F1: 0.7037895746722098
Validation Macro F1: 0.5531411752666571
Training Epoch 3


100%|██████████| 421/421 [01:11<00:00,  5.93it/s]


Training Loss: 307.9717640638918
Training Micro F1: 0.7217021978475114
Training Macro F1: 0.5969014281884046
Validation Micro F1: 0.7209385992964503
Validation Macro F1: 0.5800306434618379
New Best Model at Epoch 3
Validation Micro F1: 0.7209385992964503
Validation Macro F1: 0.5800306434618379
Training Epoch 4


100%|██████████| 421/421 [01:10<00:00,  5.98it/s]


Training Loss: 272.11807087877867
Training Micro F1: 0.7403900870067214
Training Macro F1: 0.6305930089540499
Validation Micro F1: 0.7334905660377359
Validation Macro F1: 0.6037012330271807
New Best Model at Epoch 4
Validation Micro F1: 0.7334905660377359
Validation Macro F1: 0.6037012330271807
Training Epoch 5


100%|██████████| 421/421 [01:10<00:00,  5.96it/s]


Training Loss: 245.44087393210222
Training Micro F1: 0.755416271493959
Training Macro F1: 0.6561932654041195
Validation Micro F1: 0.7443236328749601
Validation Macro F1: 0.6259938239708429
New Best Model at Epoch 5
Validation Micro F1: 0.7443236328749601
Validation Macro F1: 0.6259938239708429
Training Epoch 6


100%|██████████| 421/421 [01:11<00:00,  5.87it/s]


Training Loss: 225.40367840474687
Training Micro F1: 0.7668879633829532
Training Macro F1: 0.6736473212388933
Validation Micro F1: 0.7535177486408698
Validation Macro F1: 0.6417733235858464
New Best Model at Epoch 6
Validation Micro F1: 0.7535177486408698
Validation Macro F1: 0.6417733235858464
Training Epoch 7


100%|██████████| 421/421 [01:12<00:00,  5.78it/s]


Training Loss: 209.14601796709727
Training Micro F1: 0.7786730444105397
Training Macro F1: 0.6906957749202605
Validation Micro F1: 0.7603133994243685
Validation Macro F1: 0.656676583214375
New Best Model at Epoch 7
Validation Micro F1: 0.7603133994243685
Validation Macro F1: 0.656676583214375
Training Epoch 8


100%|██████████| 421/421 [01:12<00:00,  5.77it/s]


Training Loss: 196.29039476263267
Training Micro F1: 0.788544802276195
Training Macro F1: 0.7054214960617262
Validation Micro F1: 0.7669491525423728
Validation Macro F1: 0.6719020453773218
New Best Model at Epoch 8
Validation Micro F1: 0.7669491525423728
Validation Macro F1: 0.6719020453773218
Training Epoch 9


100%|██████████| 421/421 [01:12<00:00,  5.78it/s]


Training Loss: 184.53266000634417
Training Micro F1: 0.7998515525132985
Training Macro F1: 0.7196659341511366
Validation Micro F1: 0.7737048289094979
Validation Macro F1: 0.676365267772749
New Best Model at Epoch 9
Validation Micro F1: 0.7737048289094979
Validation Macro F1: 0.676365267772749
Training Epoch 10


100%|██████████| 421/421 [01:13<00:00,  5.70it/s]


Training Loss: 174.06242463028062
Training Micro F1: 0.807809987217022
Training Macro F1: 0.731423424175392
Validation Micro F1: 0.779221298369044
Validation Macro F1: 0.6860263295215774
New Best Model at Epoch 10
Validation Micro F1: 0.779221298369044
Validation Macro F1: 0.6860263295215774
Training Epoch 11


100%|██████████| 421/421 [01:21<00:00,  5.15it/s]


Training Loss: 163.73352358856565
Training Micro F1: 0.8225392767308566
Training Macro F1: 0.7509206022036922
Validation Micro F1: 0.7860169491525424
Validation Macro F1: 0.7000575600657402
New Best Model at Epoch 11
Validation Micro F1: 0.7860169491525424
Validation Macro F1: 0.7000575600657402
Training Epoch 12


100%|██████████| 421/421 [01:28<00:00,  4.77it/s]


Training Loss: 153.3626814998527
Training Micro F1: 0.8346047585666571
Training Macro F1: 0.7644282696814588
Validation Micro F1: 0.7946514230892229
Validation Macro F1: 0.709956530872118
New Best Model at Epoch 12
Validation Micro F1: 0.7946514230892229
Validation Macro F1: 0.709956530872118
Training Epoch 13


100%|██████████| 421/421 [01:15<00:00,  5.60it/s]


Training Loss: 143.21144999234522
Training Micro F1: 0.8501834975877284
Training Macro F1: 0.7823852361980567
Validation Micro F1: 0.801566997121842
Validation Macro F1: 0.7166709992012198
New Best Model at Epoch 13
Validation Micro F1: 0.801566997121842
Validation Macro F1: 0.7166709992012198
Training Epoch 14


100%|██████████| 421/421 [01:15<00:00,  5.60it/s]


Training Loss: 132.13836958062905
Training Micro F1: 0.8626200981402828
Training Macro F1: 0.7995944624896161
Validation Micro F1: 0.8088423409018228
Validation Macro F1: 0.7268464751027046
New Best Model at Epoch 14
Validation Micro F1: 0.8088423409018228
Validation Macro F1: 0.7268464751027046
Training Epoch 15


100%|██████████| 421/421 [01:15<00:00,  5.58it/s]


Training Loss: 120.8475708587719
Training Micro F1: 0.8766566327161767
Training Macro F1: 0.821570332960966
Validation Micro F1: 0.8133994243684042
Validation Macro F1: 0.7320069692720843
New Best Model at Epoch 15
Validation Micro F1: 0.8133994243684042
Validation Macro F1: 0.7320069692720843
Training Epoch 16


100%|██████████| 421/421 [01:23<00:00,  5.01it/s]


Training Loss: 109.7004601394762
Training Micro F1: 0.8903880252360729
Training Macro F1: 0.8367560833917694
Validation Micro F1: 0.8194755356571796
Validation Macro F1: 0.7377741304611523
New Best Model at Epoch 16
Validation Micro F1: 0.8194755356571796
Validation Macro F1: 0.7377741304611523
Training Epoch 17


100%|██████████| 421/421 [01:23<00:00,  5.02it/s]


Training Loss: 98.70993022964007
Training Micro F1: 0.8997154756504886
Training Macro F1: 0.849266167272918
Validation Micro F1: 0.822673488967061
Validation Macro F1: 0.7441503674741822
New Best Model at Epoch 17
Validation Micro F1: 0.822673488967061
Validation Macro F1: 0.7441503674741822
Training Epoch 18


100%|██████████| 421/421 [01:16<00:00,  5.53it/s]


Training Loss: 88.76597588872117
Training Micro F1: 0.9095295039379819
Training Macro F1: 0.8597050136154345
Validation Micro F1: 0.8261512631915574
Validation Macro F1: 0.7460627137362164
New Best Model at Epoch 18
Validation Micro F1: 0.8261512631915574
Validation Macro F1: 0.7460627137362164
Training Epoch 19


100%|██████████| 421/421 [01:16<00:00,  5.53it/s]


Training Loss: 80.02194698632755
Training Micro F1: 0.9184693414704548
Training Macro F1: 0.8695366920349425
Validation Micro F1: 0.8296290374160538
Validation Macro F1: 0.7524586262248061
New Best Model at Epoch 19
Validation Micro F1: 0.8296290374160538
Validation Macro F1: 0.7524586262248061
Training Epoch 20


100%|██████████| 421/421 [01:15<00:00,  5.54it/s]


Training Loss: 71.57847680841658
Training Micro F1: 0.9269638365428231
Training Macro F1: 0.8797766680499356
Validation Micro F1: 0.8334266069715383
Validation Macro F1: 0.7538021365275719
New Best Model at Epoch 20
Validation Micro F1: 0.8334266069715383
Validation Macro F1: 0.7538021365275719
Training Epoch 21


100%|██████████| 421/421 [01:16<00:00,  5.51it/s]


Training Loss: 65.0120756054151
Training Micro F1: 0.935408849119624
Training Macro F1: 0.8931267696192693
Validation Micro F1: 0.8368244323632875
Validation Macro F1: 0.7716936634420214
New Best Model at Epoch 21
Validation Micro F1: 0.8368244323632875
Validation Macro F1: 0.7716936634420214
Training Epoch 22


100%|██████████| 421/421 [01:17<00:00,  5.43it/s]


Training Loss: 59.33639846674903
Training Micro F1: 0.9403241103459651
Training Macro F1: 0.9034760258153338
Validation Micro F1: 0.8379037416053725
Validation Macro F1: 0.7660412892217672
Training Epoch 23


100%|██████████| 421/421 [01:24<00:00,  4.97it/s]


Training Loss: 54.203698654356025
Training Micro F1: 0.9478619438373674
Training Macro F1: 0.9087890633723622
Validation Micro F1: 0.840701950751519
Validation Macro F1: 0.7625892653540993
Training Epoch 24


100%|██████████| 421/421 [01:24<00:00,  4.97it/s]


Training Loss: 49.70047352716079
Training Micro F1: 0.9484392396189848
Training Macro F1: 0.9105536357830867
Validation Micro F1: 0.8373440997761432
Validation Macro F1: 0.7640473543107122


In [106]:
# load best model
ner.load_state_dict(torch.load('best_model_2.pt'))

<All keys matched successfully>

In [94]:
torch.save(ner, "model.pt")

In [107]:
ner.eval()

BiLSTMCRF(
  (embedding): Embedding(8548, 50)
  (lstm): LSTM(50, 256, bidirectional=True)
  (dropout_layer): Dropout(p=0.5, inplace=False)
  (hidden2tag): Linear(in_features=512, out_features=38, bias=True)
  (crf): CRF(num_tags=38)
)

In [96]:
from sklearn.metrics import f1_score

In [97]:
# get validation predictions using valDataloader
val_preds = []
for batch in valDataLoader:
    X, y, seq_lens, mask = batch
    predictions = ner.predict(X, mask)
    val_preds.extend(predictions)
val_preds = np.array(val_preds, dtype=object)

In [98]:
flatten_val_preds = []
flatten_valY = []
for i in range(len(val_preds)):
    for j in range(len(val_preds[i])):
        flatten_val_preds.append(val_preds[i][j])
        flatten_valY.append(valY[i][j])

In [99]:
val_preds

array([list([6, 14, 15, 2, 3, 0, 0, 22, 0, 0, 26, 0]),
       list([6, 2, 0, 26, 0, 6, 14, 15, 0, 16, 17, 2, 0, 0, 0, 6, 0, 16, 17, 17, 2, 0, 0, 0]),
       list([6, 13, 14, 0, 2, 3, 3, 3, 0]), ...,
       list([6, 0, 22, 0, 11, 12, 0, 6, 2, 0, 0, 22, 0]),
       list([6, 4, 0, 6, 0, 6, 0, 22, 0]),
       list([6, 0, 18, 19, 0, 11, 12, 0, 0, 0, 4, 6, 0, 6, 0, 2, 0])],
      dtype=object)

In [100]:
# write predictions to file
with open('val_preds.txt', 'w') as f:
    for i in range(len(val_preds)):
        for j in range(len(val_preds[i])):
            f.write(labels_inv[val_preds[i][j]] + '\n')
        f.write("\n")

In [101]:
!python3 eval.py Data/dev.txt val_preds.txt

CLASSIFICATION Report
                 precision    recall  f1_score  true_entities  pred_entities
Reagent           0.959399  0.939003  0.949091         5033.0         4926.0
Action            0.927630  0.947253  0.937339         3640.0         3717.0
Modifier          0.765049  0.852352  0.806345         1998.0         2226.0
Location          0.947862  0.914027  0.930637         1989.0         1918.0
Amount            0.983658  0.910943  0.945905         1718.0         1591.0
Time              0.974600  0.942675  0.958372         1099.0         1063.0
Device            0.923077  0.930233  0.926641          903.0          910.0
Method            0.880044  0.894678  0.887301          902.0          917.0
Concentration     0.889039  0.927966  0.908086          708.0          739.0
Temperature       0.961708  0.974627  0.968125          670.0          679.0
Measure-Type      0.740672  0.923256  0.821946          430.0          536.0
Generic-Measure   0.986547  0.733333  0.841300        

In [108]:
testX, testY = get_data(test_data)
testData = []
for i in range(len(testX)):
    testData.append((testX[i], testY[i]))
testData = np.array(testData, dtype=object)
testDataLoader = DataLoader(testData, batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate)

In [109]:
# get validation predictions using valDataloader
test_preds = []
for batch in testDataLoader:
    X, y, seq_lens, mask = batch
    predictions = ner.predict(X, mask)
    test_preds.extend(predictions)
test_preds = np.array(test_preds, dtype=object)

In [110]:
# write predictions to file
with open('new_test_preds.txt', 'w') as f:
    for i in range(len(test_preds)):
        for j in range(len(test_preds[i])):
            f.write(labels_inv[test_preds[i][j]] + '\n')
        f.write("\n")

In [111]:
!python3 eval.py Data/test.txt new_test_preds.txt

CLASSIFICATION Report
                 precision    recall  f1_score  true_entities  pred_entities
Reagent           0.868080  0.830314  0.848777         6459.0         6178.0
Action            0.844101  0.844660  0.844381         4532.0         4535.0
Modifier          0.544426  0.579536  0.561433         2326.0         2476.0
Amount            0.929877  0.868142  0.897949         2169.0         2025.0
Location          0.774014  0.765500  0.769734         2000.0         1978.0
Time              0.940197  0.899705  0.919506         1695.0         1622.0
Method            0.616082  0.501845  0.553127         1084.0          883.0
Temperature       0.947053  0.919496  0.933071         1031.0         1001.0
Concentration     0.822835  0.846154  0.834331          988.0         1016.0
Device            0.721788  0.727477  0.724621          888.0          895.0
Measure-Type      0.551802  0.656836  0.599755          373.0          444.0
Speed             0.916409  0.907975  0.912173        