In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

import numpy as np
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Fixed seed so that weight initialisation and DataLoader shuffling repeat
# from one "Restart & Run All" to the next. GPU kernels may still introduce
# small run-to-run differences.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE


device(type='cuda')

In [2]:
def read_conll(path):
    """Parse a whitespace-separated CoNLL-style file into tokens and tags.

    Each non-blank line holds one token followed by its tag; the last
    whitespace-separated field is the tag and everything before it is joined
    with single spaces to form the token. Blank lines separate sentences.

    Args:
        path: Location of the UTF-8 encoded file to read.

    Returns:
        A pair ``(sentences, labels)`` of parallel lists; ``sentences[i]`` is
        the list of tokens of sentence ``i`` and ``labels[i]`` the matching
        list of tags.
    """
    sentences, labels = [], []
    cur_tokens, cur_tags = [], []

    def _flush():
        # Store the sentence collected so far (if any) and start a new one.
        nonlocal cur_tokens, cur_tags
        if cur_tokens:
            sentences.append(cur_tokens)
            labels.append(cur_tags)
            cur_tokens, cur_tags = [], []

    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            fields = raw_line.split()

            # A line with no fields is a sentence boundary.
            if not fields:
                _flush()
                continue

            # Trailing field is the tag; the rest is the (possibly multi-word) token.
            *token_parts, tag = fields
            cur_tokens.append(" ".join(token_parts))
            cur_tags.append(tag)

    # The file may end without a trailing blank line.
    _flush()

    return sentences, labels

# Location of the FindVehicle splits. Change DATA_DIR when running outside
# the Colab runtime this notebook was executed in.
DATA_DIR = "/content"
TRAIN_PATH = f"{DATA_DIR}/FindVehicle_train.txt"
TEST_PATH = f"{DATA_DIR}/FindVehicle_test.txt"

train_sentences, train_labels = read_conll(TRAIN_PATH)
test_sentences, test_labels = read_conll(TEST_PATH)

print("Train sentences:", len(train_sentences))
print("Test sentences:", len(test_sentences))


Train sentences: 21565
Test sentences: 20777


In [3]:
# Reserved entries shared by the vocabularies: padding and unknown word.
PAD_TOKEN = "<PAD>"
UNK_TOKEN = "<UNK>"

def build_vocab(sentences):
    """Assign an integer id to every distinct token, in first-seen order.

    Args:
        sentences: Iterable of token lists.

    Returns:
        Dict mapping token -> id. ``PAD_TOKEN`` is always 0 and ``UNK_TOKEN``
        always 1; tokens from ``sentences`` are numbered from 2 upwards.
    """
    vocab = {PAD_TOKEN: 0, UNK_TOKEN: 1}
    every_token = (tok for sent in sentences for tok in sent)
    for tok in every_token:
        # setdefault leaves already-known tokens untouched; the next free id
        # is simply the current size of the mapping.
        vocab.setdefault(tok, len(vocab))
    return vocab

def build_tag_vocab(labels):
    """Assign an integer id to every distinct tag, in first-seen order.

    Args:
        labels: Iterable of tag lists (one list per sentence).

    Returns:
        Dict mapping tag -> id. ``PAD_TOKEN`` is always 0; tags from
        ``labels`` are numbered from 1 upwards.
    """
    mapping = {PAD_TOKEN: 0}
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    distinct_tags = dict.fromkeys(tag for seq in labels for tag in seq)
    for tag in distinct_tags:
        if tag not in mapping:
            mapping[tag] = len(mapping)
    return mapping

# Both vocabularies are built from the training split only.
word2idx = build_vocab(train_sentences)
tag2idx = build_tag_vocab(train_labels)

# Reverse lookup (id -> tag string) used when decoding predictions.
idx2tag = {index: tag for tag, index in tag2idx.items()}

print("Vocab size:", len(word2idx))
print("Tag size:", len(tag2idx))


Vocab size: 3703
Tag size: 59


In [4]:
class NERDataset(Dataset):
    """Token-classification dataset that yields index tensors per sentence.

    Args:
        sentences: List of token lists.
        labels: List of tag lists, parallel to ``sentences``.
        word2idx: Mapping token -> id; tokens missing from it are encoded as 1.
        tag2idx: Mapping tag -> id; a tag missing from it raises ``KeyError``.
    """

    def __init__(self, sentences, labels, word2idx, tag2idx):
        self.sentences, self.labels = sentences, labels
        self.word2idx, self.tag2idx = word2idx, tag2idx

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.sentences)

    def encode_sentence(self, sent):
        """Return the token ids of ``sent`` as a 1-D long tensor."""
        lookup = self.word2idx.get
        token_ids = [lookup(token, 1) for token in sent]
        return torch.tensor(token_ids, dtype=torch.long)

    def encode_labels(self, tags):
        """Return the tag ids of ``tags`` as a 1-D long tensor."""
        tag_ids = [self.tag2idx[tag] for tag in tags]
        return torch.tensor(tag_ids, dtype=torch.long)

    def __getitem__(self, idx):
        """Return ``(token_ids, tag_ids, length)`` for sentence ``idx``."""
        token_ids = self.encode_sentence(self.sentences[idx])
        tag_ids = self.encode_labels(self.labels[idx])
        return token_ids, tag_ids, len(token_ids)

def collate(batch):
    """Pad a list of dataset items into batch tensors, longest sequence first.

    Args:
        batch: List of ``(token_ids, tag_ids, length)`` triples.

    Returns:
        ``(xs, ys, lens_sorted, perm)`` where ``xs`` and ``ys`` are padded
        with 0 to the longest sequence in the batch and reordered by
        descending length, ``lens_sorted`` holds the lengths in that order and
        ``perm`` is the index permutation that was applied.
    """
    token_seqs, tag_seqs, lengths = zip(*batch)

    # Permutation that puts the longest sequence in row 0.
    lens_sorted, perm = torch.sort(torch.tensor(lengths), descending=True)

    padded_tokens = pad_sequence(token_seqs, batch_first=True, padding_value=0)
    padded_tags = pad_sequence(tag_seqs, batch_first=True, padding_value=0)

    return padded_tokens[perm], padded_tags[perm], lens_sorted, perm

# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 16

train_dataset = NERDataset(train_sentences, train_labels, word2idx, tag2idx)
test_dataset = NERDataset(test_sentences, test_labels, word2idx, tag2idx)

# Only the training loader shuffles; evaluation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate)


# BiLSTM

In [32]:
#BiLSTM

# Sizes taken from the vocabularies built from the training split.
vocab_size, tagset_size = len(word2idx), len(tag2idx)
pad_idx = word2idx[PAD_TOKEN]

embedding_dim = 100
hidden_dim = 128
# Each direction gets half of hidden_dim, so the concatenated forward and
# backward states have hidden_dim features in total.
lstm_hidden = hidden_dim // 2

# The three modules are created in the same order as before (embedding,
# LSTM, linear) because parameter initialisation draws from the global RNG.
embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx).to(DEVICE)

bilstm = nn.LSTM(embedding_dim, lstm_hidden, batch_first=True, bidirectional=True).to(DEVICE)

classifier = nn.Linear(in_features=hidden_dim, out_features=tagset_size).to(DEVICE)

def forward_bilstm(x, lens):
    """Compute per-token tag logits for a padded batch.

    Runs the module-level ``embedding`` -> ``bilstm`` -> ``classifier`` chain.
    Sequences are packed so the LSTM does not process padding positions.

    Args:
        x: Long tensor of token ids, laid out batch-first (batch, seq_len).
        lens: 1-D tensor with the true length of each row of ``x``; it is
            moved to the CPU here because ``pack_padded_sequence`` requires
            CPU lengths. Rows need not be sorted by length.

    Returns:
        Tensor of logits with one score per tag for every position of ``x``
        (batch, seq_len, number of tags).
    """
    emb = embedding(x)
    packed = pack_padded_sequence(emb, lens.cpu(), batch_first=True, enforce_sorted=False)
    packed_out, _ = bilstm(packed)
    # Without total_length the output is only as long as the longest sequence
    # in ``lens``; pinning it to x.size(1) keeps the logits aligned with the
    # padded input (and with the label tensor) even when x carries extra padding.
    out, _ = pad_packed_sequence(packed_out, batch_first=True, total_length=x.size(1))
    return classifier(out)


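# LSTM (unidirectional variant; the cell below is commented out and, if enabled, replaces the BiLSTM definitions above)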

In [33]:
# vocab_size = len(word2idx)
# tagset_size = len(tag2idx)
# pad_idx = word2idx[PAD_TOKEN]

# embedding_dim = 100
# hidden_dim = 128

# embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx).to(DEVICE)

# bilstm = nn.LSTM(
#     input_size=embedding_dim,
#     hidden_size=hidden_dim,
#     batch_first=True,
#     bidirectional=False
# ).to(DEVICE)

# classifier = nn.Linear(hidden_dim, tagset_size).to(DEVICE)

# def forward_bilstm(x, lens):
#     emb = embedding(x)
#     packed = pack_padded_sequence(emb, lens.cpu(), batch_first=True, enforce_sorted=False)
#     packed_out, _ = bilstm(packed)
#     out, _ = pad_packed_sequence(packed_out, batch_first=True)
#     logits = classifier(out)
#     return logits

In [34]:
# Padding positions carry the <PAD> tag id and are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=tag2idx[PAD_TOKEN])

# The model is three free-standing modules, so their parameters are gathered
# by hand for the optimiser.
params = list(embedding.parameters()) + list(bilstm.parameters()) + list(classifier.parameters())
optimizer = torch.optim.Adam(params, lr=1e-3)

EPOCHS = 10

for epoch in range(EPOCHS):
    total = 0
    for x_batch, y_batch, lens, perm in train_loader:

        x_batch = x_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)
        # ``lens`` deliberately stays on the CPU: forward_bilstm hands it to
        # pack_padded_sequence via lens.cpu(), so copying it to the GPU first
        # only added a device round trip on every step.

        optimizer.zero_grad()
        logits = forward_bilstm(x_batch, lens)
        # Flatten (batch, seq, tags) -> (batch*seq, tags) for the token-level loss.
        loss = criterion(logits.view(-1, tagset_size), y_batch.view(-1))

        loss.backward()
        optimizer.step()
        total += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {total/len(train_loader):.4f}")

Epoch 1/10 - Loss: 0.4031
Epoch 2/10 - Loss: 0.1071
Epoch 3/10 - Loss: 0.0589
Epoch 4/10 - Loss: 0.0331
Epoch 5/10 - Loss: 0.0196
Epoch 6/10 - Loss: 0.0127
Epoch 7/10 - Loss: 0.0089
Epoch 8/10 - Loss: 0.0061
Epoch 9/10 - Loss: 0.0051
Epoch 10/10 - Loss: 0.0036


In [35]:
def evaluate_model():
    """Print token-level accuracy and macro-averaged P/R/F1 on ``test_loader``.

    Predictions come from ``forward_bilstm``; positions beyond each sentence's
    length, and any position whose gold tag is the padding tag, are ignored.
    Nothing is returned; the four metrics are printed.
    """
    pad_tag = tag2idx[PAD_TOKEN]
    true_all, pred_all = [], []

    with torch.no_grad():
        for x_batch, y_batch, lens, _perm in test_loader:
            logits = forward_bilstm(x_batch.to(DEVICE), lens)

            # Convert once per batch to plain Python lists instead of calling
            # .item() on a device tensor for every single token.
            pred_rows = torch.argmax(logits, dim=-1).cpu().tolist()
            gold_rows = y_batch.tolist()
            lengths = lens.tolist()

            # Rows of x/y/lens share the same (length-sorted) order, and the
            # metrics below are computed over the flat pool of tokens, so the
            # batch does not need to be restored to its original order.
            for gold, pred, n in zip(gold_rows, pred_rows, lengths):
                for t, p in zip(gold[:n], pred[:n]):
                    if t != pad_tag:
                        true_all.append(t)
                        pred_all.append(p)

    acc = accuracy_score(true_all, pred_all)
    prec = precision_score(true_all, pred_all, average="macro", zero_division=0)
    rec  = recall_score(true_all, pred_all, average="macro", zero_division=0)
    f1   = f1_score(true_all, pred_all, average="macro", zero_division=0)

    print("Accuracy :", round(acc,4))
    print("Avg Precision :", round(prec,4))
    print("Avg Recall :", round(rec,4))
    print("Avg F1 :", round(f1,4))

evaluate_model()


Accuracy : 0.9961
Avg Precision : 0.9733
Avg Recall : 0.9683
Avg F1 : 0.9705


In [9]:
def predict(tokens):
    """Tag a single tokenised sentence with the trained model.

    Args:
        tokens: Iterable of token strings. Tokens missing from ``word2idx``
            are mapped to the ``UNK_TOKEN`` id.

    Returns:
        List of ``(token, tag)`` pairs, one per input token. An empty input
        yields an empty list.
    """
    tokens = list(tokens)
    # pack_padded_sequence rejects zero-length sequences, so answer the empty
    # case here instead of crashing inside the model.
    if not tokens:
        return []

    unk_idx = word2idx[UNK_TOKEN]
    # Nested list -> batch of one sentence, shape (1, len(tokens)).
    encoded = torch.tensor(
        [[word2idx.get(t, unk_idx) for t in tokens]],
        dtype=torch.long
    ).to(DEVICE)

    # Lengths are consumed on the CPU by forward_bilstm.
    lens = torch.tensor([len(tokens)], dtype=torch.long)

    with torch.no_grad():
        logits = forward_bilstm(encoded, lens)
        preds = torch.argmax(logits, dim=-1)[0].cpu().tolist()

    return list(zip(tokens, [idx2tag[p] for p in preds]))

# Hand-written query used as a quick smoke test of predict().
example = [
    "Find", "the", "Silver", "van",
    "in", "the", "Top-Left",
]
predict(example)


[('Find', 'O'),
 ('the', 'O'),
 ('Silver', 'B-vehicle_type'),
 ('van', 'B-vehicle_type'),
 ('in', 'O'),
 ('the', 'O'),
 ('Top-Left', 'B-vehicle_location')]

In [11]:
!pip install seqeval


Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=2545a5ebebd87e7a73203450ca62216b2f0b6872c7d6ee4453ead5a34f409f2d
  Stored in directory: /root/.cache/pip/wheels/5f/b8/73/0b2c1a76b701a677653dd79ece07cfabd7457989dbfbdcd8d7
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [14]:
from seqeval.metrics import (
    classification_report,
    f1_score as seq_f1,
    precision_score as seq_precision,
    recall_score as seq_recall,
    accuracy_score as seq_accuracy
)
from sklearn.metrics import accuracy_score

def evaluate_keras_style():
    """Evaluate on ``test_loader`` with seqeval (entity level) and sklearn (token level).

    Prints seqeval's per-entity classification report and overall
    precision / recall / F1, plus token accuracy as computed by seqeval and
    by sklearn.

    Returns:
        Tuple ``(ent_precision, ent_recall, ent_f1, ent_accuracy,
        token_accuracy)``. ``ent_accuracy`` is seqeval's ``accuracy_score``,
        which is a token-level measure and therefore matches
        ``token_accuracy``.
    """
    pad_idx = tag2idx[PAD_TOKEN]

    # Per-sentence tag strings for seqeval.
    all_true_tags, all_pred_tags = [], []
    # Flat tag ids for the sklearn token-level accuracy.
    all_true_token, all_pred_token = [], []

    with torch.no_grad():
        for x_batch, y_batch, lens, _perm in test_loader:
            logits = forward_bilstm(x_batch.to(DEVICE), lens)

            # Convert once per batch to plain Python lists instead of calling
            # .item() on a device tensor for every single token.
            pred_rows = torch.argmax(logits, dim=-1).cpu().tolist()
            gold_rows = y_batch.tolist()
            lengths = lens.tolist()

            # Rows of x/y/lens share the same (length-sorted) order; gold and
            # predicted sequences stay paired, so restoring the original
            # batch order is unnecessary for these metrics.
            for gold, pred, n in zip(gold_rows, pred_rows, lengths):
                kept = [(t, p) for t, p in zip(gold[:n], pred[:n]) if t != pad_idx]

                all_true_tags.append([idx2tag[t] for t, p in kept])
                all_pred_tags.append([idx2tag[p] for t, p in kept])

                all_true_token.extend(t for t, p in kept)
                all_pred_token.extend(p for t, p in kept)

    # ============================
    #  ENTITY-LEVEL (SEQEVAL)
    # ============================
    print("\nEntity-level Evaluation (seqeval)\n")
    print(classification_report(all_true_tags, all_pred_tags, digits=4))

    ent_precision = seq_precision(all_true_tags, all_pred_tags)
    ent_recall    = seq_recall(all_true_tags, all_pred_tags)
    ent_f1        = seq_f1(all_true_tags, all_pred_tags)
    # seqeval's accuracy_score compares tags position by position, i.e. it is
    # a token-level accuracy, not an entity-level one.
    ent_accuracy  = seq_accuracy(all_true_tags, all_pred_tags)

    print("Entity-level Precision:", ent_precision)
    print("Entity-level Recall:   ", ent_recall)
    print("Entity-level F1 Score: ", ent_f1)
    print("seqeval Accuracy (token-level):", ent_accuracy)

    # ============================
    #  TOKEN-LEVEL (SKLEARN)
    # ============================
    token_accuracy = accuracy_score(all_true_token, all_pred_token)
    print("\nToken-level Accuracy:", token_accuracy)

    return ent_precision, ent_recall, ent_f1, ent_accuracy, token_accuracy

evaluate_keras_style()


Entity-level Evaluation (seqeval)

                          precision    recall  f1-score   support

           vehicle_brand     1.0000    0.9999    0.9999     14396
           vehicle_color     1.0000    0.9999    0.9999     15776
        vehicle_location     1.0000    1.0000    1.0000      9287
           vehicle_model     0.9998    0.9999    0.9998     14395
     vehicle_orientation     1.0000    0.9998    0.9999      4987
           vehicle_range     1.0000    1.0000    1.0000       181
            vehicle_type     1.0000    0.9993    0.9997      1451
        vehicle_type-bus     0.8659    0.8634    0.8646       344
      vehicle_type-coupe     0.8335    0.8852    0.8586       854
 vehicle_type-estate_car     0.7395    0.8361    0.7848       421
  vehicle_type-hatchback     0.7792    0.8249    0.8014      1279
 vehicle_type-motorcycle     0.9759    0.9831    0.9794      2302
        vehicle_type-mpv     0.7421    0.8497    0.7923       386
   vehicle_type-roadster     0.6676    

(np.float64(0.9717948717948718),
 np.float64(0.9836804152740539),
 np.float64(0.9777015228673468),
 0.9940432856077152,
 0.9940432856077152)