In [1]:
import torch
import torch.nn as nn


from torch.utils.data import Dataset, DataLoader, ConcatDataset
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from tqdm import tqdm

import numpy as np

import pickle
import os

import warnings
warnings.filterwarnings('ignore')

In [2]:
def create_vocab(file_path):
    """Build the index vocabularies from a four-column, whitespace-separated file.

    Every non-blank line of ``file_path`` must contain exactly four
    whitespace-separated fields: word, POS tag, chunk tag and NER tag.
    Blank lines are skipped.

    Args:
        file_path: Path of the UTF-8 encoded data file.

    Returns:
        A tuple ``(word2idx, pos2idx, chunk2idx, tag2idx)``:

        * ``word2idx`` maps ``"<PAD>"`` to 0, ``"<UNK>"`` to 1 and the
          remaining words (sorted) to 2, 3, ...
        * ``pos2idx`` and ``chunk2idx`` map the sorted labels to 0, 1, ...
        * ``tag2idx`` maps ``"O"`` to 0 and the remaining tags (sorted)
          to 1, 2, ...

        All four mappings use contiguous indices, so ``len(mapping)`` is a
        valid size for an embedding or output layer.

    Raises:
        ValueError: If a non-blank line does not split into four fields.
    """
    word_set = set()
    pos_set = set()
    chunk_set = set()
    tag_set = set()

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                word, pos, chunk, tag = line.split()
                word_set.add(word)
                pos_set.add(pos)
                chunk_set.add(chunk)
                tag_set.add(tag)

    # Reserved entries are removed *before* numbering. Numbering them and then
    # overwriting their index would leave a hole, making the largest index
    # equal to len(mapping) and therefore out of range for a layer sized by len().
    word2idx = {"<PAD>": 0, "<UNK>": 1}
    regular_words = sorted(word_set - {"<PAD>", "<UNK>"})
    word2idx.update((word, idx) for idx, word in enumerate(regular_words, start=2))

    pos2idx = {pos: idx for idx, pos in enumerate(sorted(pos_set))}
    chunk2idx = {chunk: idx for idx, chunk in enumerate(sorted(chunk_set))}

    tag2idx = {"O": 0}
    entity_tags = sorted(tag_set - {"O"})
    tag2idx.update((tag, idx) for idx, tag in enumerate(entity_tags, start=1))

    return word2idx, pos2idx, chunk2idx, tag2idx


In [3]:
class MultiLabelNERDataset(Dataset):
    """Sentence-level dataset yielding word, POS, chunk and NER index tensors.

    The file is read once at construction time. Each item is a tuple of four
    ``torch.long`` tensors of length ``max_len``: word indices, POS indices,
    chunk indices and NER tag indices. Sentences longer than ``max_len`` are
    truncated; shorter ones are padded (words with the ``"<PAD>"`` index, the
    three label sequences with 0).
    """

    def __init__(self, file_path, word2idx, pos2idx, chunk2idx, tag2idx, max_len):
        self.data = self._read_file(file_path)
        self.word2idx, self.pos2idx = word2idx, pos2idx
        self.chunk2idx, self.tag2idx = chunk2idx, tag2idx
        self.max_len = max_len
        self.pad_idx = word2idx["<PAD>"]

    def _read_file(self, file_path):
        """Parse the file into a list of ``(words, pos_tags, chunks, tags)`` tuples.

        Non-blank lines must hold four whitespace-separated fields; one or
        more blank lines end the current sentence.
        """
        sentences = []
        current = ([], [], [], [])
        with open(file_path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if not stripped:
                    # Blank line: close the sentence if anything was collected.
                    if current[0]:
                        sentences.append(current)
                        current = ([], [], [], [])
                    continue
                word, pos, chunk, tag = stripped.split()
                for column, value in zip(current, (word, pos, chunk, tag)):
                    column.append(value)
            # The final sentence has no trailing blank line to close it.
            if current[0]:
                sentences.append(current)
        return sentences

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        words, pos_tags, chunks, tags = self.data[idx]
        limit = self.max_len

        def fit(indices, fill):
            # Clip to `limit`, then right-pad; a negative repeat count yields [].
            return indices[:limit] + [fill] * (limit - len(indices))

        unk_idx = self.word2idx["<UNK>"]
        # Unknown words fall back to <UNK>; unknown labels fall back to index 0.
        encoded = (
            fit([self.word2idx.get(token, unk_idx) for token in words], self.pad_idx),
            fit([self.pos2idx.get(label, 0) for label in pos_tags], 0),
            fit([self.chunk2idx.get(label, 0) for label in chunks], 0),
            fit([self.tag2idx.get(label, 0) for label in tags], 0),
        )
        return tuple(torch.tensor(column, dtype=torch.long) for column in encoded)

# Collate function for DataLoader
def collate_fn(batch):
    """Stack per-sample tensors into batch tensors.

    Args:
        batch: Sequence of 4-tuples ``(words, pos_tags, chunks, tags)`` whose
            tensors have matching shapes across samples.

    Returns:
        A 4-tuple of tensors, each stacked along a new leading dimension.
    """
    words, pos_tags, chunks, tags = zip(*batch)
    columns = (words, pos_tags, chunks, tags)
    return tuple(torch.stack(column, dim=0) for column in columns)

In [4]:
class BiLSTMNERMultiLabel(nn.Module):
    """Shared embedding + bidirectional LSTM encoder with three token-level heads.

    The encoder output at every position is fed to three independent linear
    layers that produce POS, chunk and NER logits.
    """

    def __init__(self, vocab_size, pos_size, chunk_size, ner_size, embed_dim, hidden_dim):
        super().__init__()
        encoder_dim = hidden_dim * 2  # forward and backward states are concatenated

        # Index 0 is treated as padding by the embedding layer.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, bidirectional=True, batch_first=True)

        # One classification head per task.
        self.fc_pos = nn.Linear(encoder_dim, pos_size)
        self.fc_chunk = nn.Linear(encoder_dim, chunk_size)
        self.fc_ner = nn.Linear(encoder_dim, ner_size)

    def forward(self, x):
        """Return ``(pos_logits, chunk_logits, ner_logits)`` for a batch of word indices.

        ``x`` is a batch-first tensor of word indices; each output keeps the
        leading dimensions of ``x`` and ends in the size of its label set.
        """
        encoded, _ = self.lstm(self.embedding(x))
        return self.fc_pos(encoded), self.fc_chunk(encoded), self.fc_ner(encoded)

In [5]:
def compute_loss(pos_out, chunk_out, ner_out, pos_labels, chunk_labels, ner_labels, pos_weight=0.5, chunk_weight=0.5, ner_weight=1.0):
    """Weighted sum of the cross-entropy losses of the three tagging heads.

    Args:
        pos_out, chunk_out, ner_out: Logit tensors whose last dimension is the
            number of classes of the respective head.
        pos_labels, chunk_labels, ner_labels: Integer class-index tensors with
            one entry per logit row once the leading dimensions are flattened.
        pos_weight, chunk_weight, ner_weight: Scalar multipliers applied to
            the POS, chunk and NER losses.

    Returns:
        A scalar tensor:
        ``pos_weight * pos_loss + chunk_weight * chunk_loss + ner_weight * ner_loss``.

    Note:
        The criterion is a plain ``nn.CrossEntropyLoss()`` (mean reduction, no
        ``ignore_index``), so every position passed in contributes to the loss.
    """
    criterion = nn.CrossEntropyLoss()

    def head_loss(logits, labels):
        # reshape (not view) so non-contiguous inputs, e.g. transposed tensors,
        # are flattened instead of raising a RuntimeError.
        return criterion(logits.reshape(-1, logits.shape[-1]), labels.reshape(-1))

    pos_loss = head_loss(pos_out, pos_labels)
    chunk_loss = head_loss(chunk_out, chunk_labels)
    ner_loss = head_loss(ner_out, ner_labels)
    return pos_weight * pos_loss + chunk_weight * chunk_loss + ner_weight * ner_loss


In [6]:
def train_model(model, train_loader, optimizer, device, pos_weight=1.0, chunk_weight=1.0, ner_weight=1.0):
    """Run one pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: Module returning ``(pos_out, chunk_out, ner_out)`` for a batch
            of word indices.
        train_loader: Iterable of ``(words, pos_labels, chunk_labels, ner_labels)``
            batches; must support ``len()``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device the batch tensors are moved to.
        pos_weight, chunk_weight, ner_weight: Per-task loss weights forwarded
            to ``compute_loss``.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    progress = tqdm(train_loader, desc="Training", unit="batch")
    for words, pos_labels, chunk_labels, ner_labels in progress:
        words, pos_labels, chunk_labels, ner_labels = (
            tensor.to(device)
            for tensor in (words, pos_labels, chunk_labels, ner_labels)
        )

        optimizer.zero_grad()
        pos_out, chunk_out, ner_out = model(words)
        batch_loss = compute_loss(
            pos_out, chunk_out, ner_out,
            pos_labels, chunk_labels, ner_labels,
            pos_weight, chunk_weight, ner_weight,
        )
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    return running_loss / len(train_loader)


In [7]:
def flatten(list_of_lists):
    """Concatenate the items of every sub-iterable into one flat list (one level deep)."""
    flat = []
    for sublist in list_of_lists:
        flat.extend(sublist)
    return flat

In [8]:
def evaluate_model(model, loader, idx2pos, idx2chunk, idx2tag, device, pad_idx=0):
    """Evaluate all three heads and return their classification reports.

    Args:
        model: Module whose output's first three elements are the POS, chunk
            and NER logits for a batch of word indices.
        loader: Iterable of ``(words, pos_tags, chunk_tags, ner_tags)`` batches.
        idx2pos, idx2chunk, idx2tag: Index-to-label mappings for each task.
        device: Device the word indices are moved to before the forward pass.
        pad_idx: Word index that marks padding. Positions whose word index
            equals ``pad_idx`` are excluded from every report.

    Returns:
        ``(pos_report, chunk_report, ner_report)`` as produced by
        ``classification_report``. The NER report additionally skips every
        position whose gold tag is ``"O"`` and omits ``"O"`` from its labels.
    """
    model.eval()
    all_preds_pos, all_labels_pos = [], []
    all_preds_chunk, all_labels_chunk = [], []
    all_preds_ner, all_labels_ner = [], []

    def real_tokens(tensor, keep):
        # Boolean-mask indexing returns a 1-D tensor, so results are already flat.
        return tensor.cpu()[keep].tolist()

    with torch.no_grad():
        for batch in loader:
            words, pos_tags, chunk_tags, ner_tags = batch
            words = words.to(device)

            # Forward pass
            outputs = model(words)
            pos_output, chunk_output, ner_output = outputs[0], outputs[1], outputs[2]

            # Padded positions carry label index 0, which is also a real class
            # in the POS and chunk vocabularies; counting them would inflate
            # that class, so only non-padding positions are kept.
            keep = (words != pad_idx).cpu()

            all_preds_pos.extend(real_tokens(torch.argmax(pos_output, dim=-1), keep))
            all_labels_pos.extend(real_tokens(pos_tags, keep))

            all_preds_chunk.extend(real_tokens(torch.argmax(chunk_output, dim=-1), keep))
            all_labels_chunk.extend(real_tokens(chunk_tags, keep))

            all_preds_ner.extend(real_tokens(torch.argmax(ner_output, dim=-1), keep))
            all_labels_ner.extend(real_tokens(ner_tags, keep))

    # Exclude 'O' tag for NER: keep only positions whose gold tag is an entity tag.
    ner_labels_filtered, ner_preds_filtered = [], []
    for label, pred in zip(all_labels_ner, all_preds_ner):
        if idx2tag[label] != "O":
            ner_labels_filtered.append(label)
            ner_preds_filtered.append(pred)

    # Generate classification reports
    pos_labels = list(idx2pos.keys())
    chunk_labels = list(idx2chunk.keys())
    ner_labels = [k for k, v in idx2tag.items() if v != "O"]

    pos_report = classification_report(all_labels_pos, all_preds_pos, labels=pos_labels, target_names=[idx2pos[i] for i in pos_labels])
    chunk_report = classification_report(all_labels_chunk, all_preds_chunk, labels=chunk_labels, target_names=[idx2chunk[i] for i in chunk_labels])
    ner_report = classification_report(ner_labels_filtered, ner_preds_filtered, labels=ner_labels, target_names=[idx2tag[i] for i in ner_labels])

    return pos_report, chunk_report, ner_report


In [9]:
def save_model(model, path, vocab):
    """Save the model weights and the vocabulary next to each other.

    Args:
        model: Module whose ``state_dict()`` is written to ``{path}.pth``.
        path: Output path prefix (no extension). Its parent directory is
            created when missing; a bare file name saves into the current
            working directory.
        vocab: Picklable object written to ``{path}_vocab.pkl``.
    """
    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Save model weights
    torch.save(model.state_dict(), f"{path}.pth")

    # Save vocab
    with open(f"{path}_vocab.pkl", "wb") as f:
        pickle.dump(vocab, f)

    print(f"Model and vocab saved to {path}.pth and {path}_vocab.pkl")

In [10]:
def load_model(model_class, path, device, embed_dim, hidden_dim):
    """Rebuild a model from ``{path}.pth`` and ``{path}_vocab.pkl``.

    Args:
        model_class: Callable invoked as ``model_class(vocab_size, pos_size,
            chunk_size, ner_size, embed_dim, hidden_dim)``; the four sizes are
            the lengths of the loaded vocabularies.
        path: Path prefix shared by the weight and vocabulary files.
        device: Device used as ``map_location`` and as the model's final device.
        embed_dim: Embedding size passed to ``model_class``.
        hidden_dim: Hidden size passed to ``model_class``.

    Returns:
        ``(model, word2idx, pos2idx, chunk2idx, tag2idx)`` with the model in
        evaluation mode.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load
    # vocabulary files that come from a trusted source.
    with open(f"{path}_vocab.pkl", "rb") as vocab_file:
        vocab = pickle.load(vocab_file)

    word2idx = vocab["word2idx"]
    pos2idx = vocab["pos2idx"]
    chunk2idx = vocab["chunk2idx"]
    tag2idx = vocab["tag2idx"]

    # Layer sizes are derived from the vocabulary sizes.
    sizes = [len(mapping) for mapping in (word2idx, pos2idx, chunk2idx, tag2idx)]
    model = model_class(*sizes, embed_dim, hidden_dim)

    state = torch.load(f"{path}.pth", map_location=device)
    model.load_state_dict(state)
    model.to(device)
    model.eval()

    print("Model and vocab loaded successfully.")
    return model, word2idx, pos2idx, chunk2idx, tag2idx


In [11]:
def predict(model, text, word2idx, pos2idx, chunk2idx, idx2tag, max_len, device):
    """Tag the whitespace-separated words of ``text`` with the model's NER head.

    Args:
        model: Module returning ``(pos_out, chunk_out, ner_out)`` for a batch
            of word indices; only ``ner_out`` is used.
        text: Input sentence; it is split on whitespace.
        word2idx: Word-to-index mapping containing ``"<PAD>"`` and ``"<UNK>"``.
        pos2idx: Unused; kept so existing calls keep working.
        chunk2idx: Unused; kept so existing calls keep working.
        idx2tag: Index-to-tag mapping for the NER head.
        max_len: Length the input is truncated/padded to.
        device: Device the input tensor is moved to.

    Returns:
        A list of ``(word, [tag])`` pairs, one per input word, where ``tag``
        is the highest-scoring NER tag. Only the first ``max_len`` words are
        tagged; any further words are left out of the result.
    """
    words = text.split()
    unk_idx, pad_idx = word2idx["<UNK>"], word2idx["<PAD>"]
    word_indices = [word2idx.get(w, unk_idx) for w in words][:max_len]
    word_indices += [pad_idx] * (max_len - len(word_indices))

    model_input = torch.tensor([word_indices], dtype=torch.long).to(device)

    with torch.no_grad():
        _, _, ner_out = model(model_input)

    # Take the argmax of the raw logits. Squashing them through a sigmoid
    # first can saturate several large logits to exactly 1.0 in float32,
    # and the resulting tie would be resolved in favour of the wrong class.
    tag_ids = ner_out[0].argmax(dim=-1).tolist()

    # zip() stops at the shorter sequence, so padding positions are ignored.
    return [(word, [idx2tag[tag_id]]) for word, tag_id in zip(words, tag_ids)]


In [12]:
# Data files: training, validation and test splits
train_file = "data/eng/eng.train"
val_file = "data/eng/eng.testa"
test_file = "data/eng/eng.testb"

# Vocabularies are built from the training split only
word2idx, pos2idx, chunk2idx, tag2idx = create_vocab(train_file)

# Reverse (index -> label) lookups used when reporting results
idx2pos = dict(zip(pos2idx.values(), pos2idx.keys()))
idx2chunk = dict(zip(chunk2idx.values(), chunk2idx.keys()))
idx2tag = dict(zip(tag2idx.values(), tag2idx.keys()))

In [13]:
# Display the NER tag-to-index mapping built from the training file.
tag2idx

{'B-LOC': 1,
 'B-MISC': 2,
 'B-ORG': 3,
 'I-LOC': 4,
 'I-MISC': 5,
 'I-ORG': 6,
 'I-PER': 7,
 'O': 0}

In [14]:
# Hyperparameters and run settings
embed_dim = 128
hidden_dim = 256
batch_size = 32
epochs = 10
max_len = 50  # every sentence is truncated/padded to this many tokens
seed = 42
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start PyTorch's global RNG from a fixed state so a fresh
# Restart-and-Run-All begins from the same random numbers.
torch.manual_seed(seed)

# All three splits share the vocabularies built from the training file.
train_dataset = MultiLabelNERDataset(train_file, word2idx, pos2idx, chunk2idx, tag2idx, max_len=max_len)
val_dataset = MultiLabelNERDataset(val_file, word2idx, pos2idx, chunk2idx, tag2idx, max_len=max_len)
test_dataset = MultiLabelNERDataset(test_file, word2idx, pos2idx, chunk2idx, tag2idx, max_len=max_len)

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collate_fn)

In [15]:
# Train + validation splits merged into a single shuffled loader.
combined_dataset = ConcatDataset([train_dataset, val_dataset])
combined_loader = DataLoader(
    combined_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

In [16]:
# Output sizes of the three heads come from the vocabulary sizes.
model = BiLSTMNERMultiLabel(len(word2idx), len(pos2idx), len(chunk2idx), len(tag2idx), embed_dim, hidden_dim).to(device)
# Adam over all model parameters with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [17]:
# Train for `epochs` epochs; after each one print the mean training loss and
# the NER report on the validation split. train_model is called without
# loss weights, so its own defaults apply.
for epoch in range(epochs):
    print(f"Epoch [{epoch + 1}/{epochs}]")
    train_loss = train_model(model, train_loader, optimizer, device)
    print(f"Training Loss: {train_loss:.4f}")

    print("Validation:")
    # The POS and chunk reports are computed too but not printed here.
    pos_report, chunk_report, ner_report = evaluate_model(model, val_loader, idx2pos, idx2chunk, idx2tag, device)
    print("NER Classification Report:\n", ner_report)

Epoch [1/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 79.24batch/s]


Training Loss: 0.8785
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.86      0.36      0.51      2088
      I-MISC       0.85      0.11      0.19      1258
       I-ORG       0.78      0.25      0.38      2085
       I-PER       0.89      0.37      0.52      3053

   micro avg       0.85      0.30      0.44      8488
   macro avg       0.48      0.16      0.23      8488
weighted avg       0.85      0.30      0.44      8488

Epoch [2/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 84.29batch/s]


Training Loss: 0.3334
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.89      0.60      0.72      2088
      I-MISC       0.84      0.50      0.62      1258
       I-ORG       0.78      0.52      0.62      2085
       I-PER       0.82      0.77      0.79      3053

   micro avg       0.83      0.63      0.71      8488
   macro avg       0.48      0.34      0.39      8488
weighted avg       0.83      0.63      0.71      8488

Epoch [3/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 83.26batch/s]


Training Loss: 0.2205
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.92      0.69      0.79      2088
      I-MISC       0.88      0.62      0.72      1258
       I-ORG       0.82      0.61      0.70      2085
       I-PER       0.84      0.85      0.84      3053

   micro avg       0.86      0.72      0.78      8488
   macro avg       0.50      0.39      0.44      8488
weighted avg       0.86      0.72      0.78      8488

Epoch [4/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 83.49batch/s]


Training Loss: 0.1550
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.94      0.73      0.82      2088
      I-MISC       0.92      0.65      0.76      1258
       I-ORG       0.78      0.72      0.75      2085
       I-PER       0.90      0.83      0.86      3053

   micro avg       0.88      0.75      0.81      8488
   macro avg       0.51      0.42      0.46      8488
weighted avg       0.88      0.75      0.81      8488

Epoch [5/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.86batch/s]


Training Loss: 0.1102
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.91      0.79      0.85      2088
      I-MISC       0.91      0.71      0.80      1258
       I-ORG       0.89      0.69      0.77      2085
       I-PER       0.87      0.89      0.88      3053

   micro avg       0.89      0.79      0.84      8488
   macro avg       0.51      0.44      0.47      8488
weighted avg       0.89      0.79      0.83      8488

Epoch [6/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.85batch/s]


Training Loss: 0.0775
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.89      0.85      0.87      2088
      I-MISC       0.91      0.74      0.82      1258
       I-ORG       0.87      0.73      0.79      2085
       I-PER       0.92      0.86      0.89      3053

   micro avg       0.90      0.81      0.85      8488
   macro avg       0.51      0.45      0.48      8488
weighted avg       0.90      0.81      0.85      8488

Epoch [7/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.45batch/s]


Training Loss: 0.0531
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.92      0.84      0.88      2088
      I-MISC       0.93      0.73      0.82      1258
       I-ORG       0.87      0.73      0.80      2085
       I-PER       0.92      0.85      0.89      3053

   micro avg       0.91      0.80      0.85      8488
   macro avg       0.52      0.45      0.48      8488
weighted avg       0.91      0.80      0.85      8488

Epoch [8/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.54batch/s]


Training Loss: 0.0352
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.93      0.81      0.87      2088
      I-MISC       0.91      0.74      0.82      1258
       I-ORG       0.88      0.74      0.80      2085
       I-PER       0.92      0.87      0.89      3053

   micro avg       0.91      0.80      0.85      8488
   macro avg       0.52      0.45      0.48      8488
weighted avg       0.91      0.80      0.85      8488

Epoch [9/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 81.79batch/s]


Training Loss: 0.0223
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.00      0.00      0.00         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.93      0.83      0.87      2088
      I-MISC       0.89      0.76      0.82      1258
       I-ORG       0.89      0.74      0.81      2085
       I-PER       0.93      0.84      0.89      3053

   micro avg       0.92      0.80      0.85      8488
   macro avg       0.52      0.45      0.48      8488
weighted avg       0.91      0.80      0.85      8488

Epoch [10/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 81.72batch/s]


Training Loss: 0.0137
Validation:
NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         0
      B-MISC       0.20      0.25      0.22         4
       B-ORG       0.00      0.00      0.00         0
       I-LOC       0.92      0.84      0.88      2088
      I-MISC       0.91      0.75      0.82      1258
       I-ORG       0.88      0.74      0.80      2085
       I-PER       0.93      0.84      0.88      3053

   micro avg       0.91      0.80      0.85      8488
   macro avg       0.55      0.49      0.52      8488
weighted avg       0.91      0.80      0.85      8488



In [None]:
# save_model(model, "save/models/multilabel_bilstm_1", {
#     "word2idx": word2idx,
#     "pos2idx": pos2idx,
#     "chunk2idx": chunk2idx,
#     "tag2idx": tag2idx,
# })


Model and vocab saved to save/models/multilabel_bilstm_ok.pth and save/models/multilabel_bilstm_ok_vocab.pkl


In [None]:
# loaded_model, loaded_word2idx, loaded_pos2idx, loaded_chunk2idx, loaded_tag2idx = load_model(
#     BiLSTMNERMultiLabel,
#     "save/models/multilabel_bilstm",
#     device,
#     embed_dim,
#     hidden_dim
# )
# loaded_idx2tag = {idx: tag for tag, idx in loaded_tag2idx.items()}


Model and vocab loaded successfully.


In [20]:
def evaluate_on_test_exclude_o(model, test_loader, idx2tag, device):
    """Token-level NER accuracy over positions whose gold tag is not ``"O"``.

    Args:
        model: Module returning ``(pos_out, chunk_out, ner_out)`` for a batch
            of word indices.
        test_loader: Iterable of ``(words, pos_tags, chunk_tags, ner_tags)``
            batches; only ``words`` and ``ner_tags`` are used.
        idx2tag: Index-to-tag mapping for the NER head.
        device: Device the word indices are moved to before the forward pass.

    Returns:
        ``(accuracy, predicted_tags, true_tags)``: the ``accuracy_score`` over
        the kept positions plus the predicted and gold tag strings for those
        positions, in loader order.
    """
    model.eval()
    all_preds_ner, all_labels_ner = [], []

    with torch.no_grad():
        for batch in test_loader:
            # POS and chunk labels are not needed for this metric, and the NER
            # labels are only compared on the CPU, so only `words` is moved.
            words, _pos_tags, _chunk_tags, ner_tags = batch

            # Forward pass; outputs are (POS, Chunk, NER)
            _, _, ner_output = model(words.to(device))
            ner_preds = torch.argmax(ner_output, dim=-1)  # one index per token

            flat_preds = ner_preds.reshape(-1).tolist()
            flat_labels = ner_tags.reshape(-1).tolist()

            # Keep positions whose gold tag is not 'O'. This is a filter on the
            # gold tag, not on padding itself.
            for pred, label in zip(flat_preds, flat_labels):
                if idx2tag[label] != "O":
                    all_preds_ner.append(pred)
                    all_labels_ner.append(label)

    # Convert to tags
    predicted_tags = [idx2tag[idx] for idx in all_preds_ner]
    true_tags = [idx2tag[idx] for idx in all_labels_ner]

    # Calculate accuracy
    accuracy = accuracy_score(true_tags, predicted_tags)

    return accuracy, predicted_tags, true_tags


In [21]:
# Accuracy on the test split over positions whose gold tag is not "O".
accuracy, predicted_tags, true_tags = evaluate_on_test_exclude_o(model, test_loader, idx2tag, device)

In [22]:
# Display the accuracy computed in the previous cell.
accuracy

0.7390054972513743

In [23]:
# First ten (predicted, gold) tag pairs.
list(zip(predicted_tags, true_tags))[:10]

[('I-ORG', 'I-LOC'),
 ('O', 'I-PER'),
 ('I-ORG', 'I-PER'),
 ('O', 'I-PER'),
 ('I-LOC', 'I-LOC'),
 ('I-LOC', 'I-LOC'),
 ('I-LOC', 'I-LOC'),
 ('I-ORG', 'I-LOC'),
 ('I-LOC', 'I-LOC'),
 ('I-MISC', 'I-MISC')]

In [24]:
# Continue training the same model with the same optimizer for another
# `epochs` epochs; no validation is run in this loop.
for epoch in range(epochs):
    print(f"Epoch [{epoch + 1}/{epochs}]")
    train_loss = train_model(model, train_loader, optimizer, device)
    print(f"Training Loss: {train_loss:.4f}")

Epoch [1/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.35batch/s]


Training Loss: 0.0082
Epoch [2/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.20batch/s]


Training Loss: 0.0051
Epoch [3/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.38batch/s]


Training Loss: 0.0031
Epoch [4/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.60batch/s]


Training Loss: 0.0022
Epoch [5/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 83.65batch/s]


Training Loss: 0.0019
Epoch [6/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.53batch/s]


Training Loss: 0.0016
Epoch [7/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.37batch/s]


Training Loss: 0.0025
Epoch [8/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.57batch/s]


Training Loss: 0.0064
Epoch [9/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 82.47batch/s]


Training Loss: 0.0033
Epoch [10/10]


Training: 100%|██████████| 469/469 [00:05<00:00, 80.88batch/s]

Training Loss: 0.0012





In [None]:
# text = "The European Union is headquartered in Brussels"
# text = "EU rejects German call to boycott British lamb"
# predictions = predict(
#     loaded_model,
#     text,
#     loaded_word2idx,
#     loaded_pos2idx,
#     loaded_chunk2idx,
#     loaded_idx2tag,
#     max_len=50,
#     device=device
# )
# print(predictions)


[('EU', ['I-ORG']), ('rejects', ['O']), ('German', ['I-MISC']), ('call', ['O']), ('to', ['O']), ('boycott', ['O']), ('British', ['I-MISC']), ('lamb', ['O'])]


In [26]:
# Re-run the test-split accuracy (gold "O" positions excluded) after the extra training epochs.
accuracy, predicted_tags, true_tags = evaluate_on_test_exclude_o(model, test_loader, idx2tag, device)

In [27]:
# Display the accuracy computed in the previous cell.
accuracy

0.7433783108445777

In [28]:
# Last ten (predicted, gold) tag pairs.
list(zip(predicted_tags, true_tags))[-10:]

[('I-LOC', 'I-LOC'),
 ('I-ORG', 'I-MISC'),
 ('I-LOC', 'I-LOC'),
 ('I-ORG', 'I-ORG'),
 ('I-ORG', 'I-ORG'),
 ('I-LOC', 'I-LOC'),
 ('O', 'I-MISC'),
 ('I-MISC', 'I-MISC'),
 ('I-MISC', 'I-MISC'),
 ('I-PER', 'I-PER')]

In [29]:
# Per-class reports on the test split; only the NER report is printed.
pos_report, chunk_report, ner_report = evaluate_model(model, test_loader, idx2pos, idx2chunk, idx2tag, device)
print("NER Classification Report:\n", ner_report)

NER Classification Report:
               precision    recall  f1-score   support

       B-LOC       0.00      0.00      0.00         6
      B-MISC       0.00      0.00      0.00         9
       B-ORG       0.00      0.00      0.00         5
       I-LOC       0.90      0.77      0.83      1905
      I-MISC       0.87      0.67      0.75       908
       I-ORG       0.84      0.72      0.77      2480
       I-PER       0.91      0.78      0.84      2691

   micro avg       0.88      0.74      0.81      8004
   macro avg       0.50      0.42      0.46      8004
weighted avg       0.88      0.74      0.80      8004

