In [12]:
import os

import spacy
from datasets import load_dataset
from seqeval.metrics import classification_report
from spacy.tokens import DocBin

In [8]:
# Load the CoNLL-2003 dataset; its "train", "validation" and "test" splits are
# used further down.
# NOTE: trust_remote_code=True lets the dataset's loading script execute
# locally -- only enable it for sources you trust.
conll_dataset = load_dataset("conll2003", trust_remote_code=True)

#### spaCy (en_core_web_sm)

In [10]:
def convert_conll_to_spacy(dataset):
    """Convert a CoNLL-style token dataset into spaCy ``(text, annotations)`` pairs.

    Each example's tokens are joined with single spaces to form the text.
    Every token whose ``ner_tags`` id is non-zero becomes its own character
    span ``(start, end, label)``, where ``label`` is the tag name returned by
    the dataset's ``ner_tags`` feature, prefix included (e.g. ``"B-ORG"``).
    Multi-token entities are therefore emitted as one span per token, not as
    a single merged span.

    Args:
        dataset: Iterable of examples with ``"tokens"`` and ``"ner_tags"``
            keys that also exposes ``features["ner_tags"].feature.int2str``.

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})`` tuples,
        one per example, with character offsets into ``text``.
    """
    # The id -> name mapping is identical for every example, so resolve the
    # attribute chain once instead of once per entity token.
    int2str = dataset.features["ner_tags"].feature.int2str

    spacy_format = []
    for example in dataset:
        words = example["tokens"]
        entities = []
        offset = 0

        for word, label in zip(words, example["ner_tags"]):
            end = offset + len(word)
            if label != 0:
                entities.append((offset, end, int2str(label)))
            # +1 accounts for the single space that " ".join puts after the word.
            offset = end + 1

        spacy_format.append((" ".join(words), {"entities": entities}))
    return spacy_format

# Convert the training and validation splits into (text, annotations) pairs.
train_data = convert_conll_to_spacy(conll_dataset["train"])
dev_data = convert_conll_to_spacy(conll_dataset["validation"])

In [11]:
# Display the first converted training example as a sanity check.
train_data[0]

('EU rejects German call to boycott British lamb .',
 {'entities': [(0, 2, 'B-ORG'), (11, 17, 'B-MISC'), (34, 41, 'B-MISC')]})

In [38]:
# Blank English pipeline; create_docbin uses it to tokenize text via nlp.make_doc.
nlp = spacy.blank("en")
def create_docbin(data):
    """Pack ``(text, annotations)`` pairs into a spaCy ``DocBin``.

    Each text is tokenized with the module-level ``nlp`` pipeline and every
    ``(start, end, label)`` triple in ``annotations["entities"]`` is mapped to
    a token span with ``Doc.char_span(..., alignment_mode="contract")``.
    Triples for which ``char_span`` returns ``None`` cannot be attached to the
    doc; they are left out, and a single warning reports how many were dropped
    so the loss of training annotations is visible.

    Args:
        data: Iterable of ``(text, {"entities": [(start, end, label), ...]})``
            pairs with character offsets into ``text``.

    Returns:
        A ``DocBin`` holding one ``Doc`` per input pair.
    """
    import warnings  # local import: only needed for the diagnostic below

    doc_bin = DocBin()
    dropped = 0
    for text, annotations in data:
        doc = nlp.make_doc(text)
        ents = []
        for start, end, label in annotations["entities"]:
            span = doc.char_span(start, end, label=label, alignment_mode="contract")
            if span is None:
                dropped += 1
                continue
            ents.append(span)
        doc.ents = ents
        doc_bin.add(doc)

    if dropped:
        warnings.warn(
            f"create_docbin: dropped {dropped} entity span(s) that could not be "
            "aligned to token boundaries",
            stacklevel=2,
        )
    return doc_bin

# Build one DocBin per split.
train_docbin = create_docbin(train_data)
dev_docbin = create_docbin(dev_data)

# Serialize both splits; the paths are relative, one directory above the
# current working directory.
train_docbin.to_disk("../train.spacy")
dev_docbin.to_disk("../dev.spacy")

In [39]:
# Test split used for the evaluation cells that follow.
test_data = conll_dataset['test']

In [40]:
# Rebuild each test sentence by joining its tokens with single spaces.
test_sentences = [" ".join(example["tokens"]) for example in test_data]

# Gold tag names per sentence. int2str accepts a whole list of ids, so each
# sentence is decoded with one call instead of one call per tag.
true_labels = [
    test_data.features['ner_tags'].feature.int2str(example["ner_tags"])
    for example in test_data
]

In [41]:
# Load the trained pipeline. The default below is an absolute path on the
# original author's machine; set the NER_MODEL_DIR environment variable to
# point at the "model-best" directory on any other machine.
nlp_ner = spacy.load(
    os.environ.get(
        "NER_MODEL_DIR",
        r"C:\Users\shiva\OneDrive\Desktop\Projects\Named Entity Recognition\model-best",
    )
)

In [42]:
# Project the predicted entity spans back onto whitespace-delimited tokens so
# that each prediction list lines up position by position with sentence.split().
predicted_labels = []

for sentence in test_sentences:
    doc = nlp_ner(sentence)
    pred_tags = ["O"] * len(sentence.split())

    for ent in doc.ents:
        # Count of whitespace tokens that begin before the entity starts.
        start_idx = len(sentence[:ent.start_char].split())
        # The pipeline's tokenizer can split a whitespace token further, so an
        # entity may begin in the middle of one. The count above then already
        # includes that partially covered token; step back one index so the
        # token is tagged instead of being skipped.
        if ent.start_char > 0 and not sentence[ent.start_char - 1].isspace():
            start_idx -= 1
        end_idx = len(sentence[:ent.end_char].split())
        for i in range(start_idx, end_idx):
            # Two entities can only share a whitespace token in the mid-token
            # case handled above; keep the first tag assigned to that token.
            if pred_tags[i] == "O":
                pred_tags[i] = ent.label_

    predicted_labels.append(pred_tags)


In [32]:
# NOTE(review): this cell's execution count (32) is lower than that of the
# prediction cell (42), so the report shown beneath it presumably comes from
# an earlier run -- confirm, or remove this duplicate of the evaluation cell.
print("Evaluation Report:")
print(classification_report(true_labels, predicted_labels))

Evaluation Report:
              precision    recall  f1-score   support

         LOC       0.82      0.86      0.84      1668
        MISC       0.71      0.75      0.73       702
         ORG       0.72      0.74      0.73      1661
         PER       0.79      0.83      0.81      1617

   micro avg       0.77      0.80      0.78      5648
   macro avg       0.76      0.79      0.78      5648
weighted avg       0.77      0.80      0.78      5648



In [43]:
# Score the predicted tag sequences against the gold tags with seqeval's
# classification_report.
print("Evaluation Report:")
print(classification_report(true_labels, predicted_labels))

Evaluation Report:
              precision    recall  f1-score   support

         LOC       0.86      0.83      0.85      1668
        MISC       0.72      0.78      0.75       702
         ORG       0.76      0.72      0.74      1661
         PER       0.76      0.87      0.81      1617

   micro avg       0.78      0.80      0.79      5648
   macro avg       0.78      0.80      0.79      5648
weighted avg       0.79      0.80      0.79      5648

