In [None]:
import spacy
import random
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, accuracy_score


In [3]:
nlp = spacy.load("en_core_web_sm")
print("spaCy NER model loaded")

spaCy NER model loaded


In [2]:
texts = [
    "Apple CEO Tim Cook visited India in 2023.",
    "Elon Musk acquired Twitter for $44 billion.",
    "The Prime Minister of India met Google executives in New Delhi."
]


In [4]:
for text in texts:
    doc = nlp(text)
    print(text)
    for ent in doc.ents:
        print(ent.text, "->", ent.label_)
    print()


Apple CEO Tim Cook visited India in 2023.
Apple -> ORG
Tim Cook -> PERSON
India -> GPE
2023 -> DATE

Elon Musk acquired Twitter for $44 billion.
Elon Musk -> PERSON
Twitter -> PERSON
$44 billion -> MONEY

The Prime Minister of India met Google executives in New Delhi.
India -> GPE
Google -> ORG
New Delhi -> GPE



In [5]:
test_data = [
    ("Apple CEO Tim Cook visited India in 2023.",
     [(0, 5, "ORG"), (10, 18, "PERSON"), (27, 32, "GPE"), (36, 40, "DATE")]),

    ("Elon Musk acquired Twitter for $44 billion.",
     [(0, 9, "PERSON"), (19, 26, "ORG"), (31, 43, "MONEY")]),

    ("Google announced new AI features in New York.",
     [(0, 6, "ORG"), (39, 47, "GPE")])
]


In [6]:
y_true = []
y_pred = []

for text, annotations in test_data:
    doc = nlp(text)

    true_entities = {(s, e, l) for s, e, l in annotations}
    pred_entities = {(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents}

    # True entities
    for ent in true_entities:
        y_true.append(ent[2])
        y_pred.append(ent[2] if ent in pred_entities else "O")

    # Extra predicted entities
    for ent in pred_entities:
        if ent not in true_entities:
            y_true.append("O")
            y_pred.append(ent[2])


In [7]:
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="macro", zero_division=0
)

accuracy = accuracy_score(y_true, y_pred)

print("Accuracy :", round(accuracy, 3))
print("Precision:", round(precision, 3))
print("Recall   :", round(recall, 3))
print("F1 Score :", round(f1, 3))


Accuracy : 0.462
Precision: 0.5
Recall   : 0.528
F1 Score : 0.5
