In [2]:
!pip install spacy scikit-learn
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m98.5 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [1]:
import spacy
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


In [2]:
nlp = spacy.load("en_core_web_sm")


In [3]:
texts = [
    "Apple CEO Tim Cook visited India in April.",
    "Elon Musk is the CEO of Tesla and SpaceX.",
    "Prime Minister Narendra Modi addressed the UN in New York.",
    "Google was founded in California."
]


In [4]:
true_entities = [
    [("Apple", "ORG"), ("Tim Cook", "PERSON"), ("India", "GPE"), ("April", "DATE")],
    [("Elon Musk", "PERSON"), ("Tesla", "ORG"), ("SpaceX", "ORG")],
    [("Narendra Modi", "PERSON"), ("UN", "ORG"), ("New York", "GPE")],
    [("Google", "ORG"), ("California", "GPE")]
]


In [5]:
predicted_entities = []

for text in texts:
    doc = nlp(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    predicted_entities.append(entities)

for i in range(len(texts)):
    print(f"\nText: {texts[i]}")
    print("Predicted:", predicted_entities[i])
    print("Actual   :", true_entities[i])



Text: Apple CEO Tim Cook visited India in April.
Predicted: [('Apple', 'ORG'), ('Tim Cook', 'PERSON'), ('India', 'GPE'), ('April', 'DATE')]
Actual   : [('Apple', 'ORG'), ('Tim Cook', 'PERSON'), ('India', 'GPE'), ('April', 'DATE')]

Text: Elon Musk is the CEO of Tesla and SpaceX.
Predicted: [('Elon Musk', 'PERSON'), ('Tesla', 'ORG')]
Actual   : [('Elon Musk', 'PERSON'), ('Tesla', 'ORG'), ('SpaceX', 'ORG')]

Text: Prime Minister Narendra Modi addressed the UN in New York.
Predicted: [('Narendra Modi', 'PERSON'), ('UN', 'ORG'), ('New York', 'GPE')]
Actual   : [('Narendra Modi', 'PERSON'), ('UN', 'ORG'), ('New York', 'GPE')]

Text: Google was founded in California.
Predicted: [('Google', 'ORG'), ('California', 'GPE')]
Actual   : [('Google', 'ORG'), ('California', 'GPE')]


In [6]:
y_true = []
y_pred = []

for i, text in enumerate(texts):
    doc = nlp(text)
    true_dict = dict(true_entities[i])

    for token in doc:
        y_true.append(true_dict.get(token.text, "O"))
        y_pred.append(token.ent_type_ if token.ent_type_ else "O")


In [7]:
precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
accuracy = accuracy_score(y_true, y_pred)

print("Accuracy :", accuracy)
print("Precision:", precision)
print("Recall   :", recall)
print("F1-score :", f1)


Accuracy : 0.7714285714285715
Precision: 0.7
Recall   : 0.7428571428571429
F1-score : 0.7
