In [None]:
# Install and load dependencies
!pip install -U spacy scikit-learn matplotlib seaborn
!python -m spacy download en_core_web_sm

Collecting spacy
  Downloading spacy-3.8.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (27 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading spacy-3.8.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.0/33.0 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading matplotlib-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: matplotlib, spacy
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.10.0
    Uninstalling matplotlib-3.10.0:
      Successfully uninstalled matplotlib-3.10.0
  Attempting uninstall: spacy
    Found existing installation: spacy 3.8.6
    Uninstalling 

In [None]:
# Imports
import pandas as pd
import spacy
from collections import defaultdict
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML

In [None]:
# Read the token-level gold annotations (tab-separated file).
ner_df = pd.read_csv("/content/NER-test.tsv", sep="\t")

# Bucket (token, gold tag) pairs by sentence_id, keeping the file's row order.
sentences = defaultdict(list)
token_rows = zip(ner_df["sentence_id"], ner_df["token"], ner_df["BIO_NER_tag"])
for sent_key, word, gold_tag in token_rows:
    sentences[sent_key].append((word, gold_tag))

# Rebuild every sentence as one space-delimited string, in first-seen order.
text_sentences = []
for pairs in sentences.values():
    text_sentences.append(" ".join(word for word, _ in pairs))

In [None]:
# Load spaCy NER model
nlp = spacy.load("en_core_web_sm")

# The gold data uses the label "LOCATION", while en_core_web_sm tags places as
# "GPE" / "LOC" (see the spaCy model card). Without this mapping a correct
# place prediction can never match the gold tag. Labels not listed here are
# passed through unchanged; extend the mapping if the gold scheme requires it.
SPACY_TO_GOLD_LABEL = {"GPE": "LOCATION", "LOC": "LOCATION"}

# Parallel lists with one entry per gold token.
true_tags = []
predicted_tags = []
tokens_flat = []
sentence_ids = []

# Iterate over the real sentence_id keys. Looking sentences up by their
# enumerate() position only works when the ids happen to be 0..n-1 in order;
# otherwise the defaultdict silently returns an empty list and the sentence
# is dropped from the evaluation.
for sentence_id, sentence_tokens in sentences.items():
    # Same space-joined text that is stored in `text_sentences`.
    text = " ".join(token_text for token_text, _ in sentence_tokens)
    doc = nlp(text)
    entity_spans = [
        (ent.start_char, ent.end_char, SPACY_TO_GOLD_LABEL.get(ent.label_, ent.label_))
        for ent in doc.ents
    ]

    # Tokens are separated by exactly one space, so every character offset
    # can be computed directly instead of searching the string.
    cursor = 0
    for token_text, true_tag in sentence_tokens:
        token_index = cursor
        cursor = token_index + len(token_text) + 1  # +1 skips the joining space

        # BIO projection: a token starting exactly at an entity start is "B-",
        # a token starting strictly inside an entity is "I-", anything else "O".
        predicted_label = "O"
        for start, end, label in entity_spans:
            if token_index == start:
                predicted_label = f"B-{label}"
                break
            if start < token_index < end:
                predicted_label = f"I-{label}"
                break

        sentence_ids.append(sentence_id)
        tokens_flat.append(token_text)
        true_tags.append(true_tag)
        predicted_tags.append(predicted_label)

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Every tag seen in either the gold annotations or the predictions.
gold_tag_set = set(true_tags)
all_tags = gold_tag_set.union(predicted_tags)

# Report only on entity tags that occur in the gold data; "O" is excluded.
relevant_tags = sorted(
    tag for tag in all_tags if tag in gold_tag_set and tag != "O"
)

# Per-tag precision / recall / F1, restricted to the relevant tags.
print("===== spaCy Classification Report (Filtered) =====")
report_text = classification_report(
    true_tags,
    predicted_tags,
    labels=relevant_tags,
    digits=4,
    zero_division=0,
)
print(report_text)

# Token-level accuracy over all tags, "O" included.
acc = accuracy_score(true_tags, predicted_tags)
print(f"\nOverall Accuracy (including 'O'): {acc:.4f}")

===== spaCy Classification Report (Filtered) =====
               precision    recall  f1-score   support

   B-LOCATION     0.0000    0.0000    0.0000         3
        B-ORG     0.5000    0.5000    0.5000         8
     B-PERSON     0.7500    0.7500    0.7500        12
B-WORK_OF_ART     0.3333    0.1667    0.2222         6
   I-LOCATION     0.0000    0.0000    0.0000         2
        I-ORG     0.5714    0.8000    0.6667         5
     I-PERSON     0.8182    0.6923    0.7500        13
I-WORK_OF_ART     0.3333    0.1250    0.1818         8

    micro avg     0.6364    0.4912    0.5545        57
    macro avg     0.4133    0.3792    0.3838        57
 weighted avg     0.5467    0.4912    0.5065        57


Overall Accuracy (including 'O'): 0.8148


In [None]:
# Write the token-level gold vs. predicted tags to CSV for the poster/report.
result_columns = {
    "sentence_id": sentence_ids,
    "token": tokens_flat,
    "true_tag": true_tags,
    "predicted_tag": predicted_tags,
}
output_df = pd.DataFrame(data=result_columns)
output_df.to_csv("/content/spacy_ner_results.csv", index=False)
print("Results saved as 'spacy_ner_results.csv'")

Results saved as 'spacy_ner_results.csv'


In [None]:
# Optional: Visualize spaCy-predicted named entities
def highlight_entities(text, doc):
    """Return ``text`` as an HTML fragment with each entity wrapped in ``<mark>``.

    Args:
        text: The string the entity character offsets refer to.
        doc: Object exposing ``ents``, an iterable of entities that each have
            ``start_char``, ``end_char`` and ``label_`` attributes (a spaCy
            ``Doc`` in this notebook). Entities are consumed in iteration
            order and are expected to be non-overlapping and left-to-right.

    Returns:
        str: HTML in which entity spans are wrapped in a yellow ``<mark>``
        whose ``title`` carries the entity label. The sentence text and the
        label are HTML-escaped, so characters such as ``<``, ``>`` and ``&``
        are displayed literally instead of being interpreted as markup.
    """
    # Local import: keeps the cell self-contained without touching the
    # notebook's import cell.
    from html import escape

    pieces = []
    last = 0
    for ent in doc.ents:
        # Plain text between the previous entity and this one.
        pieces.append(escape(text[last:ent.start_char], quote=False))
        # The label sits inside a single-quoted attribute, so quotes are
        # escaped there as well (escape() defaults to quote=True).
        pieces.append(
            f"<mark style='background-color:#ffff99;' title='{escape(ent.label_)}'>"
            f"{escape(text[ent.start_char:ent.end_char], quote=False)}</mark>"
        )
        last = ent.end_char
    # Trailing text after the final entity (the whole text if there is none).
    pieces.append(escape(text[last:], quote=False))
    return "".join(pieces)

# Render the first two sentences with their predicted entities highlighted.
print("\n===== Highlighted NER Predictions (1–2 sentences) =====")
preview_sentences = text_sentences[:2]
for sentence_text in preview_sentences:
    rendered_html = highlight_entities(sentence_text, nlp(sentence_text))
    display(HTML(rendered_html))


===== Highlighted NER Predictions (1–2 sentences) =====
