In [2]:
# !pip install -U spacy-transformers

In [None]:
import spacy
from spacy.tokens import DocBin
from spacy.training import Example
from spacy.scorer import Scorer
import tqdm

# Locations of the trained pipeline and of the serialized gold-standard test set.
model_dir = "/content/drive/MyDrive/filtered_balanced_ner"
test_set_path = "/content/drive/MyDrive/spacy_files_for_ner/balanced_ner/test.spacy"

# Pipeline under evaluation.
nlp = spacy.load(model_dir)

# Gold documents are deserialized against the pipeline's vocab.
doc_bin = DocBin().from_disk(test_set_path)
gold_docs = [gold for gold in doc_bin.get_docs(nlp.vocab)]

# Buckets of entity spans, filled by the evaluation loop.
tp = []
fp = []
fn = []

In [None]:
# Bucket JOB entities into true positives, false negatives and false positives.
# Two entities match only when start offset, end offset and label are all equal.

# Start from empty buckets so that re-running this cell does not append a
# second copy of every entity to lists left over from a previous run.
tp, fp, fn = [], [], []

# nlp.pipe processes the texts in batches and yields predictions in input
# order, so each prediction can be zipped with its gold document.
# batch_size is a throughput/memory trade-off; lower it if memory runs out.
pred_docs = nlp.pipe((gold_doc.text for gold_doc in gold_docs), batch_size=32)

for gold_doc, pred_doc in tqdm.tqdm(zip(gold_docs, pred_docs), total=len(gold_docs)):
    # Index each document's JOB entities by (start_char, end_char, label).
    gold_jobs = {
        (ent.start_char, ent.end_char, ent.label_): ent
        for ent in gold_doc.ents
        if ent.label_ == "JOB"
    }
    pred_jobs = {
        (ent.start_char, ent.end_char, ent.label_): ent
        for ent in pred_doc.ents
        if ent.label_ == "JOB"
    }

    # Predicted spans are either exact matches (TP) or spurious (FP).
    for key, ent in pred_jobs.items():
        if key in gold_jobs:
            tp.append(ent)
        else:
            fp.append(ent)

    # Gold spans with no exact prediction were missed (FN).
    fn.extend(ent for key, ent in gold_jobs.items() if key not in pred_jobs)

100%|██████████| 5308/5308 [42:49<00:00,  2.07it/s]


In [None]:
def save_entities(spans, out_path):
    """Write the text of each span to ``out_path``, one entry per line.

    Args:
        spans: Iterable of objects exposing a ``.text`` string attribute.
        out_path: Destination file path. The file is created or overwritten
            and encoded as UTF-8. Missing parent directories are created.

    Note:
        Each entry is written as ``span.text`` followed by a newline, so a
        span whose text itself contains a newline occupies several lines.
    """
    # Local import keeps this cell runnable on its own.
    from pathlib import Path

    target = Path(out_path)
    # Create the output directory up front so a missing folder does not
    # raise FileNotFoundError when the results are finally written.
    target.parent.mkdir(parents=True, exist_ok=True)

    with open(target, "w", encoding="utf-8") as fw:
        fw.writelines(span.text + "\n" for span in spans)

In [None]:
# Persist each bucket of entity spans and report the file actually written,
# so the printed message always matches the path on disk.
balanced_outputs = [
    ("True Positives", tp, "/content/drive/MyDrive/evaluation_results/balanced_tp.txt"),
    ("False Negatives", fn, "/content/drive/MyDrive/evaluation_results/balanced_fn.txt"),
    ("False Positives", fp, "/content/drive/MyDrive/evaluation_results/balanced_fp.txt"),
]

for title, spans, path in balanced_outputs:
    save_entities(spans, path)
    print(f"{title} → {path}")

Saved:
  • True Positives → tp.txt
  • False Negatives → fn.txt
  • False Positives → fp.txt


In [None]:
import spacy
import spacy_transformers
from spacy.tokens import DocBin
from spacy.training import Example
from spacy.scorer import Scorer
import tqdm

# Locations of the second pipeline and of the serialized gold-standard test set.
model_dir = "/content/drive/MyDrive/retrained_ner_origin"
test_set_path = "/content/drive/MyDrive/spacy_files_for_ner/balanced_ner/test.spacy"

# Pipeline under evaluation (rebinds `nlp`).
nlp = spacy.load(model_dir)

# Gold documents are deserialized again, this time against this pipeline's vocab.
doc_bin = DocBin().from_disk(test_set_path)
gold_docs = [gold for gold in doc_bin.get_docs(nlp.vocab)]

# Fresh buckets of entity spans, filled by the evaluation loop.
tp = []
fp = []
fn = []

In [None]:
# Bucket JOB entities into true positives, false negatives and false positives.
# Two entities match only when start offset, end offset and label are all equal.

# Start from empty buckets so that re-running this cell does not append a
# second copy of every entity to lists left over from a previous run.
tp, fp, fn = [], [], []

# nlp.pipe processes the texts in batches and yields predictions in input
# order, so each prediction can be zipped with its gold document.
# batch_size is a throughput/memory trade-off; lower it if memory runs out.
pred_docs = nlp.pipe((gold_doc.text for gold_doc in gold_docs), batch_size=32)

for gold_doc, pred_doc in tqdm.tqdm(zip(gold_docs, pred_docs), total=len(gold_docs)):
    # Index each document's JOB entities by (start_char, end_char, label).
    gold_jobs = {
        (ent.start_char, ent.end_char, ent.label_): ent
        for ent in gold_doc.ents
        if ent.label_ == "JOB"
    }
    pred_jobs = {
        (ent.start_char, ent.end_char, ent.label_): ent
        for ent in pred_doc.ents
        if ent.label_ == "JOB"
    }

    # Predicted spans are either exact matches (TP) or spurious (FP).
    for key, ent in pred_jobs.items():
        if key in gold_jobs:
            tp.append(ent)
        else:
            fp.append(ent)

    # Gold spans with no exact prediction were missed (FN).
    fn.extend(ent for key, ent in gold_jobs.items() if key not in pred_jobs)

In [None]:
# Write each bucket of entity spans to its own file, in the order
# true positives, false negatives, false positives.
orig_outputs = (
    (tp, "/content/drive/MyDrive/evaluation_results/orig_tp.txt"),
    (fn, "/content/drive/MyDrive/evaluation_results/orig_fn.txt"),
    (fp, "/content/drive/MyDrive/evaluation_results/orig_fp.txt"),
)

for spans, out_path in orig_outputs:
    save_entities(spans, out_path)