# Import Required Libraries
This section imports the libraries needed for working with spaCy and visualizing its output, along with the test-data conversion utilities.

In [1]:
import spacy
from spacy.tokens import DocBin
from spacy import displacy
import pandas as pd
import os
from pathlib import Path

# Converter from the webanno_spacy_converter package (used by the optional conversion example)
from webanno_spacy_converter.converters.webanno_to_spacy import AnnotationSentencesToDocBinConverterV2

# Load Test Data
Load a `.spacy` file generated from your test data. This file contains serialized spaCy `Doc` objects.

In [None]:
# Location of the serialized test corpus, relative to the current working
# directory (adjust as needed).
docbin_path = Path("test_data", "examplev2.spacy")

# The pipeline is loaded so that its vocab can be used for deserialization.
# "my_nlp_el_cnn1" must be resolvable by spaCy (installed package or model
# directory); otherwise spacy.load raises OSError.
nlp = spacy.load("my_nlp_el_cnn1")

# Read the DocBin from disk and materialize every Doc stored in it.
doc_bin = DocBin().from_disk(docbin_path)
docs = [*doc_bin.get_docs(nlp.vocab)]
print(f"Loaded {len(docs)} SpaCy Doc objects from {docbin_path}")

OSError: [E052] Can't find model directory: ..\my_nlp_el_cnn1

# Convert Test Data to spaCy Docs
If you have annotated sentences and want to convert them to spaCy `Doc` objects, use the converter here. (This step is optional if you already have a `.spacy` file.)

In [None]:
# Example (uncomment and adjust if you want to run conversion):
# from webanno_spacy_converter.parsers.tsv_parser_v3 import WebAnnoNELParser
# parser = WebAnnoNELParser("../test_data/output.tsv")
# parser.parse()
# converter = AnnotationSentencesToDocBinConverterV2(nlp)
# doc_bin = converter.convert(parser.sentences)
# docs = list(doc_bin.get_docs(nlp.vocab))

# Visualize spaCy Docs
Use spaCy's `displacy` to visualize the named entities in the first loaded document. (`displacy` can also render dependency parses via `style="dep"`.)

In [None]:
# Render the first document's entities inline; when nothing was loaded or the
# first document carries no entities, report that instead of rendering.
if not docs or not docs[0].ents:
    print("No named entities found in the first document.")
else:
    displacy.render(docs[0], style="ent", jupyter=True)

# Inspect Named Entities
Programmatically inspect the named entities, their labels, and any linked knowledge-base IDs in the first spaCy `Doc`.

In [None]:
# Tabulate text, label and knowledge-base ID for each entity of the first doc.
if not docs:
    print("No documents loaded.")
else:
    # One row per entity span; KB_ID falls back to None if the attribute is
    # missing on the span object.
    ents_data = [
        {
            "Text": ent.text,
            "Label": ent.label_,
            "KB_ID": getattr(ent, "kb_id_", None),
        }
        for ent in docs[0].ents
    ]
    if not ents_data:
        print("No entities found in the first document.")
    else:
        # Show the rows as a table in the notebook output.
        df = pd.DataFrame(ents_data)
        display(df)