In [None]:
import spacy
from spacy_ann import AnnLinker

# Restore the pipeline that the create_index command wrote to its output_dir.
model_dir = "models/ann_linker/"
nlp = spacy.load(model_dir)

# en_core_web_md's NER component does not tag the knowledge-base aliases as entities,
# so for now an EntityRuler is placed ahead of the linker to extract them as SKILL spans.
ruler = nlp.create_pipe("entity_ruler")
patterns = [
    {"label": "SKILL", "pattern": alias_text}
    for alias_text in nlp.get_pipe("ann_linker").kb.get_alias_strings()
]
ruler.add_patterns(patterns)
nlp.add_pipe(ruler, before="ann_linker")

In [None]:
# Run the full pipeline (ruler + linker) over a sample sentence.
doc = nlp("NLP is a highly researched subset of Machine learning.")
# Display (text, label, knowledge-base id) for each entity span found.
list((span.text, span.label_, span.kb_id_) for span in doc.ents)

In [None]:
# Inspect the norm spaCy reports for the sample document's vector; the same value is
# used as part of the cosine-similarity denominator in a later cell.
doc.vector_norm

In [None]:
import srsly
import numpy as np
# Materialize every record from the entities JSONL file.
entities = [record for record in srsly.read_jsonl("data/entities.jsonl")]
# Build docs from the 'description' field of the records at index 2 and 3.
natl_doc = nlp.make_doc(entities[2]["description"])
neur_doc = nlp.make_doc(entities[3]["description"])

In [None]:
# Collect the two description vectors into a single array, one entry per description.
entity_encodings = np.asarray((natl_doc.vector, neur_doc.vector))
# Euclidean norm taken along axis 1, i.e. one norm per stacked vector
# (assumes each .vector is 1-D — TODO confirm).
entity_norm = np.linalg.norm(entity_encodings, ord=2, axis=1)
entity_norm

In [None]:
# Cosine similarity: dot product of each description vector with the document vector,
# divided by the product of the corresponding norms.
sims = entity_encodings.dot(doc.vector.T) / (entity_norm * doc.vector_norm)
# Position of the description most similar to the document.
np.argmax(sims)

In [None]:
# One SKILL pattern per alias string held by the ann_linker component's knowledge base.
patterns = list(
    {"label": "SKILL", "pattern": alias_text}
    for alias_text in nlp.get_pipe("ann_linker").kb.get_alias_strings()
)

In [None]:
# Print (text, label, knowledge-base id) for every entity span in the document.
print(list((span.text, span.label_, span.kb_id_) for span in doc.ents))

In [None]:
# Run the pipeline on a second sentence and display the resulting doc.
# NOTE(review): "nlpe" looks like a deliberate misspelling to probe approximate alias
# matching — confirm it is intentional before "fixing" the text.
nlp("More text about nlpe")

In [None]:
# Take the first entity span of the document; raises IndexError when there are none.
ent = [*doc.ents][0]

In [None]:
# getattr() requires a string attribute name: passing None raises TypeError before the
# fallback value is ever consulted. Look up a named custom extension on the span instead,
# falling back to 'default' if the lookup raises AttributeError.
# NOTE(review): "alias_candidates" is presumably an extension spacy_ann registers on
# spans — confirm this is the attribute that was meant to be inspected.
getattr(ent._, "alias_candidates", 'default')