In [1]:
import spacy
# Load the large English pipeline; `nlp` is reused by the later cells.
nlp = spacy.load("en_core_web_lg")
text = "Tennis champion Emerson was expected to win Wimbledon."
doc = nlp(text)

# Show every named entity the pipeline found in `text`, with its label.
for ent in doc.ents:
    print(f"Named entity: '{ent.text}' with label '{ent.label_}'")

Named entity: 'Emerson'' with label 'PERSON'
Named entity: 'Wimbledon'' with label 'EVENT'


In [2]:
import csv
from pathlib import Path

def load_entities(entities_loc=None):
    """Read the entity CSV into two dictionaries keyed by entity ID.

    Every non-empty row is read as ``id, name, description`` (columns 0, 1
    and 2); any further columns are ignored. If an ID occurs more than once,
    the last row wins.

    Args:
        entities_loc: Optional path (``str`` or ``Path``) of the CSV file.
            Defaults to ``entities.csv`` in the current working directory.

    Returns:
        A ``(names, descriptions)`` tuple of dicts, mapping each ID to its
        name and to its description respectively.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        IndexError: If a non-empty row has fewer than three columns.
    """
    if entities_loc is None:
        entities_loc = Path.cwd() / "entities.csv"
    else:
        entities_loc = Path(entities_loc)

    names = {}
    descriptions = {}
    # newline="" hands newline handling to the csv module, as its docs
    # require, so quoted fields containing line breaks are parsed correctly.
    with entities_loc.open("r", encoding="utf8", newline="") as csvfile:
        for row in csv.reader(csvfile, delimiter=","):
            if not row:
                # csv.reader yields [] for blank lines; skip them instead of
                # failing on row[0].
                continue
            qid, name, desc = row[0], row[1], row[2]
            names[qid] = name
            descriptions[qid] = desc
    return names, descriptions

In [3]:
# Load both lookup tables, then list each entity as: ID, name, description.
name_dict, desc_dict = load_entities()
for QID in name_dict:
    print(f"{QID}, name={name_dict[QID]}, desc={desc_dict[QID]}")

Q312545, name=Roy Stanley Emerson, desc=Australian tennis player
Q48226, name=Ralph Waldo Emerson, desc=American philosopher, essayist, and poet
Q215952, name=Emerson Ferreira da Rosa, desc=Brazilian footballer


In [4]:
from spacy.kb import KnowledgeBase
# The entity vectors stored in this KB are `Doc.vector`s produced by `nlp`,
# so take the vector length from the pipeline's own word-vector table rather
# than hard-coding it; the two must match.
kb = KnowledgeBase(
    vocab=nlp.vocab,
    entity_vector_length=nlp.vocab.vectors_length,
)

In [5]:
# Register every entity in the knowledge base, using the document vector of
# its description as the entity vector.
for qid, desc in desc_dict.items():
    desc_doc = nlp(desc)

    # Debug output: entities recognised inside the description itself.
    found_entities = [(ent.text, ent.label_) for ent in desc_doc.ents]
    print(found_entities)

    # Debug output: the full description vector.
    desc_enc = desc_doc.vector
    print(desc_enc)

    # NOTE(review): every entity is given the same hard-coded freq=342;
    # confirm whether real corpus frequencies should be used instead.
    kb.add_entity(entity=qid, entity_vector=desc_enc, freq=342)

[('Australian', 'NORP')]
[-2.0126212   0.7371667   2.687867    0.713214    3.6237335   4.198847
 -1.6916332   1.5568967   0.44500002 -0.8755867   2.9772568  -0.02532667
 -4.169377   -1.6765167   0.94281    -0.46407     1.3376999   0.8645666
  4.414767    0.7207001   1.6133766  -1.14809    -0.16435502 -0.8926367
  3.2036133   0.20210898 -4.822533    0.87819666  4.4561      0.06700007
  0.67255     0.7562333   1.0755643  -2.0366      0.8213      2.2875865
 -0.06310002  2.0329502  -3.6845667  -2.1471002  -0.7505334   2.6117866
  1.2086767   1.7775999   1.3428      1.43422    -3.054433    0.27170005
 -1.5897166   1.0930433   1.2783333   1.7552333  -0.6505834  -0.9075567
  0.9924666   1.5230899  -2.1136334   0.56397337 -1.5153967   1.1140201
  1.3730665  -0.11553331 -3.0591      0.7963335   2.9485333   0.2562
 -3.7404335  -1.7351333  -2.0280466   3.6766598   2.7449      0.64237005
 -0.6825233  -0.02625433 -0.02886661  0.8666627  -3.1350667   5.1351
 -2.3292468   1.5682532  -1.6361799  -4.65