In [1]:
import requests
from gatenlp import Document
import numpy as np
import base64

In [14]:
# Sample input sentence, in Italian. English translation:
#   'Smith has been charged with robbing a bank in London.'
text = 'Smith è stato accusato di aver rapinato una banca a Londra.'

In [15]:
# Wrap the raw sentence in a gatenlp Document so it can be serialized with
# to_dict() and posted to the services in the following cells.
doc = Document(text)
# Bare last expression: lets the notebook display the document.
doc

In [16]:
# NER: POST the serialized document to the spaCy NER service.
# The response body is parsed back into a Document in the next cell.
res = requests.post('http://localhost:9000/api/spacyner', json=doc.to_dict())
# Report the HTTP status and the start of the body on failure, instead of a
# bare AssertionError with no context.
assert res.ok, 'spacyner request failed: HTTP {} - {}'.format(res.status_code, res.text[:200])

In [17]:
# Rebuild the document from the JSON returned by the NER service.
# In the rendered output, tick the annotation types to highlight the mentions in the text.
ner_payload = res.json()
doc = Document.from_dict(ner_payload)
doc

In [18]:
# NEL - biencoder: POST the NER-annotated document to the BLINK biencoder
# endpoint; the response is parsed back into a Document in the next cell.
res = requests.post('http://localhost:9001/api/blink/biencoder/mention/doc', json=doc.to_dict())
# Report the HTTP status and the start of the body on failure, instead of a
# bare AssertionError with no context.
assert res.ok, 'biencoder request failed: HTTP {} - {}'.format(res.status_code, res.text[:200])

In [19]:
# the mention is embedded into a vector (encoded in base64)
def vector_decode(s, dtype=np.float32):
    """Decode a base64-encoded buffer into a numpy array.

    Args:
        s: base64 text (str or bytes) holding the raw bytes of the vector.
        dtype: numpy element type used to interpret the decoded bytes
            (defaults to ``np.float32``).

    Returns:
        A 1-D numpy array of ``dtype`` built from the decoded bytes.
    """
    return np.frombuffer(base64.b64decode(s), dtype=dtype)

# Rebuild the document from the biencoder response and look at the encoding
# attached to the first mention only.
doc = Document.from_dict(res.json())
annset = doc.annset('entities_spacy_v0.1.0')
# Take the first annotation explicitly. The previous `for ... break` form left
# `encoding` undefined when the set was empty, which surfaced later as an
# unrelated-looking NameError.
annotation = next(iter(annset), None)
assert annotation is not None, "annotation set 'entities_spacy_v0.1.0' is empty: no mention to decode"
encoding = annotation.features['linking']['encoding']

# Print only a prefix of the base64 string and of the decoded vector.
print('base64 encoding:', encoding[:100])
print('vector:', vector_decode(encoding)[:20])

base64 encoding: T+4evsELQb5tMMM+lxipPHklz74QjE++vyuKvgwSjTxHVNu+wlelPtGTCb51YBK/c2apvfXqJz4wFPS+zW1XP+kQu70EJjy/dR9w
vector: [-0.15520595 -0.1885214   0.38122883  0.02064161 -0.40458277 -0.20268273
 -0.269865    0.01722052 -0.42837736  0.32293516 -0.13435294 -0.5717843
 -0.08271494  0.16398223 -0.47671652  0.8415192  -0.09134085 -0.7349551
 -0.234495    0.00331359]


In [20]:
# NEL - indexer for dense retrieval: POST the document (mentions now carry
# their encodings) to the indexer search endpoint.
res = requests.post('http://localhost:9002/api/indexer/search/doc', json=doc.to_dict())
# Report the HTTP status and the start of the body on failure, instead of a
# bare AssertionError with no context.
assert res.ok, 'indexer search request failed: HTTP {} - {}'.format(res.status_code, res.text[:200])

In [21]:
# Rebuild the document from the indexer response and list, for every mention,
# the title and URL of its top candidate.
doc = Document.from_dict(res.json())
annset = doc.annset('entities_spacy_v0.1.0')
for annotation in annset:
    # Surface form of the mention, sliced out of the document text.
    mention = doc.text[annotation.start:annotation.end]
    top_candidate = annotation.features['linking']['top_candidate']
    print(mention, '-->', top_candidate['title'], top_candidate['url'])

Smith --> Norman Smith https://it.wikipedia.org/wiki?curid=1048312
Londra --> Londra https://it.wikipedia.org/wiki?curid=2279266


In [22]:
# NIL prediction: POST the linked document to the NIL prediction service; the
# response is parsed back into a Document in the next cell.
res = requests.post('http://localhost:9003/api/nilprediction/doc', json=doc.to_dict())
# Report the HTTP status and the start of the body on failure, instead of a
# bare AssertionError with no context.
assert res.ok, 'nilprediction request failed: HTTP {} - {}'.format(res.status_code, res.text[:200])

In [28]:
# Rebuild the document from the NIL prediction response and report, for every
# mention, either the NIL verdict with its score or the linked top candidate.
doc = Document.from_dict(res.json())
annset = doc.annset('entities_spacy_v0.1.0')
for annotation in annset:
    mention = doc.text[annotation.start:annotation.end]
    linking = annotation.features['linking']
    is_nil = linking['is_nil']
    nil_score = linking['nil_score']
    # The raw score is printed on its own line for every mention.
    print(nil_score)
    if not is_nil:
        # Only read the candidate when the mention is actually linked.
        top_candidate = linking['top_candidate']
        print(mention, '-->', top_candidate['title'], top_candidate['url'])
        continue
    print(mention, '--> NIL.', 'score =', nil_score, '(NIL=0 <--> 1=not-NIL)')

0.3406641822630679
Smith --> NIL. score = 0.3406641822630679 (NIL=0 <--> 1=not-NIL)
0.7235149579419616
Londra --> Londra https://it.wikipedia.org/wiki?curid=2279266
