# Relation extraction (RE) benchmark
Infer relations between entities, plus graph inference, using REBEL, OpenNRE, qwikidata, etc.
  * ThuNLP `OpenNRE`
  * Babelscape `REBEL`
  * ZS-BERT

In [4]:
import json

import spacy
from zshot import PipelineConfig, displacy
from zshot.utils.data_models import Entity, Relation
from zshot.relation_extractor import RelationsExtractorZSRC
from zshot.mentions_extractor import MentionsExtractorSpacy
from zshot.linker import LinkerRegen

In [5]:
# Load the article text from the JSON fixture.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# default locale encoding.
with open("../data/wiki_guardians.json", "r", encoding="utf-8") as fh:
    text: str = json.load(fh)["text"]

# Keep only the chunk before the first triple newline; this is the text that
# the pipeline is run on further down.
paragraph = text.split("\n\n\n")[0]

In [6]:
# Base spaCy pipeline; the extra components below are appended to it.
nlp = spacy.load("en_core_web_sm")

# Add the span_marker component, pointing it at the checkpoint named in its config.
span_marker_config = {"model": "tomaarsen/span-marker-mbert-base-multinerd"}
nlp.add_pipe("span_marker", config=span_marker_config)

# Entity types handed to the zshot pipeline configuration.
entity_types = [
    Entity(
        name="PERSON",
        description="People, including fictional",
    ),
    Entity(
        name="WORK OF ART",
        description="Titles of books, songs, etc.",
    ),
    Entity(
        name="ORG",
        description="Companies, agencies, institutions, organizations, etc.",
    ),
]

# Candidate relations; each one is annotated with the Wikidata property it was taken from.
relation_types = [
    # https://www.wikidata.org/wiki/Property:P1434
    Relation(
        name="takes place in fictional universe",
        description="the subject is a work describing a fictional universe, i.e. whose plot occurs in this universe",
    ),
    # https://www.wikidata.org/wiki/Property:P1441
    Relation(
        name="present in work",
        description="this (fictional or fictionalized) entity, place, or person appears in that work as part of the narration",
    ),
    # https://www.wikidata.org/wiki/Property:P175
    Relation(
        name="performer",
        description="actor, musician, band or other performer associated with this role or musical work",
    ),
    # https://www.wikidata.org/wiki/Property:P57
    Relation(
        name="director",
        description="director(s) of film, TV-series, stageplay, video game or similar",
    ),
    # https://www.wikidata.org/wiki/Property:P156
    Relation(
        name="followed by",
        description="immediately following item in a series of which the subject is a part",
    ),
]

# Bundle the mention extractor, linker and relation extractor into one zshot config.
config = PipelineConfig(
    entities=entity_types,
    mentions_extractor=MentionsExtractorSpacy(),
    linker=LinkerRegen(),
    relations=relation_types,
    relations_extractor=RelationsExtractorZSRC(thr=0.8),
)

# Kept as the final expression so the cell still displays the added component.
nlp.add_pipe("zshot", config=config, last=True)

config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 762/762 [00:00<00:00, 1.66MB/s]
Downloading zsrc: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.25G/1.25G [02:46<00:00, 8.10MB/s]


<zshot.zshot.Zshot at 0x2d53b7ad0>

In [7]:
# Run the assembled pipeline on the first paragraph of the article.
# NOTE(review): the recorded output of this cell ends in
# "ValueError: `prefix_allowed_tokens_fn` returned an empty list for batch ID 0",
# so `doc` was never produced in the saved run. Presumably this comes from the
# constrained generation inside the linker; TODO confirm and resolve before
# relying on any cell that uses `doc`.
doc = nlp(paragraph)

tokenizer_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.37k/2.37k [00:00<00:00, 11.5MB/s]
tokenizer.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.42M/2.42M [00:00<00:00, 3.49MB/s]
special_tokens_map.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 147/147 [00:00<00:00, 1.46MB/s]
config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.50k/1.50k [00:00<00:00, 13.2MB/s]
model.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.95G/2.95G [06:26<00:00, 7.64MB/s]


ValueError: `prefix_allowed_tokens_fn` returned an empty list for batch ID 0.This means that the constraint is unsatisfiable. Please check your implementationof `prefix_allowed_tokens_fn` 

In [None]:
# Draw the extracted relations inline in the notebook.
# The "rel" style is provided by zshot's `displacy` wrapper (imported at the top
# of the notebook); spaCy's own displacy only knows "dep", "ent" and "span" and
# rejects "rel" as an unknown style.
displacy.render(doc, style="rel", jupyter=True)