In [1]:
import spacy
from buzz_el.entity_matcher import FuzzyRuler

In [2]:
# Build a minimal spaCy Language (i.e., a pipeline).
# Every component named in `exclude` is left out at load time so the
# pipeline stays as small as possible for this little demo.
nlp = spacy.load(
    "en_core_web_sm",
    exclude=[
        "tok2vec",
        "tagger",
        "parser",
        "attribute_ruler",
        "lemmatizer",
        "ner",
    ],
)

In [3]:
# Sample sentence: "chese" and "met" are misspelled here, whereas the patterns
# registered on the ruler in the next cell are spelled "cheese" and "meat".
text = "I like chese and met."

# Run the sentence through the pipeline.
doc = nlp(text)

In [4]:
# Each pattern carries a label, the text to look for, and an id
# (the id is what gets printed as `span.id_` further down).
patterns = [
    {"label": "KG_ENT", "pattern": "cheese", "id": "CHEESE"},
    {"label": "KG_ENT", "pattern": "meat", "id": "MEAT"},
]

# Create the fuzzy ruler from the pipeline, then register the patterns on it.
ruler = FuzzyRuler(nlp)
ruler.add_patterns(patterns)

In [5]:
# Apply the ruler to the doc. The return value is shown as the cell output;
# the matches themselves are read from doc.spans['fuzzy'] in the next cell
# (presumably the ruler annotates `doc` in place -- `doc` is not reassigned here).
ruler(doc)

I like chese and met.

In [6]:
# List every span in the 'fuzzy' span group as:
# matched text, start index, end index, id.
for span in doc.spans["fuzzy"]:
    print(f"{span.text} {span.start} {span.end} {span.id_}")

chese 2 3 CHEESE
met 4 5 MEAT


----

In [7]:
from pathlib import Path

from buzz_el.graph import RDFGraphLoader
from buzz_el.entity_matcher import EntityMatcher

In [8]:
# Location of the sample pizza knowledge graph (.ttl file) that is handed to
# RDFGraphLoader below. The path is relative, so it is resolved against the
# notebook's current working directory.
pizza_kg_filepath = Path("./data/pizzas_bisou_sample.ttl")

In [15]:
# Raw input sentence. It contains "ricotta cream base" / "spinach" alongside
# the misspelled variants "ricota cream base" / "spinack".
# NOTE(review): `doc` is a plain str at this point; the following cells rebind
# the same name to the result of nlp(doc) and then entity_matcher(doc).
# Consider a distinct name (e.g. `text`) for the raw string.
doc = "I like ricotta cream base pizza but ricota cream base can be fat so sometimes I eat spinach but spinack are not so good."

In [16]:
# Minimal spaCy pipeline for the knowledge-graph demo: every component named
# in `exclude` is dropped so the pipeline is as small as possible.
nlp = spacy.load(
    "en_core_web_sm",
    exclude=[
        "tok2vec",
        "tagger",
        "parser",
        "attribute_ruler",
        "lemmatizer",
        "ner",
    ],
)

# `doc` holds the raw sentence (a str) from the previous cell; rebind it to
# the object returned by the pipeline.
doc = nlp(doc)

In [17]:
# Configure the loader for the RDF knowledge graph stored at `pizza_kg_filepath`.
kg_loader = RDFGraphLoader(
    kg_file_path=pizza_kg_filepath,
    # define which annotation properties of the RDF KG are used
    label_properties={"skos:altLabel", "rdfs:label", "skos:prefLabel"},
    # optionally define a language to focus on
    lang_filter_tag="en",
)

# Build the knowledge graph object that the entity matcher consumes.
pizza_kg = kg_loader.build_knowledge_graph()

In [18]:
# Build the entity matcher from the knowledge graph and the spaCy pipeline.
# NOTE(review): with use_fuzzy=True the processed doc ends up with both a
# 'string' and a 'fuzzy' span group (see the doc.spans output below);
# presumably the flag is what enables the 'fuzzy' group -- confirm in the
# EntityMatcher docs.
entity_matcher = EntityMatcher(
    knowledge_graph=pizza_kg,
    spacy_model=nlp,
    use_fuzzy=True
)

In [19]:
# Run the matcher on the processed doc and keep its return value under the
# same name; the span groups are inspected via doc.spans in the next cell.
doc = entity_matcher(doc)

In [20]:
# Display the span groups attached to the doc: the 'string' group lists only
# the correctly spelled mentions, while the 'fuzzy' group also includes the
# misspelled ones ("ricota cream base", "spinack").
doc.spans

{'string': [ricotta cream base, spinach], 'fuzzy': [ricotta cream base, ricota cream base, spinach, spinack]}