In [36]:
import spacy
from spacy.matcher import Matcher

# Load the locally trained pipeline, then merge each recognised entity into a
# single token so that one pattern slot can cover a whole entity.
nlp = spacy.load("spacy/model-best")
nlp.add_pipe("merge_entities")

# Token pattern: <TECH entity> "such" "as" <TECH entity>
pattern = [
    {"ENT_TYPE": "TECH"},
    {"LOWER": "such"},
    {"LOWER": "as"},
    {"ENT_TYPE": "TECH"},
]

# Register the pattern under the key "Hyponym" (no on-match callback).
matcher = Matcher(nlp.vocab)
matcher.add("Hyponym", [pattern])

doc = nlp("I used DATA CENTER such as DATA CENTER")
matches = matcher(doc)
for match_id, start, end in matches:
    span = doc[start:end]  # tokens covered by this match
    string_id = nlp.vocab.strings[match_id]  # hash -> registered key
    print(span.ents)
    print(match_id, string_id, start, end, span.text)

# Highlight TECH entities in the rendered document.
colors = {"TECH": "#F67DE3"}
options = {"colors": colors}
spacy.displacy.render(doc, style="ent", options=options, jupyter=True)

[DATA CENTER, DATA CENTER]
617219114354912078 Hyponym 2 6 DATA CENTER such as DATA CENTER


In [57]:
def compute_relations(patents, patterns_lhypernym, patterns_rhypernym, nlp=None):
  """Extract ordered entity pairs from text using two sets of Matcher patterns.

  Args:
    patents: Text to analyse; it is passed directly to ``nlp(...)``.
    patterns_lhypernym: List of token patterns registered under the key
      "LHypernym". For these matches the first entity of the matched span is
      placed first in each pair.
    patterns_rhypernym: List of token patterns registered under the key
      "RHypernym". For these matches the first entity of the matched span is
      placed second in each pair.
    nlp: Optional pipeline to reuse so that repeated calls do not reload the
      model from disk. It is used as given (no component is added to it).
      When omitted, "spacy/model-best" is loaded and "merge_entities" is
      appended to it.

  Returns:
    ``{"Hypernyms": [(a, b), ...]}`` where each item pairs the span's first
    entity with one of its later entities. Presumably ``a`` is the hypernym
    and ``b`` the hyponym -- inferred from the key names, confirm with callers.
    Matched spans that contain no entity contribute nothing.

  Raises:
    ValueError: If a match carries a key other than "LHypernym"/"RHypernym".
  """
  if nlp is None:
    nlp = spacy.load("spacy/model-best")
    nlp.add_pipe("merge_entities")

  matcher = Matcher(nlp.vocab)
  matcher.add("LHypernym", patterns_lhypernym)
  matcher.add("RHypernym", patterns_rhypernym)

  doc = nlp(patents)
  matches = matcher(doc)
  print(matches)

  res = {"Hypernyms": []}

  for match_id, start, end in matches:
    string_id = nlp.vocab.strings[match_id]
    if string_id not in ("LHypernym", "RHypernym"):
      raise ValueError(f"Unexpected match id: {string_id}")

    ents = doc[start:end].ents
    if not ents:
      # Patterns keyed on POS or literal text can match spans that hold no
      # entity at all; indexing ents[0] on those would raise IndexError.
      continue

    ent1, others = ents[0], ents[1:]
    if string_id == "LHypernym":
      res["Hypernyms"].extend((ent1, word) for word in others)
    else:
      res["Hypernyms"].extend((word, ent1) for word in others)

  return res

In [67]:
# Pattern keys are written as strings (as in patterns_lhypernym below) so this
# cell does not rely on POS / OP / ORTH / LOWER names being defined elsewhere.
# NOTE(review): "X such as Y" normally has the hypernym on the left -- confirm
# that this pattern belongs here rather than in patterns_lhypernym.
patterns_rhypernym = [[{"POS": "NOUN"},
                       # Optional (0 or 1 matches) of comma
                       {"OP": "?", "ORTH": ","},
                       # matches if token.lower_ == 'such'
                       {"LOWER": "such"},
                       {"LOWER": "as"},
                       {"POS": "NOUN"}]]

# <TECH entity> "including" <TECH entity>
patterns_lhypernym = [[{"ENT_TYPE": "TECH"},
                       {"LOWER": "including"},
                       {"ENT_TYPE": "TECH"}]]

compute_relations("I used DATA CENTER is an equipment rack. DATA CENTER including thermal mass", patterns_lhypernym, patterns_rhypernym)

[(11423821630223946117, 2, 6), (17216739248533801034, 7, 10)]


{'Hypernyms': [(equipment rack, DATA CENTER), (DATA CENTER, thermal mass)]}