* ⊕ [PhraseMatcher · spaCy API Documentation](https://spacy.io/api/phrasematcher)
* ⊕ [Rule-based matching · spaCy Usage Documentation](https://spacy.io/usage/rule-based-matching#matcher)


In [5]:
import spacy
from spacy import displacy

# Load the small English pipeline once; the cells below reuse `nlp`.
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over a short sample sentence.
sample_text = "I live in NewYork"
doc = nlp(sample_text)

In [6]:
from spacy.pipeline import EntityRuler
# Rule-based entity patterns: the exact phrase "Apple" is labelled ORG.
org_patterns = [{"label": "ORG", "pattern": "Apple"}]

ruler = EntityRuler(nlp)
ruler.add_patterns(org_patterns)

# Insert the ruler into the pipeline ahead of the "ner" component.
nlp.add_pipe(ruler, before="ner")

In [7]:
# Extended token-pattern syntax introduced in spaCy v2.1:
# https://spacy.io/usage/v2-1
#
# Each pattern is a list with one dict per token; a dict value may itself be
# a dict of operators (IN, comparison operators, REGEX) instead of a literal.

# Matches "love cats" or "likes flowers": a token whose lemma is "like" or
# "love", followed by a noun.
pattern1 = [{"LEMMA": {"IN": ["like", "love"]}}, {"POS": "NOUN"}]

# Matches tokens of length >= 10
pattern2 = [{"LENGTH": {">=": 10}}]

# Matches the custom attribute `country` (under the "_" extension namespace)
# against a regex accepting spellings such as "US", "U.S." or "United States".
# NOTE(review): assumes a `country` extension attribute is registered before
# this pattern is used -- the registration is not shown in this notebook.
#
# The regex is a raw string so that `\.` reaches the regex engine untouched
# (in a normal string it is an invalid escape sequence), and the outer group
# is closed -- the previous version had an unbalanced "(" and did not compile.
pattern3 = [{"_": {"country": {"REGEX": r"^([Uu](\.?|nited) ?[Ss](\.?|tates))"}}}]

In [9]:
from spacy.matcher import PhraseMatcher

# Phrase matcher configured to compare the POS attribute of tokens rather
# than their verbatim text.
matcher = PhraseMatcher(nlp.vocab, attr="POS")

# Register the example phrase under the key "PATTERN" (no callback).
pos_template = nlp(u"I love cats")
matcher.add("PATTERN", None, pos_template)

# Apply the matcher to a different sentence and display the
# (match_id, start, end) tuples as the cell output.
doc = nlp(u"You like dogs")
matches = matcher(doc)
matches

[(11920309760829426267, 0, 3)]

In [10]:
import spacy
from spacy.matcher import Matcher

# Reuses the `nlp` pipeline loaded in the first cell instead of loading it again.
matcher = Matcher(nlp.vocab)

# Three-token pattern: lowercase form "hello", one punctuation token,
# lowercase form "world". Registered as "HelloWorld" with no callback.
pattern = [{"LOWER": "hello"}, {"IS_PUNCT": True}, {"LOWER": "world"}]
matcher.add("HelloWorld", None, pattern)

doc = nlp(u"Hello, world! Hello world!")
matches = matcher(doc)

# Each match is (match_id, start, end); start/end are token offsets into doc.
for match_id, start, end in matches:
    # Slice out the matched tokens.
    span = doc[start:end]
    # Look up the string form of the match ID in the vocab's string store.
    string_id = nlp.vocab.strings[match_id]
    print(match_id, string_id, start, end, span.text)

15578876784678163569 HelloWorld 0 3 Hello, world
