In [1]:
import spacy

In [2]:
# Load the "en_core_web_sm" pipeline; its vocab is what the matchers below are built on.
nlp = spacy.load("en_core_web_sm")

In [3]:
from spacy.matcher import Matcher 

In [4]:
# Token-pattern matcher that shares the pipeline's vocabulary.
matcher = Matcher(nlp.vocab)

In [5]:
# One rule, three surface forms of the same term:
#   "SolarPower"   -> a single token
#   "Solar-Power"  -> two words with a punctuation token between them
#   "Solar power"  -> two separate words
# Every token test uses LOWER, so the comparison is against lowercased text.
pattern1 = [{"LOWER": "solarpower"}]
pattern2 = [
    {"LOWER": "solar"},
    {"IS_PUNCT": True},
    {"LOWER": "power"},
]
pattern3 = [{"LOWER": "solar"}, {"LOWER": "power"}]

In [6]:
# Register all three spellings under one rule name, so every hit reports the same match id.
# NOTE(review): the positional `None` looks like the `on_match` callback slot of the
# spaCy v2 signature; newer releases expect `matcher.add(key, [patterns])` — confirm
# against the installed spaCy version.
matcher.add('SolarPower', None, pattern1, pattern2, pattern3)

In [10]:
# Sample text that contains all three spellings of the term.
doc = nlp(
    u"The solar power industry continues to grow as solarpower "
    u"increases. Solar-Power is a great thing"
)


In [11]:
# Run the matcher over the parsed text; hits come back as (match_id, start, end) tuples.
found_matches= matcher(doc)

In [12]:
# Raw hits: the first element is the rule's hash, the other two are offsets used to slice `doc`.
print(found_matches)

[(8656102463236116519, 1, 3), (8656102463236116519, 8, 9), (8656102463236116519, 11, 14)]


In [14]:
# Report every hit: the rule hash, its readable name, the offsets, and the matched text.
for match_id, start, end in found_matches:
    span = doc[start:end]                    # tokens covered by this hit
    string_id = nlp.vocab.strings[match_id]  # look the hash up to recover the rule name
    print(match_id, string_id, start, end, span.text)

8656102463236116519 SolarPower 1 3 solar power
8656102463236116519 SolarPower 8 9 solarpower
8656102463236116519 SolarPower 11 14 Solar-Power


In [15]:
"""Removing pattern from a matcher """

'Removing pattern from a matcher '

In [16]:
# Drop the 'SolarPower' rule so it can be registered again with different patterns.
matcher.remove("SolarPower")

In [19]:
# Revised patterns for the 'SolarPower' rule:
#   pattern1 - the single token "solarpower" (compared in lowercase)
#   pattern2 - "solar" and "power" separated by any run of punctuation tokens;
#              'OP': '*' makes the punctuation token match zero or more times,
#              so "Solar--Power", "Solar-Power" and "solar power" are all covered.
# The first name was previously misspelled `patter1`, which left the rule silently
# relying on the `pattern1` defined in an earlier cell.
pattern1 = [{"LOWER": "solarpower"}]
pattern2 = [{'LOWER': 'solar'}, {"IS_PUNCT": True, 'OP': "*"}, {'LOWER': 'power'}]

In [20]:
# Register the 'SolarPower' rule again, this time with only two patterns.
# NOTE(review): the positional `None` looks like the `on_match` callback slot of the
# spaCy v2 signature; newer releases expect `matcher.add(key, [patterns])` — confirm
# against the installed spaCy version.
matcher.add('SolarPower', None, pattern1, pattern2)

In [26]:
# Second sample text: a doubled hyphen between the words, plus the one-word spelling.
doc2 = nlp("Solar--Power is solarpower yay!!")

In [28]:
# Match against the new text; this overwrites the results from the first document.
found_matches = matcher(doc2)

In [29]:
# Show the raw (match_id, start, end) tuples for the second document.
print(found_matches)

[(8656102463236116519, 0, 3), (8656102463236116519, 4, 5)]


In [30]:
from spacy.matcher import PhraseMatcher

In [31]:
# Switch to phrase matching. NOTE: this rebinds `matcher`, so the token-pattern
# Matcher created earlier is no longer reachable under that name.
matcher = PhraseMatcher(nlp.vocab)

In [40]:
# Read the whole file first, then parse it once the handle has been closed.
with open('reaganomics.txt') as text_file:
    raw_text = text_file.read()
doc3 = nlp(raw_text)

In [41]:
# Multi-word terms to search for in the text.
phrase_list = [
    'voodoo economics',
    'supply-side economics',
    'trickle-down economics',
    'free-market economics',
]

In [43]:
# Turn each phrase into a Doc to use as a PhraseMatcher pattern.
# Only tokenization is needed here: PhraseMatcher compares token text by default,
# so `nlp.make_doc` (tokenizer only) gives the same patterns as `nlp(text)` without
# running the tagger/parser/NER on every phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [44]:
# Register every phrase Doc under a single 'EconMatcher' rule.
# NOTE(review): the positional `None` looks like the `on_match` callback slot of the
# spaCy v2 signature; newer releases expect `matcher.add(key, [patterns])` — confirm
# against the installed spaCy version.
matcher.add('EconMatcher', None, *phrase_patterns)

In [45]:
# Search the loaded document for the registered phrases (overwrites the earlier results).
found_matches = matcher(doc3)

In [46]:
# Display the raw (match_id, start, end) tuples for the phrase hits.
found_matches

[(3680293220734633682, 41, 45),
 (3680293220734633682, 49, 53),
 (3680293220734633682, 54, 56),
 (3680293220734633682, 61, 65),
 (3680293220734633682, 673, 677),
 (3680293220734633682, 2984, 2988)]

In [49]:
# Print each hit together with some surrounding context:
# up to 5 tokens before the match and 4 tokens after it.
for match_id, start, end in found_matches:
    string_id = nlp.vocab.strings[match_id]  # rule name for this match id
    # Clamp the left edge at 0: for a match within the first five tokens,
    # `start - 5` would be negative and the slice would be counted from the
    # end of the doc, giving the wrong (typically empty) window.
    span = doc3[max(start - 5, 0):end + 4]   # context window around the match
    print(match_id, string_id, start, end, span.text)

3680293220734633682 EconMatcher 41 45 policies are commonly associated with supply-side economics, referred to as
3680293220734633682 EconMatcher 49 53 economics, referred to as trickle-down economics or voodoo economics by
3680293220734633682 EconMatcher 54 56 trickle-down economics or voodoo economics by political opponents,
3680293220734633682 EconMatcher 61 65 by political opponents, and free-market economics by political advocates.
3680293220734633682 EconMatcher 673 677 attracted a following from the supply-side economics movement, which formed
3680293220734633682 EconMatcher 2984 2988 became widely known as "trickle-down economics", due to


In [50]:
"""Viewing Matches
There are a few ways to fetch the text surrounding a match. The
simplest is to grab a slice of tokens from the doc that is wider
than the match:"""

'Viewing Matches\nThere are a few ways to fetch the text surrounding a match. The\nsimplest is to grab a slice of tokens from the doc that is wider\nthan the match:'

In [52]:
# Bounds of the fifth hit listed in the match results above.
start, end = 673, 677

# Widen the slice: 5 tokens of left context and 10 of right context.
doc3[start - 5:end + 10]

attracted a following from the supply-side economics movement, which formed in opposition to Keynesian demand-

In [53]:
# Materialize the sentence iterator so sentences can be indexed and re-scanned.
sents = list(doc3.sents)

In [55]:
# Start and end offsets of the first sentence within the doc.
print(sents[0].start, sents[0].end)

0 35


In [60]:
"""To print the sentence which contains that phrase"""

'To print the sentence which contains that phrase'

In [56]:
# Walk the sentences in order and stop at the first one that ends after the
# start of the fifth match; that sentence is the one printed.
for sentence in sents:
    if sentence.end > found_matches[4][1]:
        print(sentence)
        break

At the same time he attracted a following from the supply-side economics movement, which formed in opposition to Keynesian demand-stimulus economics.
