Skip to content
This repository has been archived by the owner on Jun 9, 2021. It is now read-only.

Commit

Permalink
Adds a method to run a SpaCy matcher over the doc.
Browse files Browse the repository at this point in the history
  • Loading branch information
dodijk committed Aug 28, 2018
1 parent e7c3ae7 commit efd3a10
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions textpipe/doc.py
Expand Up @@ -161,6 +161,19 @@ def find_ents(self, model_name=None):
lang = self.hint_language if self.language == 'un' else self.language
return list({(ent.text, ent.label_) for ent in self._load_spacy_doc(lang, model_name).ents})

def match(self, matcher):
    """
    Apply a SpaCy matcher to this document's SpaCy doc.

    The matcher is called on ``self._spacy_doc``; every hit it reports
    is turned into a ``(matched text, pattern name)`` tuple, where the
    pattern name is looked up from the match id in the matcher's own
    vocab string store. Results keep the order the matcher produced.

    >>> import spacy.matcher
    >>> matcher = spacy.matcher.Matcher(spacy.lang.en.English().vocab)
    >>> matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
    >>> Doc('Test with #hashtag').match(matcher)
    [('#hashtag', 'HASHTAG')]
    """
    found = []
    for match_id, start, end in matcher(self._spacy_doc):
        # start/end are indices into the SpaCy doc; slice out the matched span.
        span = self._spacy_doc[start:end]
        matched_text = span.text
        # The match id is resolved back to the name the pattern was added under.
        pattern_name = matcher.vocab.strings[match_id]
        found.append((matched_text, pattern_name))
    return found

@property
def nsents(self):
"""
Expand Down

0 comments on commit efd3a10

Please sign in to comment.