Skip to content

Commit

Permalink
💫 Fix issue explosion#3839: Incorrect entity IDs from Matcher with operators (explosion#3949)
Browse files Browse the repository at this point in the history

* Add regression test for issue explosion#3541

* Add comment on bugfix

* Remove incorrect test

* Un-xfail test
  • Loading branch information
honnibal authored and polm committed Aug 18, 2019
1 parent c846c27 commit aede1ee
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
8 changes: 4 additions & 4 deletions spacy/matcher/matcher.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -262,13 +262,13 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc, extensions=None,


cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
# There have been a few bugs here.
# The code was originally designed to always have pattern[1].attrs.value
# be the ent_id when we get to the end of a pattern. However, Issue #2671
# showed this wasn't the case when we had a reject-and-continue before a
# match. I still don't really understand what's going on here, but this
# workaround does resolve the issue.
while pattern.attrs.attr != ID and \
(pattern.nr_attr > 0 or pattern.nr_extra_attr > 0 or pattern.nr_py > 0):
# match.
# The patch to #2671 was wrong though, which came up in #3839.
while pattern.attrs.attr != ID:
pattern += 1
return pattern.attrs.value

Expand Down
1 change: 0 additions & 1 deletion spacy/tests/regression/test_issue3839.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from spacy.tokens import Doc


@pytest.mark.xfail
def test_issue3839(en_vocab):
"""Test that match IDs returned by the matcher are correct, are in the string """
doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])
Expand Down

0 comments on commit aede1ee

Please sign in to comment.