💫 Fix issue explosion#3839: Incorrect entity IDs from Matcher with operators (explosion#3949)

* Add regression test for issue explosion#3541
* Add comment on bugfix
* Remove incorrect test
* Un-xfail test
polm · Aug 18, 2019 · aede1ee · aede1ee
1 parent c846c27
commit aede1ee
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 5 deletions.
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
@@ -262,13 +262,13 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc, extensions=None,
 
 
 cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
+    # There have been a few bugs here.
     # The code was originally designed to always have pattern[1].attrs.value
     # be the ent_id when we get to the end of a pattern. However, Issue #2671
     # showed this wasn't the case when we had a reject-and-continue before a
-    # match. I still don't really understand what's going on here, but this
-    # workaround does resolve the issue.
-    while pattern.attrs.attr != ID and \
-            (pattern.nr_attr > 0 or pattern.nr_extra_attr > 0 or pattern.nr_py > 0):
+    # match.
+    # The patch to #2671 was wrong though, which came up in #3839.
+    while pattern.attrs.attr != ID:
         pattern += 1
     return pattern.attrs.value
 

diff --git a/spacy/tests/regression/test_issue3839.py b/spacy/tests/regression/test_issue3839.py
@@ -6,7 +6,6 @@
 from spacy.tokens import Doc
 
 
-@pytest.mark.xfail
 def test_issue3839(en_vocab):
     """Test that match IDs returned by the matcher are correct, are in the string """
     doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])