Skip to content

Commit

Permalink
#340. Provide checks for entity border intersections with the left …
Browse files Browse the repository at this point in the history
…and right sentence borders.
  • Loading branch information
nicolay-r committed Jun 18, 2022
1 parent bfa8493 commit 877b8e2
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 8 deletions.
25 changes: 17 additions & 8 deletions arekit/contrib/source/brat/news_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ class BratDocumentSentencesReader(object):

@staticmethod
def from_file(input_file, entities, line_handler=None, skip_entity_func=None):
assert(isinstance(entities, EntityCollection))
assert(callable(skip_entity_func) or skip_entity_func is None)
assert (isinstance(entities, EntityCollection))
assert (callable(skip_entity_func) or skip_entity_func is None)

sentences = BratDocumentSentencesReader.__parse_sentences(input_file=input_file,
line_handler=line_handler)
Expand All @@ -18,7 +18,7 @@ def from_file(input_file, entities, line_handler=None, skip_entity_func=None):

while s_ind < len(sentences) and e_ind < len(entities):
e = entities.get_entity_by_index(e_ind)
assert(isinstance(e, BratEntity))
assert (isinstance(e, BratEntity))

s = sentences[s_ind]

Expand All @@ -35,12 +35,21 @@ def from_file(input_file, entities, line_handler=None, skip_entity_func=None):
e_ind += 1
continue

raise Exception("e_i:{} e:('{}',{},{}), s_i:{}".format(
e_ind,
e.Value.encode('utf-8'), e.CharIndexBegin, e.CharIndexEnd,
s_ind))
if e.CharIndexEnd > s.EndBound:
# Intersects with the right border of sentence
s_ind += 1
continue

assert(e_ind == len(entities))
if e.CharIndexBegin < s.BeginBound:
# Intersects with the left border of sentence
e_ind += 1
continue

raise Exception("e_i:{} e:('{}',{},{}), s_i:{}, s_b: [{} {}]".format(
e_ind,
e.Value, e.CharIndexBegin, e.CharIndexEnd,
s_ind,
s.BeginBound, s.EndBound))

return sentences

Expand Down
8 changes: 8 additions & 0 deletions arekit/contrib/source/brat/sentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def is_entity_goes_after(self, entity):
assert(isinstance(entity, BratEntity))
return entity.CharIndexBegin > self.__end

@property
def BeginBound(self):
# Read-only accessor exposing the private `__begin` attribute of this sentence
# (presumably the character offset where the sentence starts -- TODO confirm).
return self.__begin

@property
def EndBound(self):
# Read-only accessor exposing the private `__end` attribute of this sentence;
# this is the same value `is_entity_goes_after` compares `entity.CharIndexBegin` against.
return self.__end

# endregion

# region overriden methods
Expand Down

0 comments on commit 877b8e2

Please sign in to comment.