Skip to content

Commit

Permalink
#437 -- refactored. Improved rows parser. #415 -- removed case of mentioned `,` in value.
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jan 16, 2023
1 parent 730d535 commit 395671f
Showing 1 changed file with 22 additions and 15 deletions.
37 changes: 22 additions & 15 deletions arekit/contrib/source/brat/annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,40 @@ def __non_prefixed_id(value):

@staticmethod
def handle_entity(args):
""" T2 Location 10 23 South America
T1 Location 0 5;16 23 North America
"""
assert(len(args) == 3)

if len(args) < 4:
return None
e_id = int(BratAnnotationParser.__non_prefixed_id(args[0]))
entity_params = args[1].split()

if not str.isdigit(args[2]) or not str.isdigit(args[3]):
if len(entity_params) > 3:
# We do not support the case of non-continuous entity mentions.
return None

e_id = int(BratAnnotationParser.__non_prefixed_id(args[0]))
e_str_type = args[1]
e_begin = int(args[2])
e_end = int(args[3])
e_value = " ".join([arg.strip().replace(',', '') for arg in args[4:]])
e_str_type, e_begin, e_end = entity_params

return BratEntity(id_in_doc=e_id,
e_type=e_str_type,
index_begin=e_begin,
index_end=e_end,
value=e_value)
index_begin=int(e_begin),
index_end=int(e_end),
value=args[2].strip())

@staticmethod
def handle_relation(args):
""" Example:
R1 Origin Arg1:T3 Arg2:T4
"""

# Parse identifier index.
e_id = args[0][1:]

rel_type = args[1]
source_id = args[2].split(':')[1]
target_id = args[3].split(':')[1]
# Parse relation arguments.
rel_type, source, target = args[1].split()

source_id = source.split(':')[1]
target_id = target.split(':')[1]

return BratRelation(id_in_doc=e_id,
source_id=int(BratAnnotationParser.__non_prefixed_id(source_id)),
Expand All @@ -57,7 +64,7 @@ def parse_annotations(input_file, encoding='utf-8'):
for line in input_file.readlines():
line = line.decode(encoding)

args = line.split()
args = line.split('\t')

record_type = args[0][0]

Expand Down

0 comments on commit 395671f

Please sign in to comment.