Skip to content

Commit

Permalink
#416 refactoring; fixed cases missed by the original implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Nov 25, 2022
1 parent cbc5efd commit 0b3e187
Showing 1 changed file with 12 additions and 13 deletions.
25 changes: 12 additions & 13 deletions arekit/contrib/utils/pipelines/items/text/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,22 @@ def apply_core(self, input_data, pipeline_ctx):
assert(isinstance(pipeline_ctx, PipelineContext))
assert(isinstance(input_data, list))

entities = []
content = []
origin_entities = []
origin_entity_ind = []
parts_to_join = []

for _, part in enumerate(input_data):
if isinstance(part, str):
if isinstance(part, str) and part.strip():
parts_to_join.append(part)
elif isinstance(part, Entity):
# First, register the preceding text parts, merged into a single string.
content.append(" ".join(parts_to_join))
content.append(part.Value)
parts_to_join.clear()
entities.append(part)
# Register entity information for later restoration.
origin_entity_ind.append(len(content))
origin_entities.append(part)
content.append(part.Value)

if len(parts_to_join) > 0:
content.append(" ".join(parts_to_join))
Expand All @@ -45,14 +49,9 @@ def apply_core(self, input_data, pipeline_ctx):
translated_parts = [part.text for part in
self.translator.translate(content, dest=self.__dest, src=self.__src)]

# NOTE: entities are always at odd indices: 1, 3, 5, 7, ...
for part_index in range(len(translated_parts)):
if part_index % 2 == 0:
continue
# Pick up the related entity.
entity = entities[int((part_index-1)/2)]
entity.set_caption(translated_parts[part_index])

translated_parts[part_index] = entity
for entity_ind, entity_part_ind in enumerate(origin_entity_ind):
entity = origin_entities[entity_ind]
entity.set_caption(translated_parts[entity_part_ind])
translated_parts[entity_part_ind] = entity

return translated_parts

0 comments on commit 0b3e187

Please sign in to comment.