Skip to content

Commit

Permalink
Speed up PO compiling when there are a lot of entries to remove
Browse files Browse the repository at this point in the history
polib implements deserialized po files as subclasses of `list`. Thus,
when you want to remove an entry, you can either use `.remove` or `del`.
`.remove` (the one we were using so far) does a serial search in the po
file until it finds the entry to remove. If the po file is big and you
are doing lots of removals, this can become unbearable.

This implementation instead uses `del`. While iterating through the po
file, we add the indexes of the entries we want to remove in a list.
After the iteration is finished, we go through that list in reverse
order and use `del` to remove the entries. Since `del` does not do a
serial search, it is noticeably faster.
  • Loading branch information
kbairak committed Mar 19, 2018
1 parent 4ff2d73 commit 14fa8f7
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions openformats/formats/po.py
Expand Up @@ -111,7 +111,6 @@ def _get_string_data(self, entry):
'flags': ', '.join(entry.flags),
'occurrences': occurrences if occurrences else None,
'developer_comment': entry_comments,
'context': entry.msgctxt if entry.msgctxt else ""
}

string = self._get_string(entry, pluralized)
Expand Down Expand Up @@ -270,7 +269,8 @@ def compile(self, template, stringset, **kwargs):
next_string = next(stringset, None)

po = polib.pofile(template)
for entry in list(po):
indexes_to_remove = []
for i, entry in enumerate(po):
if next_string is not None:
is_plural = True if entry.msgid_plural.strip() else False
if is_plural:
Expand All @@ -280,7 +280,8 @@ def compile(self, template, stringset, **kwargs):
if compiled:
next_string = next(stringset, None)
continue
po.remove(entry)
indexes_to_remove.append(i)
self._smart_remove(po, indexes_to_remove)
return unicode(po)

def _compile_entry(self, entry, next_string):
Expand Down Expand Up @@ -309,6 +310,10 @@ def _compile_plural_entry(self, entry, next_string):
return True
return False

def _smart_remove(self, po, indexes_to_remove):
for i in reversed(indexes_to_remove):
del po[i]

@staticmethod
def _format_occurrences(occurrences):
"""Format the occurrences and return them.
Expand Down

0 comments on commit 14fa8f7

Please sign in to comment.