Skip to content

Commit

Permalink
Fixes APSL#68 -- improve performance of data migration
Browse files Browse the repository at this point in the history
Tested against a 13GB SQL dump of the email table (~23K records),
within a cgroup with 300M memory limit. The migration completed
in just under 60s.

* Use `.iterator()` to avoid loading the entire table into memory (it bypasses the queryset result cache)
* Defer the message content (`encoded_message`), as it may contain attachments that cause
  excessive memory usage - it is not used in the migration anyway.
* Replace loop over log table with 3 separate SQL update queries
  • Loading branch information
sergei-maertens committed Oct 5, 2023
1 parent 2a0d59a commit 624f5b8
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions django_yubin/migrations/0007_auto_20200319_1158.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def migrate_to_queues(apps, schema_editor):
Log = apps.get_model('django_yubin', 'Log')

# Messages without a QueuedMessage are sent.
for message in Message.objects.all():
for message in Message.objects.defer("encoded_message").iterator():
queued = QueuedMessage.objects.filter(message=message).only('date_queued').first()
if queued:
message.status = DBMessage.STATUS_QUEUED
Expand All @@ -35,14 +35,9 @@ def migrate_to_queues(apps, schema_editor):
message.save()

# Set Log actions based on its result
for log in Log.objects.all():
if log.result == RESULT_SENT:
log.action = DBMessage.STATUS_SENT
elif log.result == RESULT_FAILED:
log.action = DBMessage.STATUS_FAILED
elif log.result == RESULT_SKIPPED:
log.action = DBMessage.STATUS_DISCARDED
log.save()
Log.objects.filter(result=RESULT_SENT).update(action=DBMessage.STATUS_SENT)
Log.objects.filter(result=RESULT_FAILED).update(action=DBMessage.STATUS_FAILED)
Log.objects.filter(result=RESULT_SKIPPED).update(action=DBMessage.STATUS_DISCARDED)


class Migration(migrations.Migration):
Expand Down

0 comments on commit 624f5b8

Please sign in to comment.