From e9ed52153f98bbe4a62c40d3e84b01c01560c041 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Breitbart?=
Date: Fri, 8 Apr 2022 13:21:51 +0200
Subject: [PATCH] some perf metrics

---
 README.md                                  | 64 ++++++++++++++++---
 example/example/settings.py                |  2 +-
 example/exampleapp/management/__init__.py  |  0
 .../management/commands/__init__.py        |  0
 .../exampleapp/management/commands/perf.py | 56 ++++++++++++++++
 5 files changed, 113 insertions(+), 9 deletions(-)
 create mode 100644 example/exampleapp/management/__init__.py
 create mode 100644 example/exampleapp/management/commands/__init__.py
 create mode 100644 example/exampleapp/management/commands/perf.py

diff --git a/README.md b/README.md
index 4fe4d62..b9e79a6 100644
--- a/README.md
+++ b/README.md
@@ -42,21 +42,69 @@ MyModel.objects.fast_update(bunch_of_instances, ['field_a', 'field_b', 'field_c'
 Note that with `fast_update` f-expressions cannot be used anymore.
 This is a design decision to not penalize update performance by some swiss-army-knife functionality.
-If you have f-expressions in your update data, consider re-grouping the update steps and update
-fields with expression values with `update` or `bulk_update` instead.
+If you have f-expressions in your update data, consider re-grouping the update steps and update those
+fields with `update` or `bulk_update` instead, as sketched below.
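+
+A sketch of such a re-grouping (`counter` here is a hypothetical f-expression field,
+`field_a`/`field_b` are plain value fields as in the example above):
+
+```python
+from django.db.models import F
+
+# plain value fields take the fast path
+MyModel.objects.fast_update(bunch_of_instances, ['field_a', 'field_b'])
+
+# hypothetical f-expression field: updated separately with a regular queryset update
+MyModel.objects.filter(
+    pk__in=[obj.pk for obj in bunch_of_instances]
+).update(counter=F('counter') + 1)
+```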
 
 ### copy_update ###
 
-The package also contains an early draft `copy_update`, for table updates with `COPY FROM`
-for PostgreSQL. This is not yet fully implemented, currently missing pieces:
-- array support
-- hstore, range fields
-- derive transport encoding from psycopg2 connection settings
+This is a PostgreSQL-only update implementation based on `COPY FROM`, which runs even faster
+than `fast_update` for medium to large changesets.
+
+Note that this will probably never leave the alpha/PoC state, as psycopg3 ships with great COPY
+support, which does safer value conversion and runs even faster in its C version.
+
+TODO - describe usage and limitations...
 
 ### Status ###
 
 Currently alpha, left to do:
-- finish `copy_update`
+- finish `copy_update` (array null cascading, some tests)
 - some better docs
+
+
+### Performance ###
+
+The example app provides a management command (`python manage.py perf`) that measures the performance
+of updates on the `FieldUpdate` model.
+Here are some numbers from my laptop (measured with `settings.DEBUG=False` and the database engines
+freshly bootstrapped from Docker as noted in `settings.py`):
+
+
+| Postgres (rows) | bulk_update (s) | fast_update (s) | bulk/fast | copy_update (s) | bulk/copy | fast/copy |
+|-----------------|-----------------|-----------------|-----------|-----------------|-----------|-----------|
+| 10              | 0.0471          | 0.0044          | 10.7      | 0.0083          | 5.7       | 0.5       |
+| 100             | 0.4095          | 0.0222          | 18.4      | 0.0216          | 18.9      | 1.0       |
+| 1000            | 4.4909          | 0.1571          | 28.6      | 0.0906          | 49.6      | 1.7       |
+| 10000           | 86.89           | 1.49            | 58.3      | 0.70            | 124.1     | 2.1       |
+
+| SQLite (rows) | bulk_update (s) | fast_update (s) | bulk/fast |
+|---------------|-----------------|-----------------|-----------|
+| 10            | 0.0443          | 0.0018          | 24.6      |
+| 100           | 0.4408          | 0.0108          | 40.8      |
+| 1000          | 4.0178          | 0.0971          | 41.4      |
+| 10000         | 40.90           | 0.97            | 42.2      |
+
+| MariaDB (rows) | bulk_update (s) | fast_update (s) | bulk/fast |
+|----------------|-----------------|-----------------|-----------|
+| 10             | 0.0448          | 0.0049          | 9.1       |
+| 100            | 0.4069          | 0.0252          | 16.1      |
+| 1000           | 5.0570          | 0.1759          | 28.7      |
+| 10000          | 139.20          | 1.74            | 80.0      |
+
+| MySQL8 (rows) | bulk_update (s) | fast_update (s) | bulk/fast |
+|---------------|-----------------|-----------------|-----------|
+| 10            | 0.0442          | 0.0055          | 8.0       |
+| 100           | 0.4132          | 0.0278          | 14.9      |
+| 1000          | 5.2495          | 0.2115          | 24.8      |
+| 10000         | 136.61          | 1.99            | 68.6      |
+
+
+`fast_update` is at least 8 times faster than `bulk_update` and keeps gaining ground as changesets grow.
+This indicates a different runtime complexity. `fast_update` grows almost linearly for very large row
+counts (tested in some perf series against `copy_update` up to 10M rows), while `bulk_update` grows much
+faster (it looks quadratic to me; I did not investigate this further).
+
+For very big changesets `copy_update` is the clear winner and even shows a substantial increase in
+updated rows/s (within my test range; as an upper bound this of course cannot keep scaling better
+than linear, as the data pumping will eventually saturate).
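+
+As a quick sanity check of the scaling claim, here is a small sketch computing per-decade growth
+factors from the Postgres columns above (a factor of ~10 per tenfold changeset size would indicate
+linear scaling):
+
+```python
+# timings from the Postgres table above (rows -> seconds)
+bulk = {10: 0.0471, 100: 0.4095, 1000: 4.4909, 10000: 86.89}
+fast = {10: 0.0044, 100: 0.0222, 1000: 0.1571, 10000: 1.49}
+
+for a, b in ((10, 100), (100, 1000), (1000, 10000)):
+    print(f'{a}->{b}: bulk x{bulk[b] / bulk[a]:.1f}, fast x{fast[b] / fast[a]:.1f}')
+
+# bulk_update grows by ~8.7x, ~11.0x, ~19.3x per decade (clearly superlinear),
+# fast_update by ~5.0x, ~7.1x, ~9.5x (at or below linear growth)
+```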
diff --git a/example/example/settings.py b/example/example/settings.py
index 3fce74f..7f09aec 100644
--- a/example/example/settings.py
+++ b/example/example/settings.py
@@ -27,7 +27,7 @@
 SECRET_KEY = 'django-insecure-^smos84&-v=$$z(30ef07@^+mn5dvo=h8*_f3#=qt9=)x-5o-g'
 
 # SECURITY WARNING: don't run with debug turned on in production!
-# TODO: switch off for perf testing
+# NOTE: switch off for perf testing
 DEBUG = True
 
 ALLOWED_HOSTS = []
diff --git a/example/exampleapp/management/__init__.py b/example/exampleapp/management/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/example/exampleapp/management/commands/__init__.py b/example/exampleapp/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/example/exampleapp/management/commands/perf.py b/example/exampleapp/management/commands/perf.py
new file mode 100644
index 0000000..775830b
--- /dev/null
+++ b/example/exampleapp/management/commands/perf.py
@@ -0,0 +1,56 @@
+from time import time, sleep
+
+from django.core.management.base import BaseCommand
+from django.db import transaction, connection
+
+from exampleapp.models import FieldUpdate
+from exampleapp.tests import EXAMPLE, FIELDS
+
+
+def tester(f, n=10):
+    # Time f against a freshly created changeset of 10k rows, averaged over n runs.
+    runs = []
+    for _ in range(n):
+        # some sleep to put the db at rest
+        sleep(1)
+        with transaction.atomic():
+            FieldUpdate.objects.bulk_create([FieldUpdate() for _ in range(10000)])
+            objs = FieldUpdate.objects.all()
+            changeset = [FieldUpdate(pk=o.pk, **EXAMPLE) for o in objs]
+            start = time()
+            f(changeset)
+            runs.append(time() - start)
+        FieldUpdate.objects.all().delete()
+    return sum(runs) / len(runs)
+
+
+class Command(BaseCommand):
+    def handle(self, *args, **options):
+        methods = ['bulk_update', 'fast_update']
+        if connection.vendor == 'postgresql':
+            # copy_update is PostgreSQL-only
+            methods.append('copy_update')
+        # only 2 runs for the 10k changeset to keep the overall runtime bearable
+        for num, n in ((10, 10), (100, 10), (1000, 10), (10000, 2)):
+            print(f'{num} instances')
+            for method in methods:
+                f = getattr(FieldUpdate.objects, method)
+                print(f'{method}:', tester(lambda ch: f(ch[:num], FIELDS), n))