From b0c614e267ab515cf14902d51f97994797ccb647 Mon Sep 17 00:00:00 2001 From: pacien Date: Fri, 30 Jun 2023 22:45:09 +0200 Subject: [PATCH] devices: use combined ANY clause for faster cleanup Old device entries for the same user were being removed in individual SQL commands, making the batch take way longer than necessary. This combines the commands into a single one with a IN/ANY clause. Example of log entry before the change, regularly observed with "log_min_duration_statement = 10000" in PostgreSQL's config: LOG: duration: 42538.282 ms statement: DELETE FROM device_lists_stream WHERE user_id = '@someone' AND device_id = 'someid1' AND stream_id < 123456789 ; DELETE FROM device_lists_stream WHERE user_id = '@someone' AND device_id = 'someid2' AND stream_id < 123456789 ; [repeated for each device ID of that user, potentially a lot...] With the patch applied on my instance for the past couple of days, I no longer notice overly long statements of that particular kind. Signed-off-by: pacien --- changelog.d/15861.misc | 1 + synapse/storage/databases/main/devices.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 changelog.d/15861.misc diff --git a/changelog.d/15861.misc b/changelog.d/15861.misc new file mode 100644 index 000000000000..6f320eab81c9 --- /dev/null +++ b/changelog.d/15861.misc @@ -0,0 +1 @@ +Optimised cleanup of old entries in device_lists_stream. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index f677d048aafb..d9df437e518a 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1950,12 +1950,16 @@ def _add_device_change_to_stream_txn( # Delete older entries in the table, as we really only care about # when the latest change happened. - txn.execute_batch( - """ + cleanup_obsolete_stmt = """ DELETE FROM device_lists_stream - WHERE user_id = ? AND device_id = ? AND stream_id < ? - """, - [(user_id, device_id, min_stream_id) for device_id in device_ids], + WHERE user_id = ? AND stream_id < ? AND %s + """ + device_ids_clause, device_ids_args = make_in_list_sql_clause( + txn.database_engine, "device_id", device_ids + ) + txn.execute( + cleanup_obsolete_stmt % (device_ids_clause,), + [user_id, min_stream_id] + device_ids_args, ) self.db_pool.simple_insert_many_txn(