
Attempt to delete more duplicate rows in receipts_linearized table. #14915

Merged · 8 commits · Feb 1, 2023
Changes from 5 commits
1 change: 1 addition & 0 deletions changelog.d/14915.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.70.0 where the background updates to add non-thread unique indexes on receipts could fail when upgrading from 1.67.0 or earlier.
Contributor (author) commented:

Stolen from #14453.

23 changes: 14 additions & 9 deletions synapse/storage/databases/main/receipts.py
@@ -941,34 +941,39 @@ async def _background_receipts_linearized_unique_index(
         receipts."""
 
         def _remote_duplicate_receipts_txn(txn: LoggingTransaction) -> None:
+            if isinstance(self.database_engine, PostgresEngine):
+                ROW_ID_NAME = "ctid"
+            else:
+                ROW_ID_NAME = "rowid"
+
             # Identify any duplicate receipts arising from
             # https://github.com/matrix-org/synapse/issues/14406.
             # We expect the following query to use the per-thread receipt index and take
             # less than a minute.
-            sql = """
-                SELECT MAX(stream_id), room_id, receipt_type, user_id
+            sql = f"""
+                SELECT MAX(stream_id), MAX({ROW_ID_NAME}), room_id, receipt_type, user_id
                 FROM receipts_linearized
                 WHERE thread_id IS NULL
                 GROUP BY room_id, receipt_type, user_id
                 HAVING COUNT(*) > 1
             """
             txn.execute(sql)
-            duplicate_keys = cast(List[Tuple[int, str, str, str]], list(txn))
+            duplicate_keys = cast(List[Tuple[int, int, str, str, str]], list(txn))
 
             # Then remove duplicate receipts, keeping the one with the highest
-            # `stream_id`. There should only be a single receipt with any given
-            # `stream_id`.
-            for max_stream_id, room_id, receipt_type, user_id in duplicate_keys:
-                sql = """
+            # `stream_id`. Since there might be duplicate rows with the same
+            # `stream_id`, we delete by the rowid instead.
+            for _, row_id, room_id, receipt_type, user_id in duplicate_keys:
+                sql = f"""
                     DELETE FROM receipts_linearized
                     WHERE
                         room_id = ? AND
                         receipt_type = ? AND
                         user_id = ? AND
                         thread_id IS NULL AND
-                        stream_id < ?
+                        {ROW_ID_NAME} != ?
                 """
-                txn.execute(sql, (room_id, receipt_type, user_id, max_stream_id))
+                txn.execute(sql, (room_id, receipt_type, user_id, row_id))
 
         await self.db_pool.runInteraction(
             self.RECEIPTS_LINEARIZED_UNIQUE_INDEX_UPDATE_NAME,
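Why the change works: the previous query kept the receipt with the highest stream_id and deleted rows with a lower one, so rows duplicated at the same stream_id were never matched by `stream_id < ?`. Deleting by physical row identity (SQLite's rowid, Postgres's ctid) instead removes every row for the key except one chosen survivor. Below is a minimal, self-contained sketch of that pattern against SQLite; the simplified schema and sample values are illustrative assumptions, not Synapse code.

import sqlite3

# Minimal sketch of the rowid-based dedup pattern (illustrative only).
conn = sqlite3.connect(":memory:")
conn.execute(
    """
    CREATE TABLE receipts_linearized (
        stream_id INTEGER,
        room_id TEXT,
        receipt_type TEXT,
        user_id TEXT,
        thread_id TEXT
    )
    """
)
conn.executemany(
    "INSERT INTO receipts_linearized VALUES (?, ?, ?, ?, ?)",
    [
        (6, "!a:x", "m.read", "@u:x", None),
        # Two duplicates sharing the maximum stream_id for their key;
        # the old predicate `stream_id < 7` matches neither of them.
        (7, "!b:x", "m.read", "@u:x", None),
        (7, "!b:x", "m.read", "@u:x", None),
    ],
)

# Pick one surviving row id per duplicated key, as in the PR's SELECT.
duplicates = conn.execute(
    """
    SELECT MAX(stream_id), MAX(rowid), room_id, receipt_type, user_id
    FROM receipts_linearized
    WHERE thread_id IS NULL
    GROUP BY room_id, receipt_type, user_id
    HAVING COUNT(*) > 1
    """
).fetchall()

# Delete every other row for that key, keyed on rowid rather than stream_id.
for _max_stream_id, row_id, room_id, receipt_type, user_id in duplicates:
    conn.execute(
        """
        DELETE FROM receipts_linearized
        WHERE room_id = ? AND receipt_type = ? AND user_id = ?
              AND thread_id IS NULL AND rowid != ?
        """,
        (room_id, receipt_type, user_id, row_id),
    )

# Exactly one receipt per key remains.
assert conn.execute("SELECT COUNT(*) FROM receipts_linearized").fetchone()[0] == 2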
4 changes: 3 additions & 1 deletion tests/storage/databases/main/test_receipts.py
@@ -168,7 +168,9 @@ def test_background_receipts_linearized_unique_index(self) -> None:
                     {"stream_id": 6, "event_id": "$some_event"},
                 ],
                 (self.other_room_id, "m.read", self.user_id): [
-                    {"stream_id": 7, "event_id": "$some_event"}
+                    # It is possible for stream IDs to be duplicated.
+                    {"stream_id": 7, "event_id": "$some_event"},
+                    {"stream_id": 7, "event_id": "$some_event"},
                 ],
             },
             expected_unique_receipts={
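The updated fixture feeds the background update two receipts with identical stream IDs, which is exactly the shape of row the old delete missed. As a hedged sketch of the invariant that expected_unique_receipts encodes (simplified, reusing conn from the sketch above rather than the Synapse test harness):

# After deduplication, no unthreaded (room_id, receipt_type, user_id)
# key should appear more than once. Simplified stand-in for the test's
# expected_unique_receipts check; not the actual Synapse test harness.
leftover = conn.execute(
    """
    SELECT room_id, receipt_type, user_id, COUNT(*)
    FROM receipts_linearized
    WHERE thread_id IS NULL
    GROUP BY room_id, receipt_type, user_id
    HAVING COUNT(*) > 1
    """
).fetchall()
assert leftover == [], "duplicate receipts remain"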