Skip to content
This repository has been archived by the owner on Dec 13, 2023. It is now read-only.

Correctly filter out extremities with soft failed prevs #5274

Merged
merged 5 commits into from May 29, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/5274.bugfix
@@ -0,0 +1 @@
Fix bug where we leaked extremities when we soft failed events, leading to performance degradation.
69 changes: 67 additions & 2 deletions synapse/storage/events.py
Expand Up @@ -558,6 +558,11 @@ def _calculate_new_extremities(self, room_id, event_contexts, latest_event_ids):
existing_prevs = yield self._get_events_which_are_prevs(result)
result.difference_update(existing_prevs)

existing_prevs = yield self._get_prevs_before_rejected(
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
e_id for event in new_events for e_id in event.prev_event_ids()
)
result.difference_update(existing_prevs)

defer.returnValue(result)

@defer.inlineCallbacks
Expand All @@ -573,7 +578,7 @@ def _get_events_which_are_prevs(self, event_ids):
"""
results = []

def _get_events(txn, batch):
def _get_events_which_are_prevs_txn(txn, batch):
sql = """
SELECT prev_event_id, internal_metadata
FROM event_edges
Expand All @@ -596,10 +601,70 @@ def _get_events(txn, batch):
)

for chunk in batch_iter(event_ids, 100):
yield self.runInteraction("_get_events_which_are_prevs", _get_events, chunk)
yield self.runInteraction(
"_get_events_which_are_prevs", _get_events_which_are_prevs_txn,
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
chunk,
)

defer.returnValue(results)

@defer.inlineCallbacks
def _get_prevs_before_rejected(self, event_ids):
"""Given a set of events recursively find all prev events that have
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my understanding is that we start by checking the events themselves, rather than their prev events, for rejections. This description implies otherwise.

Also, the idea is that the first line is one-line description of the function (https://www.python.org/dev/peps/pep-0257/#multi-line-docstrings). Can you try and make a one-liner, even if it's a bit opaque? ("Find transitive predecessors of some events, via rejections." ?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my understanding is that we start by checking the events themselves, rather than their prev events, for rejections. This description implies otherwise.

So this starts with the events we're about to persist and checks their prev events, and their prev events, etc. This is the opposite to _get_events_which_are_prevs which works in the other direction. Since we haven't persisted the new event yet we have to pass in their prev events and start from there.

been soft-failed or rejected. Then return those soft failed events
and their prev events.
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

This is used to find extremities that are ancestors of new events, but
are separated by soft failed events.

Args:
event_ids (Iterable[str]): Events to find prev events for. Note
that these must have already been persisted.

Returns:
Deferred[set[str]]
"""
existing_prevs = set()
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

def _get_prevs_before_rejected_txn(txn, batch):
to_recursively_check = batch

while to_recursively_check:
sql = """
SELECT
event_id, prev_event_id, internal_metadata,
rejections.event_id IS NOT NULL
FROM event_edges
INNER JOIN events USING (event_id)
LEFT JOIN rejections USING (event_id)
LEFT JOIN event_json USING (event_id)
WHERE
event_id IN (%s)
AND NOT events.outlier
""" % (
",".join("?" for _ in to_recursively_check),
)

txn.execute(sql, to_recursively_check)
to_recursively_check = []

for event_id, prev_event_id, metadata, rejected in txn:
if prev_event_id in existing_prevs:
continue

soft_failed = json.loads(metadata).get("soft_failed")
if soft_failed or rejected:
to_recursively_check.append(prev_event_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

surely we should only do this if the event is not in existing_prevs, to save duplication

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a check above if prev_event_id in existing_prevs?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<- idiot

existing_prevs.add(prev_event_id)

for chunk in batch_iter(event_ids, 100):
yield self.runInteraction(
"_get_prevs_before_rejected", _get_prevs_before_rejected_txn,
chunk,
)

defer.returnValue(existing_prevs)

@defer.inlineCallbacks
def _get_new_state_after_events(
self, room_id, events_context, old_latest_event_ids, new_latest_event_ids
Expand Down