Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 30 additions & 28 deletions src/moin/storage/middleware/indexing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright: 2011 MoinMoin:RonnyPfannschmidt
# Copyright: 2011 MoinMoin:ThomasWaldmann
# Copyright: 2011 MoinMoin:MichaelMayorov
# Copyright: 2024 MoinMoin:UlrichB
# Copyright: 2024-2025 MoinMoin:UlrichB
# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.

"""
Expand Down Expand Up @@ -105,29 +105,6 @@
INDEXER_TIMEOUT = 20.0


def get_indexer(fn, **kw):
"""
Return a valid indexer or raise a KeyError.

Under heavy loads, the Whoosh AsyncWriter writer may be delayed in writing
indexes to storage. Try several times before failing.

FIXME: runs into timeout for a non-existing revid

:param fn: the indexer function
:param **kw: "revid" is required, index name optional
"""
until = time.time() + INDEXER_TIMEOUT
while True:
indexer = fn(**kw)
if indexer is not None:
break
time.sleep(2)
if time.time() > until:
raise KeyError(kw.get("revid", "") + " - server overload or corrupt index")
return indexer


def parent_names(names):
"""
Compute list of parent names (same order as in names, but no dupes)
Expand Down Expand Up @@ -1045,6 +1022,31 @@ def existing_item(self, **query):
"""
return Item.existing(self, **query)

def get_document(self, revid: str, retry: bool = False, **kwargs):
"""
Return a valid indexer document or raise a KeyError.

Under heavy loads, the Whoosh AsyncWriter writer may be delayed in writing
indexes to storage. Try several times before failing.

:param revid: revision to search
:param retry: retry backend search if document not found, required when server load is high
:param kwargs: idx_name, name of index used for searching (optional)
"""
until = time.monotonic() + INDEXER_TIMEOUT
while True:
doc = self._document(revid=revid, **kwargs)
if doc is not None:
break
if not retry:
msg = f"revid: {revid} not found. Please check meta data and indexes"
raise KeyError(msg)
if time.monotonic() > until:
msg = f"revid: {revid} - Server overload may have corrupted the index; rebuild it."
raise KeyError(msg)
time.sleep(2)
return doc


class PropertiesMixin:
"""
Expand Down Expand Up @@ -1357,9 +1359,9 @@ def store_revision(
self.indexer.index_revision(meta, content, backend_name, force_latest=not overwrite)
gc.collect() # triggers close of index files from is_latest search
if not overwrite:
self._current = get_indexer(self.indexer._document, revid=revid)
self._current = self.indexer.get_document(revid=revid, retry=True)
if return_rev:
return Revision(self, revid)
return Revision(self, revid, retry=True)

def store_all_revisions(self, meta, data):
"""
Expand Down Expand Up @@ -1404,13 +1406,13 @@ class Revision(PropertiesMixin):
An existing revision (exists in the backend).
"""

def __init__(self, item: Item, revid: str, doc=None, name=None):
def __init__(self, item: Item, revid: str, doc=None, name=None, retry=False):
is_current = revid == CURRENT
if doc is None:
if is_current:
doc = item._current
else:
doc = get_indexer(item.indexer._document, idx_name=ALL_REVS, revid=revid)
doc = item.indexer.get_document(idx_name=ALL_REVS, revid=revid, retry=retry)

if is_current:
revid = doc.get(REVID)
Expand Down