From 61bbbcaf3022822d705bcb74620838f4c9f42e9e Mon Sep 17 00:00:00 2001 From: UlrichB22 <97119703+UlrichB22@users.noreply.github.com> Date: Sat, 27 Sep 2025 22:00:15 +0200 Subject: [PATCH 1/3] Rename and rework get_indexer function --- src/moin/storage/middleware/indexing.py | 34 ++++++++++++++----------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/moin/storage/middleware/indexing.py b/src/moin/storage/middleware/indexing.py index 54ed61318..11709668a 100644 --- a/src/moin/storage/middleware/indexing.py +++ b/src/moin/storage/middleware/indexing.py @@ -1,7 +1,7 @@ # Copyright: 2011 MoinMoin:RonnyPfannschmidt # Copyright: 2011 MoinMoin:ThomasWaldmann # Copyright: 2011 MoinMoin:MichaelMayorov -# Copyright: 2024 MoinMoin:UlrichB +# Copyright: 2024-2025 MoinMoin:UlrichB # License: GNU GPL v2 (or any later version), see LICENSE.txt for details. """ @@ -105,27 +105,31 @@ INDEXER_TIMEOUT = 20.0 -def get_indexer(fn, **kw): +def get_doc(fn, revid, retry=False, **kwargs): """ - Return a valid indexer or raise a KeyError. + Return a valid document or raise a KeyError. Under heavy loads, the Whoosh AsyncWriter writer may be delayed in writing indexes to storage. Try several times before failing. - FIXME: runs into timeout for a non-existing revid - :param fn: the indexer function - :param **kw: "revid" is required, index name optional + :param revid: revision to search + :param retry: retry backend search if document not found, required when server load is high + :param kwargs: idx_name, name of index used for searching (optional) """ until = time.time() + INDEXER_TIMEOUT while True: - indexer = fn(**kw) - if indexer is not None: + doc = fn(revid=revid, **kwargs) + if doc is not None: break - time.sleep(2) + if not retry: + msg = f"revid: {revid} not found. Please check meta data and indexes" + raise KeyError(msg) if time.time() > until: - raise KeyError(kw.get("revid", "") + " - server overload or corrupt index") - return indexer + msg = f"revid: {revid} - Server overload may have corrupted the index; rebuild it." + raise KeyError(msg) + time.sleep(2) + return doc def parent_names(names): @@ -1357,9 +1361,9 @@ def store_revision( self.indexer.index_revision(meta, content, backend_name, force_latest=not overwrite) gc.collect() # triggers close of index files from is_latest search if not overwrite: - self._current = get_indexer(self.indexer._document, revid=revid) + self._current = get_doc(self.indexer._document, revid=revid, retry=True) if return_rev: - return Revision(self, revid) + return Revision(self, revid, retry=True) def store_all_revisions(self, meta, data): """ @@ -1404,13 +1408,13 @@ class Revision(PropertiesMixin): An existing revision (exists in the backend). """ - def __init__(self, item: Item, revid: str, doc=None, name=None): + def __init__(self, item: Item, revid: str, doc=None, name=None, retry=False): is_current = revid == CURRENT if doc is None: if is_current: doc = item._current else: - doc = get_indexer(item.indexer._document, idx_name=ALL_REVS, revid=revid) + doc = get_doc(item.indexer._document, idx_name=ALL_REVS, revid=revid, retry=retry) if is_current: revid = doc.get(REVID) From 54fb0b618555db39f461d59ea882f87273567375 Mon Sep 17 00:00:00 2001 From: UlrichB22 <97119703+UlrichB22@users.noreply.github.com> Date: Wed, 1 Oct 2025 21:13:47 +0200 Subject: [PATCH 2/3] Rename get_doc to get_document and other code improvements --- src/moin/storage/middleware/indexing.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/moin/storage/middleware/indexing.py b/src/moin/storage/middleware/indexing.py index 11709668a..3b413d01f 100644 --- a/src/moin/storage/middleware/indexing.py +++ b/src/moin/storage/middleware/indexing.py @@ -105,27 +105,27 @@ INDEXER_TIMEOUT = 20.0 -def get_doc(fn, revid, retry=False, **kwargs): +def get_document(indexer: IndexingMiddleware, revid: str, retry: bool = False, **kwargs): """ - Return a valid document or raise a KeyError. + Return a valid indexer document or raise a KeyError. Under heavy loads, the Whoosh AsyncWriter writer may be delayed in writing indexes to storage. Try several times before failing. - :param fn: the indexer function + :param indexer: instance of IndexingMiddleware :param revid: revision to search :param retry: retry backend search if document not found, required when server load is high :param kwargs: idx_name, name of index used for searching (optional) """ - until = time.time() + INDEXER_TIMEOUT + until = time.monotonic() + INDEXER_TIMEOUT while True: - doc = fn(revid=revid, **kwargs) + doc = indexer._document(revid=revid, **kwargs) if doc is not None: break if not retry: msg = f"revid: {revid} not found. Please check meta data and indexes" raise KeyError(msg) - if time.time() > until: + if time.monotonic() > until: msg = f"revid: {revid} - Server overload may have corrupted the index; rebuild it." raise KeyError(msg) time.sleep(2) @@ -1361,7 +1361,7 @@ def store_revision( self.indexer.index_revision(meta, content, backend_name, force_latest=not overwrite) gc.collect() # triggers close of index files from is_latest search if not overwrite: - self._current = get_doc(self.indexer._document, revid=revid, retry=True) + self._current = get_document(self.indexer, revid=revid, retry=True) if return_rev: return Revision(self, revid, retry=True) @@ -1414,7 +1414,7 @@ def __init__(self, item: Item, revid: str, doc=None, name=None, retry=False): if is_current: doc = item._current else: - doc = get_doc(item.indexer._document, idx_name=ALL_REVS, revid=revid, retry=retry) + doc = get_document(item.indexer, idx_name=ALL_REVS, revid=revid, retry=retry) if is_current: revid = doc.get(REVID) From a391485d3b5e408e4475b833a5fa03e3fe53d653 Mon Sep 17 00:00:00 2001 From: UlrichB22 <97119703+UlrichB22@users.noreply.github.com> Date: Wed, 1 Oct 2025 21:15:28 +0200 Subject: [PATCH 3/3] Change get_document to a method of IndexingMiddleware --- src/moin/storage/middleware/indexing.py | 56 ++++++++++++------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/moin/storage/middleware/indexing.py b/src/moin/storage/middleware/indexing.py index 3b413d01f..76950c7a1 100644 --- a/src/moin/storage/middleware/indexing.py +++ b/src/moin/storage/middleware/indexing.py @@ -105,33 +105,6 @@ INDEXER_TIMEOUT = 20.0 -def get_document(indexer: IndexingMiddleware, revid: str, retry: bool = False, **kwargs): - """ - Return a valid indexer document or raise a KeyError. - - Under heavy loads, the Whoosh AsyncWriter writer may be delayed in writing - indexes to storage. Try several times before failing. - - :param indexer: instance of IndexingMiddleware - :param revid: revision to search - :param retry: retry backend search if document not found, required when server load is high - :param kwargs: idx_name, name of index used for searching (optional) - """ - until = time.monotonic() + INDEXER_TIMEOUT - while True: - doc = indexer._document(revid=revid, **kwargs) - if doc is not None: - break - if not retry: - msg = f"revid: {revid} not found. Please check meta data and indexes" - raise KeyError(msg) - if time.monotonic() > until: - msg = f"revid: {revid} - Server overload may have corrupted the index; rebuild it." - raise KeyError(msg) - time.sleep(2) - return doc - - def parent_names(names): """ Compute list of parent names (same order as in names, but no dupes) @@ -1049,6 +1022,31 @@ def existing_item(self, **query): """ return Item.existing(self, **query) + def get_document(self, revid: str, retry: bool = False, **kwargs): + """ + Return a valid indexer document or raise a KeyError. + + Under heavy loads, the Whoosh AsyncWriter writer may be delayed in writing + indexes to storage. Try several times before failing. + + :param revid: revision to search + :param retry: retry backend search if document not found, required when server load is high + :param kwargs: idx_name, name of index used for searching (optional) + """ + until = time.monotonic() + INDEXER_TIMEOUT + while True: + doc = self._document(revid=revid, **kwargs) + if doc is not None: + break + if not retry: + msg = f"revid: {revid} not found. Please check meta data and indexes" + raise KeyError(msg) + if time.monotonic() > until: + msg = f"revid: {revid} - Server overload may have corrupted the index; rebuild it." + raise KeyError(msg) + time.sleep(2) + return doc + class PropertiesMixin: """ @@ -1361,7 +1359,7 @@ def store_revision( self.indexer.index_revision(meta, content, backend_name, force_latest=not overwrite) gc.collect() # triggers close of index files from is_latest search if not overwrite: - self._current = get_document(self.indexer, revid=revid, retry=True) + self._current = self.indexer.get_document(revid=revid, retry=True) if return_rev: return Revision(self, revid, retry=True) @@ -1414,7 +1412,7 @@ def __init__(self, item: Item, revid: str, doc=None, name=None, retry=False): if is_current: doc = item._current else: - doc = get_document(item.indexer, idx_name=ALL_REVS, revid=revid, retry=retry) + doc = item.indexer.get_document(idx_name=ALL_REVS, revid=revid, retry=retry) if is_current: revid = doc.get(REVID)