From a01bb83bc4e3946999d42cd123985253216d0ada Mon Sep 17 00:00:00 2001 From: Alvaro Fuentes Date: Tue, 9 Sep 2025 16:34:18 +0200 Subject: [PATCH] [IMP] util/orm: `recompute_fields` add query parameter In most cases when we pass a list of `ids` to `recompute_fields` we just run a query to get them. The main downside of this approach is when the list is too big: it can cause MemoryErrors. Instead we can let `recompute_fields` use a query to get the ids and take the necessary precautions to not fetch millions of entries all at once. As a nice side effect, the code becomes easier to review since we don't need to check that the fetching of ids is done correctly. --- src/util/orm.py | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/src/util/orm.py b/src/util/orm.py index 489032c31..0c97b9943 100644 --- a/src/util/orm.py +++ b/src/util/orm.py @@ -251,7 +251,7 @@ def wrapper(*args, **kwargs): @no_selection_cache_validation -def recompute_fields(cr, model, fields, ids=None, logger=_logger, chunk_size=256, strategy="auto"): +def recompute_fields(cr, model, fields, ids=None, logger=_logger, chunk_size=256, strategy="auto", query=None): """ Recompute field values. 
@@ -271,31 +271,45 @@ def recompute_fields(cr, model, fields, ids=None, logger=_logger, chunk_size=256 :param str model: name of the model to recompute :param list(str) fields: list of the name of the fields to recompute :param list(int) or None ids: list of the IDs of the records to recompute, when `None` - recompute *all* records + recompute *all* records, unless `query` is also set (see + below) :param logger: logger used to report the progress :type logger: :class:`logging.Logger` :param int chunk_size: number of records per chunk - used to split the processing :param str strategy: strategy used to process the re-computation + :param str query: query to get the IDs of records to recompute, it is an error to set + both `ids` and `query` """ - assert strategy in {"flush", "commit", "auto"} + if strategy not in {"flush", "commit", "auto"}: + raise ValueError("Invalid strategy {!r}".format(strategy)) + if ids is not None and query is not None: + raise ValueError("Cannot set both `ids` and `query`") Model = env(cr)[model] if isinstance(model, basestring) else model model = Model._name - def get_ids(): - if ids is not None: - for id_ in ids: - yield id_ + if ids is None: + query = format_query(cr, "SELECT id FROM {} ORDER BY id", table_of_model(cr, model)) if query is None else query + cr.execute(query) + count = cr.rowcount + if count < 2**21: # avoid the overhead of a named cursor unless we have at least two chunks + ids_ = (id_ for (id_,) in cr.fetchall()) else: - with named_cursor(cr, itersize=2**20) as ncr: - ncr.execute(format_query(cr, "SELECT id FROM {t} ORDER BY id", t=table_of_model(cr, model))) - for (id_,) in ncr: - yield id_ - if ids is None: - cr.execute(format_query(cr, "SELECT COUNT(id) FROM {t}", t=table_of_model(cr, model))) - (count,) = cr.fetchone() + def get_ids(): + with named_cursor(cr, itersize=2**20) as ncr: + ncr.execute(query) + for (id_,) in ncr: + yield id_ + + ids_ = get_ids() else: count = len(ids) + ids_ = ids + + if not count: + 
return + + logger.info("Computing fields %s of %r on %d records", fields, model, count) if strategy == "auto": big_table = count > BIG_TABLE_THRESHOLD @@ -303,8 +317,8 @@ def get_ids(): strategy = "commit" if big_table and any_tracked_field else "flush" size = (count + chunk_size - 1) / chunk_size - qual = "%s %d-bucket" % (model, chunk_size) if chunk_size != 1 else model - for subids in log_progress(chunks(get_ids(), chunk_size, list), logger, qualifier=qual, size=size): + qual = "{} {:d}-bucket".format(model, chunk_size) if chunk_size != 1 else model + for subids in log_progress(chunks(ids_, chunk_size, list), logger, qualifier=qual, size=size): records = Model.browse(subids) for field_name in fields: field = records._fields[field_name]