From 64f536f0b66712b6d21d41d1e1c9d07e96042a9b Mon Sep 17 00:00:00 2001
From: Weijie Guo <reswqa@163.com>
Date: Thu, 11 Jan 2024 17:14:01 +0800
Subject: [PATCH 1/2] docs(python): Clarify documentation for the `agg_list`
 argument in `Expr.map_batches`

---
 py-polars/polars/expr/expr.py | 45 ++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py
index 7326d296f22b..501f9dd508c1 100644
--- a/py-polars/polars/expr/expr.py
+++ b/py-polars/polars/expr/expr.py
@@ -3991,7 +3991,10 @@ def map_batches(
             If set to true this can run in the streaming engine, but may yield
             incorrect results in group-by. Ensure you know what you are doing!
         agg_list
-            Aggregate list.
+            Collect groups to a list and then apply. This parameter only works for
+            group-by context.
+            If set to true, the function is invoked only once on a list of groups.
+            Otherwise, the function is invoked per-group.
 
         Warnings
         --------
@@ -4020,6 +4023,46 @@ def map_batches(
         ╞══════╪════════╡
         │ 1    ┆ 0      │
         └──────┴────────┘
+
+        >>> df = pl.DataFrame(
+        ...     {
+        ...         "a": [0, 1, 0, 1],
+        ...         "b": [1, 2, 3, 4],
+        ...     }
+        ... )
+
+        The function is applied per-group, and the input of the function is a
+        Series[i64].
+        >>> (
+        ...     df.group_by("a").agg(
+        ...         pl.col("b").map_batches(lambda x: x.max(), agg_list=False)
+        ...     )
+        ... )  # doctest: +IGNORE_RESULT
+        shape: (2, 2)
+        ┌─────┬───────────┐
+        │ a   ┆ b         │
+        │ --- ┆ ---       │
+        │ i64 ┆ list[i64] │
+        ╞═════╪═══════════╡
+        │ 1   ┆ [4]       │
+        │ 0   ┆ [3]       │
+        └─────┴───────────┘
+        The function is applied only once on a list of groups, and the input of
+        the function is a Series[list[i64]].
+        >>> (
+        ...     df.group_by("a").agg(
+        ...         pl.col("b").map_batches(lambda x: x.list.max(), agg_list=True)
+        ...     )
+        ... )  # doctest: +IGNORE_RESULT
+        shape: (2, 2)
+        ┌─────┬─────┐
+        │ a   ┆ b   │
+        │ --- ┆ --- │
+        │ i64 ┆ i64 │
+        ╞═════╪═════╡
+        │ 0   ┆ 3   │
+        │ 1   ┆ 4   │
+        └─────┴─────┘
         """
         if return_dtype is not None:
             return_dtype = py_type_to_dtype(return_dtype)

From 9f551c4118ba520eadc5efde2c1991acc1d7a69a Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Fri, 12 Jan 2024 11:16:57 +0100
Subject: [PATCH 2/2] Update wording / formatting

---
 py-polars/polars/expr/expr.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py
index 501f9dd508c1..679f99a50196 100644
--- a/py-polars/polars/expr/expr.py
+++ b/py-polars/polars/expr/expr.py
@@ -3991,10 +3991,10 @@ def map_batches(
             If set to true this can run in the streaming engine, but may yield
             incorrect results in group-by. Ensure you know what you are doing!
         agg_list
-            Collect groups to a list and then apply. This parameter only works for
-            group-by context.
-            If set to true, the function is invoked only once on a list of groups.
-            Otherwise, the function is invoked per-group.
+            Aggregate the values of the expression into a list before applying the
+            function. This parameter only works in a group-by context.
+            The function is then invoked only once on a Series of lists (one list
+            per group), rather than once per group.
 
         Warnings
         --------
@@ -4024,19 +4024,19 @@ def map_batches(
         │ 1    ┆ 0      │
         └──────┴────────┘
 
+        In a group-by context, the `agg_list` parameter can improve performance if used
+        correctly. The following example has `agg_list` set to `False`, which causes
+        the function to be applied once per group. The input of the function is a
+        Series of type `Int64`. This is less efficient than setting `agg_list=True`.
+
         >>> df = pl.DataFrame(
         ...     {
         ...         "a": [0, 1, 0, 1],
         ...         "b": [1, 2, 3, 4],
         ...     }
         ... )
-
-        The function is applied per-group, and the input of the function is a
-        Series[i64].
-        >>> (
-        ...     df.group_by("a").agg(
-        ...         pl.col("b").map_batches(lambda x: x.max(), agg_list=False)
-        ...     )
+        >>> df.group_by("a").agg(
+        ...     pl.col("b").map_batches(lambda x: x.max(), agg_list=False)
         ... )  # doctest: +IGNORE_RESULT
         shape: (2, 2)
         ┌─────┬───────────┐
@@ -4047,12 +4047,12 @@ def map_batches(
         │ 1   ┆ [4]       │
         │ 0   ┆ [3]       │
         └─────┴───────────┘
-        The function is applied only once on a list of groups, and the input of
-        the function is a Series[list[i64]].
-        >>> (
-        ...     df.group_by("a").agg(
-        ...         pl.col("b").map_batches(lambda x: x.list.max(), agg_list=True)
-        ...     )
+
+        Using `agg_list=True` would be more efficient. In this example, the input of
+        the function is a Series of type `List(Int64)`.
+
+        >>> df.group_by("a").agg(
+        ...     pl.col("b").map_batches(lambda x: x.list.max(), agg_list=True)
         ... )  # doctest: +IGNORE_RESULT
         shape: (2, 2)
         ┌─────┬─────┐