From ca4828e8a67ba5c7be731823537507f9a473a5ab Mon Sep 17 00:00:00 2001 From: Navya Srivastava Date: Sun, 12 Oct 2025 09:50:47 -0700 Subject: [PATCH 1/3] Replace @Appender with inline docstring for DataFrame.groupby --- pandas/core/frame.py | 138 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 119 insertions(+), 19 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 643974db5f2bf..50b318c1ea30d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9346,9 +9346,123 @@ def update( # ---------------------------------------------------------------------- # Data reshaping - @Appender( - dedent( - """ + @deprecate_nonkeyword_arguments( + Pandas4Warning, allowed_ards=["self", "by", "level"], name="groupby" + ) + def groupby( + self, + by=None, + level: IndexLabel | None = None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + observed: bool = True, + dropna: bool = True, + ) -> DataFrameGroupBy: + """ + Group DataFrame using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the + object, applying a function, and combining the results. This can be + used to group large amounts of data and compute operations on these + groups. + + Parameters + ---------- + by : mapping, function, label, pd.Grouper or list of such + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If a list or ndarray of length + equal to the selected axis is passed (see the `groupby user guide + <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_), + the values are used as-is to determine the groups. A label or list + of labels may be passed to group by the columns in ``self``. + Notice that a tuple is interpreted as a (single) key. 
+ level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. Do not specify both ``by`` and ``level``. + as_index : bool, default True + Return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. This argument has no effect + on filtrations (see the `filtrations in the user guide + <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_), + such as ``head()``, ``tail()``, ``nth()`` and in transformations + (see the `transformations in the user guide + <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_). + sort : bool, default True + Sort group keys. Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. If False, + the groups will appear in the same order as they did in the original DataFrame. + This argument has no effect on filtrations (see the `filtrations in the user guide + <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_), + such as ``head()``, ``tail()``, ``nth()`` and in transformations + (see the `transformations in the user guide + <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#transformation>`_). + + .. versionchanged:: 2.0.0 + + Specifying ``sort=False`` with an ordered categorical grouper will no + longer sort the values. + + group_keys : bool, default True + When calling apply and the ``by`` argument produces a like-indexed + (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to + index to identify pieces. By default group keys are not included + when the result's index (and column) labels match the inputs, and + are included otherwise. + + .. versionchanged:: 1.5.0 + + Warns that ``group_keys`` will no longer be ignored when the + result from ``apply`` is a like-indexed Series or DataFrame. + Specify ``group_keys`` explicitly to include the group keys or + not. + + .. versionchanged:: 2.0.0 + + ``group_keys`` now defaults to ``True``. + + observed : bool, default True + This only applies if any of the groupers are Categoricals. 
+ If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionchanged:: 3.0.0 + + The default value is now ``True``. + + dropna : bool, default True + If True, and if group keys contain NA values, NA values together + with row/column will be dropped. + If False, NA values will also be treated as the key in groups. + + Returns + ------- + pandas.api.typing.DataFrameGroupBy + Returns a groupby object that contains information about the groups. + + See Also + -------- + resample : Convenience method for frequency conversion and resampling + of time series. + + Notes + ----- + See the `user guide + <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more + detailed usage and examples, including splitting an object into groups, + iterating through groups, selecting a group, aggregation, and more. + + The implementation of groupby is hash-based, meaning in particular that + objects that compare as equal will be considered to be in the same group. + An exception to this is that pandas has special handling of NA values: + any NA values will be collapsed to a single group, regardless of how + they compare. See the user guide linked above for more details. + Examples -------- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', @@ -9450,22 +9564,6 @@ def update( 2 24.0 3 26.0 """ - ) - ) - @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) - @deprecate_nonkeyword_arguments( - Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby" - ) - def groupby( - self, - by=None, - level: IndexLabel | None = None, - as_index: bool = True, - sort: bool = True, - group_keys: bool = True, - observed: bool = True, - dropna: bool = True, - ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy if level is None and by is None: @@ -9482,6 +9580,8 @@ def groupby( dropna=dropna, ) + + _shared_docs["pivot"] = """ Return reshaped DataFrame organized by given index / column values. 
From 337e7dc2786001da2fe6c61c8906aed242275f12 Mon Sep 17 00:00:00 2001 From: Navya Srivastava Date: Sun, 12 Oct 2025 10:25:29 -0700 Subject: [PATCH 2/3] Replace @Appender with inline docstring for DataFrame.groupby --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 50b318c1ea30d..d88595523e689 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9347,7 +9347,7 @@ def update( # ---------------------------------------------------------------------- # Data reshaping @deprecate_nonkeyword_arguments( - Pandas4Warning, allowed_ards=["self", "by", "level"], name="groupby" + Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby" ) def groupby( self, From 1f91a9a25bc53ab83aaea4a56d0be8fa4a1a84c4 Mon Sep 17 00:00:00 2001 From: Navya Srivastava Date: Sun, 12 Oct 2025 19:27:54 -0700 Subject: [PATCH 3/3] Replace @Appender with inline docstring for DataFrame.groupby --- pandas/core/frame.py | 47 +++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d88595523e689..9c41b82bbbc8e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9347,7 +9347,7 @@ def update( # ---------------------------------------------------------------------- # Data reshaping @deprecate_nonkeyword_arguments( - Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby" + Pandas4Warning, allowed_args=["self", "by", "level"], name="groupby" ) def groupby( self, @@ -9396,8 +9396,10 @@ def groupby( Sort group keys. Get better performance by turning this off. Note this does not influence the order of observations within each group. Groupby preserves the order of rows within each group. If False, - the groups will appear in the same order as they did in the original DataFrame. 
- This argument has no effect on filtrations (see the `filtrations in the user guide + the groups will appear in the same order as they did in the original + DataFrame. + This argument has no effect on filtrations (see the `filtrations + in the user guide <https://pandas.pydata.org/docs/dev/user_guide/groupby.html#filtration>`_), such as ``head()``, ``tail()``, ``nth()`` and in transformations (see the `transformations in the user guide @@ -9465,16 +9467,19 @@ def groupby( Examples -------- >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', - ... 'Parrot', 'Parrot'], - ... 'Max Speed': [380., 370., 24., 26.]}) + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], + ... "Max Speed": [380.0, 370.0, 24.0, 26.0], + ... } + ... ) >>> df Animal Max Speed 0 Falcon 380.0 1 Falcon 370.0 2 Parrot 24.0 3 Parrot 26.0 - >>> df.groupby(['Animal']).mean() + >>> df.groupby(["Animal"]).mean() Max Speed Animal Falcon 375.0 @@ -9485,11 +9490,12 @@ def groupby( We can groupby different levels of a hierarchical index using the `level` parameter: - >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], - ... ['Captive', 'Wild', 'Captive', 'Wild']] - >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) - >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]}, - ... index=index) + >>> arrays = [ + ... ["Falcon", "Falcon", "Parrot", "Parrot"], + ... ["Captive", "Wild", "Captive", "Wild"], + ... ] + >>> index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type")) + >>> df = pd.DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index) >>> df Max Speed Animal Type @@ -9527,7 +9533,7 @@ def groupby( 2.0 2 5 NaN 1 4 - >>> arr = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]] + >>> arr = [["a", 12, 12], [None, 12.3, 33.0], ["b", 12.3, 123], ["a", 1, 1]] >>> df = pd.DataFrame(arr, columns=["a", "b", "c"]) >>> df.groupby(by="a").sum() @@ -9546,10 +9552,13 @@ def groupby( When using ``.apply()``, use ``group_keys`` to include or exclude the group keys. 
The ``group_keys`` argument defaults to ``True`` (include). - >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', - ... 'Parrot', 'Parrot'], - ... 'Max Speed': [380., 370., 24., 26.]}) - >>> df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x) + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], + ... "Max Speed": [380.0, 370.0, 24.0, 26.0], + ... } + ... ) + >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x) Max Speed Animal Falcon 0 380.0 @@ -9557,7 +9566,7 @@ def groupby( Parrot 2 24.0 3 26.0 - >>> df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x) + >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x) Max Speed 0 380.0 1 370.0 @@ -9580,8 +9589,6 @@ def groupby( dropna=dropna, ) - - _shared_docs["pivot"] = """ Return reshaped DataFrame organized by given index / column values.