From 8f114dc019ba1e5c36dfcdb528b16a884f9c0d41 Mon Sep 17 00:00:00 2001 From: aokizy <14817191+aokizy2@user.noreply.gitee.com> Date: Tue, 14 Oct 2025 11:01:46 +0800 Subject: [PATCH 1/5] Inline docstrings in pandas/core/window/resample.py --- pandas/core/resample.py | 359 +++++++++++++++++++++++++++++++++++----- 1 file changed, 319 insertions(+), 40 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index e8803b6f30fce..3ef20c88a67e7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -28,11 +28,7 @@ ) from pandas._typing import NDFrameT from pandas.errors import AbstractMethodError -from pandas.util._decorators import ( - Appender, - Substitution, - doc, -) + from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.dtypes import ( @@ -53,12 +49,12 @@ ) from pandas.core.generic import ( NDFrame, - _shared_docs, + ) from pandas.core.groupby.groupby import ( BaseGroupBy, GroupBy, - _pipe_template, + get_groupby, ) from pandas.core.groupby.grouper import Grouper @@ -263,33 +259,70 @@ def pipe( ) -> T: ... @final - @Substitution( - klass="Resampler", - examples=""" - >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, - ... index=pd.date_range('2012-08-02', periods=4)) - >>> df - A - 2012-08-02 1 - 2012-08-03 2 - 2012-08-04 3 - 2012-08-05 4 - - To get the difference between each 2-day period's maximum and minimum - value in one pass, you can do - - >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) - A - 2012-08-02 1 - 2012-08-04 1""", - ) - @Appender(_pipe_template) def pipe( self, func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str], *args: Any, **kwargs: Any, ) -> T: + """ + Apply a function with arguments to this resampler object. 
+ + Parameters + ---------- + func : callable or tuple of (callable, str) + Function to apply to this resampler object or, alternatively, + a (callable, data_key) tuple where data_key is a string + indicating the keyword of callable that expects the resampler + object. + *args : iterable, optional + Positional arguments passed into `func`. + **kwargs : dict, optional + A dictionary of keyword arguments passed into `func`. + + Returns + ------- + object + The return value of `func`. + + See Also + -------- + Resampler.apply : Apply a function to each group. + Resampler.aggregate : Aggregate using one or more operations. + + Notes + ----- + Use `.pipe` when you want to improve readability by chaining + together functions that expect resamplers. Instead of writing + + >>> f(g(h(df.resample()), arg1=a), arg2=b, arg3=c) + + You can write + + >>> (df.resample() + ... .pipe(h) + ... .pipe(g, arg1=a) + ... .pipe(f, arg2=b, arg3=c)) + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each 2-day period's maximum and minimum + value in one pass, you can do + + >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 1 + 2012-08-04 1 + """ return super().pipe(func, *args, **kwargs) _agg_see_also_doc = dedent( @@ -349,14 +382,87 @@ def pipe( ) @final - @doc( - _shared_docs["aggregate"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - klass="DataFrame", - axis="", - ) def aggregate(self, func=None, *args, **kwargs): + """ + Aggregate using one or more operations over the specified axis. + + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a Series or when passed to Series.apply. 
+ + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + Series or DataFrame + + The return can be: + + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + Return Series and DataFrame with the transformed values. + + See Also + -------- + DataFrame.groupby.aggregate : Aggregate using callable, string, dict, + or list of string/callables. + DataFrame.resample.transform : Transforms the Series on each group + based on the given function. + DataFrame.aggregate: Aggregate using one or more + operations over the specified axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4, 5], + ... index=pd.date_range('20130101', periods=5, freq='s')) + >>> s + 2013-01-01 00:00:00 1 + 2013-01-01 00:00:01 2 + 2013-01-01 00:00:02 3 + 2013-01-01 00:00:03 4 + 2013-01-01 00:00:04 5 + Freq: s, dtype: int64 + + >>> r = s.resample('2s') + + >>> r.agg("sum") + 2013-01-01 00:00:00 3 + 2013-01-01 00:00:02 7 + 2013-01-01 00:00:04 5 + Freq: 2s, dtype: int64 + + >>> r.agg(['sum', 'mean', 'max']) + sum mean max + 2013-01-01 00:00:00 3 1.5 2 + 2013-01-01 00:00:02 7 3.5 4 + 2013-01-01 00:00:04 5 5.0 5 + + >>> r.agg({'result': lambda x: x.mean() / x.std(), + ... 
'total': "sum"}) + result total + 2013-01-01 00:00:00 2.121320 3 + 2013-01-01 00:00:02 4.949747 7 + 2013-01-01 00:00:04 NaN 5 + + >>> r.agg(average="mean", total="sum") + average total + 2013-01-01 00:00:00 1.5 3 + 2013-01-01 00:00:02 3.5 7 + 2013-01-01 00:00:04 5.0 5 + """ result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if result is None: how = func @@ -1236,25 +1342,113 @@ def max( return self._downsample("max", numeric_only=numeric_only, min_count=min_count) @final - @doc(GroupBy.first) def first( self, numeric_only: bool = False, min_count: int = 0, skipna: bool = True, ): + """ + Compute the first non-null entry of each column. + + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + numeric_only no longer accepts ``None``. + + min_count : int, default 0 + The required number of valid values to perform the operation. + If fewer than ``min_count`` non-NA values are present the result will be NA. + + skipna : bool, default True + Exclude NA/null values when computing the result. + + Returns + ------- + Series or DataFrame + The first non-null values in each group. + + See Also + -------- + core.resample.Resampler.last : Compute the last non-null value of each column. + core.resample.Resampler.max : Compute max value of group. + core.resample.Resampler.min : Compute min value of group. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.date_range('2023-01-01', periods=4, freq='D')) + >>> s + 2023-01-01 1 + 2023-01-02 2 + 2023-01-03 3 + 2023-01-04 4 + Freq: D, dtype: int64 + + >>> s.resample('2D').first() + 2023-01-01 1 + 2023-01-03 3 + Freq: 2D, dtype: int64 + """ return self._downsample( "first", numeric_only=numeric_only, min_count=min_count, skipna=skipna ) @final - @doc(GroupBy.last) def last( self, numeric_only: bool = False, min_count: int = 0, skipna: bool = True, ): + """ + Compute the last non-null entry of each column. 
+ + Parameters + ---------- + numeric_only : bool, default False + Include only float, int, boolean columns. + + .. versionchanged:: 2.0.0 + numeric_only no longer accepts ``None``. + + min_count : int, default 0 + The required number of valid values to perform the operation. + If fewer than ``min_count`` non-NA values are present the result will be NA. + + skipna : bool, default True + Exclude NA/null values when computing the result. + + Returns + ------- + Series or DataFrame + The last non-null values in each group. + + See Also + -------- + core.resample.Resampler.first : Compute the first non-null value of each column. + core.resample.Resampler.max : Compute max value of group. + core.resample.Resampler.min : Compute min value of group. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.date_range('2023-01-01', periods=4, freq='D')) + >>> s + 2023-01-01 1 + 2023-01-02 2 + 2023-01-03 3 + 2023-01-04 4 + Freq: D, dtype: int64 + + >>> s.resample('2D').last() + 2023-01-01 2 + 2023-01-03 4 + Freq: 2D, dtype: int64 + """ return self._downsample( "last", numeric_only=numeric_only, min_count=min_count, skipna=skipna ) @@ -1544,8 +1738,39 @@ def sem( return self._downsample("sem", ddof=ddof, numeric_only=numeric_only) @final - @doc(GroupBy.ohlc) def ohlc(self): + """ + Compute open, high, low and close values of a group, excluding missing values. + + Returns + ------- + DataFrame + The OHLC values for each group. + + See Also + -------- + core.resample.Resampler.first : Compute the first non-null value of each column. + core.resample.Resampler.last : Compute the last non-null value of each column. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4, 5, 6], + ... 
index=pd.date_range('2023-01-01', periods=6, freq='D')) + >>> s + 2023-01-01 1 + 2023-01-02 2 + 2023-01-03 3 + 2023-01-04 4 + 2023-01-05 5 + 2023-01-06 6 + Freq: D, dtype: int64 + + >>> s.resample('2D').ohlc() + open high low close + 2023-01-01 1 2 1 2 + 2023-01-03 3 4 3 4 + 2023-01-05 5 6 5 6 + """ ax = self.ax obj = self._obj_with_exclusions if len(ax) == 0: @@ -1600,8 +1825,35 @@ def nunique(self): return self._downsample("nunique") @final - @doc(GroupBy.size) def size(self): + """ + Compute group sizes. + + Returns + ------- + Series + Number of rows in each group. + + See Also + -------- + core.resample.Resampler.count : Compute count of group, excluding missing values. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.date_range('2023-01-01', periods=4, freq='D')) + >>> s + 2023-01-01 1 + 2023-01-02 2 + 2023-01-03 3 + 2023-01-04 4 + Freq: D, dtype: int64 + + >>> s.resample('2D').size() + 2023-01-01 2 + 2023-01-03 2 + Freq: 2D, dtype: int64 + """ result = self._downsample("size") # If the result is a non-empty DataFrame we stack to get a Series @@ -1620,8 +1872,35 @@ def size(self): return result @final - @doc(GroupBy.count) def count(self): + """ + Compute count of group, excluding missing values. + + Returns + ------- + Series or DataFrame + Count of values within each group. + + See Also + -------- + core.resample.Resampler.size : Compute group sizes. + + Examples + -------- + >>> s = pd.Series([1, 2, None, 4], + ... 
index=pd.date_range('2023-01-01', periods=4, freq='D')) + >>> s + 2023-01-01 1.0 + 2023-01-02 2.0 + 2023-01-03 NaN + 2023-01-04 4.0 + Freq: D, dtype: float64 + + >>> s.resample('2D').count() + 2023-01-01 2 + 2023-01-03 1 + Freq: 2D, dtype: int64 + """ result = self._downsample("count") if not len(self.ax): if self._selected_obj.ndim == 1: From cb48bc06e1005749dc601f60e011642b17bffff9 Mon Sep 17 00:00:00 2001 From: aokizy <14817191+aokizy2@user.noreply.gitee.com> Date: Tue, 14 Oct 2025 13:57:24 +0800 Subject: [PATCH 2/5] CLN:Inline docstrings in pandas/core/window/resample.py --- pandas/core/resample.py | 254 ++++++++++++++++++++++------------------ 1 file changed, 139 insertions(+), 115 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3ef20c88a67e7..d9b17d92c91f5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -49,12 +49,10 @@ ) from pandas.core.generic import ( NDFrame, - ) from pandas.core.groupby.groupby import ( BaseGroupBy, GroupBy, - get_groupby, ) from pandas.core.groupby.grouper import Grouper @@ -266,43 +264,55 @@ def pipe( **kwargs: Any, ) -> T: """ - Apply a function with arguments to this resampler object. + Apply a ``func`` with arguments to this Resampler object and return its result. + + Use `.pipe` when you want to improve readability by chaining together + functions that expect Series, DataFrames, GroupBy or Resampler objects. + Instead of writing + + >>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3 + >>> g = lambda x, arg1: x * 5 / arg1 + >>> f = lambda x: x ** 4 + >>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"]) + >>> h(g(f(df.groupby('group')), arg1=1), arg2=2, arg3=3) # doctest: +SKIP + + You can write + + >>> (df.groupby('group') + ... .pipe(f) + ... .pipe(g, arg1=1) + ... .pipe(h, arg2=2, arg3=3)) # doctest: +SKIP + + which is much more readable. 
Parameters ---------- func : callable or tuple of (callable, str) - Function to apply to this resampler object or, alternatively, - a (callable, data_key) tuple where data_key is a string - indicating the keyword of callable that expects the resampler - object. + Function to apply to this Resampler object or, alternatively, + a `(callable, data_keyword)` tuple where `data_keyword` is a + string indicating the keyword of `callable` that expects the + Resampler object. *args : iterable, optional Positional arguments passed into `func`. **kwargs : dict, optional - A dictionary of keyword arguments passed into `func`. + A dictionary of keyword arguments passed into `func`. Returns ------- - object - The return value of `func`. + any + The result of applying ``func`` to the Resampler object. See Also -------- - Resampler.apply : Apply a function to each group. - Resampler.aggregate : Aggregate using one or more operations. + Series.pipe : Apply a function with arguments to a series. + DataFrame.pipe: Apply a function with arguments to a dataframe. + apply : Apply function to each group instead of to the + full Resampler object. Notes ----- - Use `.pipe` when you want to improve readability by chaining - together functions that expect resamplers. Instead of writing - - >>> f(g(h(df.resample()), arg1=a), arg2=b, arg3=c) - - You can write - - >>> (df.resample() - ... .pipe(h) - ... .pipe(g, arg1=a) - ... .pipe(f, arg2=b, arg3=c)) + See more `here + <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_ Examples -------- @@ -324,7 +334,7 @@ def pipe( 2012-08-04 1 """ return super().pipe(func, *args, **kwargs) - + _agg_see_also_doc = dedent( """ See Also @@ -390,7 +400,7 @@ def aggregate(self, func=None, *args, **kwargs): ---------- func : function, str, list or dict Function to use for aggregating the data. If a function, must either - work when passed a Series or when passed to Series.apply. + work when passed a DataFrame or when passed to DataFrame.apply.
Accepted combinations are: @@ -398,7 +408,6 @@ def aggregate(self, func=None, *args, **kwargs): - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - dict of axis labels -> functions, function names or list of such. - *args Positional arguments to pass to `func`. **kwargs @@ -406,15 +415,14 @@ def aggregate(self, func=None, *args, **kwargs): Returns ------- - Series or DataFrame + scalar, Series or DataFrame The return can be: + * scalar : when Series.agg is called with single function * Series : when DataFrame.agg is called with a single function * DataFrame : when DataFrame.agg is called with several functions - Return Series and DataFrame with the transformed values. - See Also -------- DataFrame.groupby.aggregate : Aggregate using callable, string, dict, @@ -424,6 +432,25 @@ def aggregate(self, func=None, *args, **kwargs): DataFrame.aggregate: Aggregate using one or more operations over the specified axis. + Notes + ----- + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + A passed user-defined-function will be passed a Series for evaluation. + + If ``func`` defines an index relabeling, ``axis`` must be ``0`` or ``index``. + Examples -------- >>> s = pd.Series([1, 2, 3, 4, 5], @@ -1355,43 +1382,38 @@ def first( ---------- numeric_only : bool, default False Include only float, int, boolean columns. - - .. versionchanged:: 2.0.0 - numeric_only no longer accepts ``None``. 
- min_count : int, default 0 - The required number of valid values to perform the operation. - If fewer than ``min_count`` non-NA values are present the result will be NA. - + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. skipna : bool, default True - Exclude NA/null values when computing the result. + Exclude NA/null values. If an entire group is NA, the result will be NA. Returns ------- Series or DataFrame - The first non-null values in each group. + First values within each group. See Also -------- - core.resample.Resampler.last : Compute the last non-null value of each column. - core.resample.Resampler.max : Compute max value of group. - core.resample.Resampler.min : Compute min value of group. + core.resample.Resampler.last : Compute the last non-null value in each group. + core.resample.Resampler.mean : Compute mean of groups, excluding missing values. Examples -------- >>> s = pd.Series([1, 2, 3, 4], - ... index=pd.date_range('2023-01-01', periods=4, freq='D')) + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... )) >>> s 2023-01-01 1 - 2023-01-02 2 - 2023-01-03 3 - 2023-01-04 4 - Freq: D, dtype: int64 - - >>> s.resample('2D').first() + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> s.resample("MS").first() 2023-01-01 1 - 2023-01-03 3 - Freq: 2D, dtype: int64 + 2023-02-01 3 + Freq: MS, dtype: int64 """ return self._downsample( "first", numeric_only=numeric_only, min_count=min_count, skipna=skipna @@ -1411,43 +1433,38 @@ def last( ---------- numeric_only : bool, default False Include only float, int, boolean columns. - - .. versionchanged:: 2.0.0 - numeric_only no longer accepts ``None``. - min_count : int, default 0 - The required number of valid values to perform the operation. - If fewer than ``min_count`` non-NA values are present the result will be NA. 
- + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. skipna : bool, default True - Exclude NA/null values when computing the result. + Exclude NA/null values. If an entire group is NA, the result will be NA. Returns ------- Series or DataFrame - The last non-null values in each group. + Last of values within each group. See Also -------- - core.resample.Resampler.first : Compute the first non-null value of each column. - core.resample.Resampler.max : Compute max value of group. - core.resample.Resampler.min : Compute min value of group. + core.resample.Resampler.first : Compute the first non-null value in each group. + core.resample.Resampler.mean : Compute mean of groups, excluding missing values. Examples -------- >>> s = pd.Series([1, 2, 3, 4], - ... index=pd.date_range('2023-01-01', periods=4, freq='D')) + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... )) >>> s 2023-01-01 1 - 2023-01-02 2 - 2023-01-03 3 - 2023-01-04 4 - Freq: D, dtype: int64 - - >>> s.resample('2D').last() + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> s.resample("MS").last() 2023-01-01 2 - 2023-01-03 4 - Freq: 2D, dtype: int64 + 2023-02-01 4 + Freq: MS, dtype: int64 """ return self._downsample( "last", numeric_only=numeric_only, min_count=min_count, skipna=skipna @@ -1745,31 +1762,33 @@ def ohlc(self): Returns ------- DataFrame - The OHLC values for each group. + Open, high, low and close values within each group. See Also -------- - core.resample.Resampler.first : Compute the first non-null value of each column. - core.resample.Resampler.last : Compute the last non-null value of each column. + DataFrame.agg : Aggregate using one or more operations over the specified axis. + DataFrame.resample : Resample time-series data. + DataFrame.groupby : Group DataFrame using a mapper or by a Series of columns. 
Examples -------- - >>> s = pd.Series([1, 2, 3, 4, 5, 6], - ... index=pd.date_range('2023-01-01', periods=6, freq='D')) - >>> s - 2023-01-01 1 - 2023-01-02 2 - 2023-01-03 3 - 2023-01-04 4 - 2023-01-05 5 - 2023-01-06 6 - Freq: D, dtype: int64 - - >>> s.resample('2D').ohlc() + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 5], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").ohlc() open high low close - 2023-01-01 1 2 1 2 - 2023-01-03 3 4 3 4 - 2023-01-05 5 6 5 6 + 2023-01-01 1 3 1 2 + 2023-02-01 4 5 3 5 """ ax = self.ax obj = self._obj_with_exclusions @@ -1836,23 +1855,24 @@ def size(self): See Also -------- - core.resample.Resampler.count : Compute count of group, excluding missing values. + Series.groupby : Apply a function groupby to a Series. + DataFrame.groupby : Apply a function groupby to each row or column of a DataFrame. Examples -------- - >>> s = pd.Series([1, 2, 3, 4], - ... index=pd.date_range('2023-01-01', periods=4, freq='D')) - >>> s + >>> ser = pd.Series( + ... [1, 2, 3], + ... index=pd.DatetimeIndex(["2023-01-01", "2023-01-15", "2023-02-01"]), + ... ) + >>> ser 2023-01-01 1 - 2023-01-02 2 - 2023-01-03 3 - 2023-01-04 4 - Freq: D, dtype: int64 - - >>> s.resample('2D').size() + 2023-01-15 2 + 2023-02-01 3 + dtype: int64 + >>> ser.resample("MS").size() 2023-01-01 2 - 2023-01-03 2 - Freq: 2D, dtype: int64 + 2023-02-01 1 + Freq: MS, dtype: int64 """ result = self._downsample("size") @@ -1883,23 +1903,27 @@ def count(self): See Also -------- - core.resample.Resampler.size : Compute group sizes. + Series.groupby : Apply a function groupby to a Series. + DataFrame.groupby : Apply a function groupby to each row or column of a DataFrame. Examples -------- - >>> s = pd.Series([1, 2, None, 4], - ... 
index=pd.date_range('2023-01-01', periods=4, freq='D')) - >>> s - 2023-01-01 1.0 - 2023-01-02 2.0 - 2023-01-03 NaN - 2023-01-04 4.0 - Freq: D, dtype: float64 - - >>> s.resample('2D').count() + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + >>> ser.resample("MS").count() 2023-01-01 2 - 2023-01-03 1 - Freq: 2D, dtype: int64 + 2023-02-01 2 + Freq: MS, dtype: int64 """ result = self._downsample("count") if not len(self.ax): @@ -3160,4 +3184,4 @@ def _asfreq_compat(index: FreqIndexT, freq) -> FreqIndexT: new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) else: # pragma: no cover raise TypeError(type(index)) - return new_index + return new_index \ No newline at end of file From 058fcab65f98ca4077a45f130a4a996ff97d0c2a Mon Sep 17 00:00:00 2001 From: aokizy <14817191+aokizy2@user.noreply.gitee.com> Date: Tue, 14 Oct 2025 16:32:09 +0800 Subject: [PATCH 3/5] CLN:Inline docstrings in pandas/core/window/resample.py --- pandas/core/resample.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index d9b17d92c91f5..e8a34932ff990 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -109,6 +109,8 @@ Series, ) + from pandas.core.generic import NDFrame + _shared_docs_kwargs: dict[str, str] = {} @@ -1856,7 +1858,8 @@ def size(self): See Also -------- Series.groupby : Apply a function groupby to a Series. - DataFrame.groupby : Apply a function groupby to each row or column of a DataFrame. + DataFrame.groupby : Apply a function groupby to each row + or column of a DataFrame. Examples -------- @@ -1904,7 +1907,8 @@ def count(self): See Also -------- Series.groupby : Apply a function groupby to a Series. 
- DataFrame.groupby : Apply a function groupby to each row or column of a DataFrame. + DataFrame.groupby : Apply a function groupby to each row + or column of a DataFrame. Examples -------- @@ -3184,4 +3188,4 @@ def _asfreq_compat(index: FreqIndexT, freq) -> FreqIndexT: new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) else: # pragma: no cover raise TypeError(type(index)) - return new_index \ No newline at end of file + return new_index From f04c994bfdd562a74fd18f09b24c21998d298a10 Mon Sep 17 00:00:00 2001 From: aokizy <14817191+aokizy2@user.noreply.gitee.com> Date: Tue, 14 Oct 2025 16:58:46 +0800 Subject: [PATCH 4/5] DOC: Remove unused variables and fix formatting in resample.py --- pandas/core/resample.py | 112 +++++++++++----------------------------- 1 file changed, 30 insertions(+), 82 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index e8a34932ff990..77aa1695c02b0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1,7 +1,7 @@ from __future__ import annotations import copy -from textwrap import dedent + from typing import ( TYPE_CHECKING, Concatenate, @@ -274,16 +274,15 @@ def pipe( >>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3 >>> g = lambda x, arg1: x * 5 / arg1 - >>> f = lambda x: x ** 4 + >>> f = lambda x: x**4 >>> df = pd.DataFrame([["a", 4], ["b", 5]], columns=["group", "value"]) - >>> h(g(f(df.groupby('group')), arg1=1), arg2=2, arg3=3) # doctest: +SKIP + >>> h(g(f(df.groupby("group")), arg1=1), arg2=2, arg3=3) # doctest: +SKIP You can write - >>> (df.groupby('group') - ... .pipe(f) - ... .pipe(g, arg1=1) - ... .pipe(h, arg2=2, arg3=3)) # doctest: +SKIP + >>> ( + ... df.groupby("group").pipe(f).pipe(g, arg1=1).pipe(h, arg2=2, arg3=3) + ... ) # doctest: +SKIP which is much more readable. @@ -318,8 +317,9 @@ def pipe( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, - ... index=pd.date_range('2012-08-02', periods=4)) + >>> df = pd.DataFrame( + ... 
{"A": [1, 2, 3, 4]}, index=pd.date_range("2012-08-02", periods=4) + ... ) >>> df A 2012-08-02 1 @@ -330,68 +330,12 @@ def pipe( To get the difference between each 2-day period's maximum and minimum value in one pass, you can do - >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + >>> df.resample("2D").pipe(lambda x: x.max() - x.min()) A 2012-08-02 1 2012-08-04 1 """ return super().pipe(func, *args, **kwargs) - - _agg_see_also_doc = dedent( - """ - See Also - -------- - DataFrame.groupby.aggregate : Aggregate using callable, string, dict, - or list of string/callables. - DataFrame.resample.transform : Transforms the Series on each group - based on the given function. - DataFrame.aggregate: Aggregate using one or more - operations over the specified axis. - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - >>> s = pd.Series([1, 2, 3, 4, 5], - ... index=pd.date_range('20130101', periods=5, freq='s')) - >>> s - 2013-01-01 00:00:00 1 - 2013-01-01 00:00:01 2 - 2013-01-01 00:00:02 3 - 2013-01-01 00:00:03 4 - 2013-01-01 00:00:04 5 - Freq: s, dtype: int64 - - >>> r = s.resample('2s') - - >>> r.agg("sum") - 2013-01-01 00:00:00 3 - 2013-01-01 00:00:02 7 - 2013-01-01 00:00:04 5 - Freq: 2s, dtype: int64 - - >>> r.agg(['sum', 'mean', 'max']) - sum mean max - 2013-01-01 00:00:00 3 1.5 2 - 2013-01-01 00:00:02 7 3.5 4 - 2013-01-01 00:00:04 5 5.0 5 - - >>> r.agg({'result': lambda x: x.mean() / x.std(), - ... 'total': "sum"}) - result total - 2013-01-01 00:00:00 2.121320 3 - 2013-01-01 00:00:02 4.949747 7 - 2013-01-01 00:00:04 NaN 5 - - >>> r.agg(average="mean", total="sum") - average total - 2013-01-01 00:00:00 1.5 3 - 2013-01-01 00:00:02 3.5 7 - 2013-01-01 00:00:04 5.0 5 - """ - ) @final def aggregate(self, func=None, *args, **kwargs): @@ -455,8 +399,9 @@ def aggregate(self, func=None, *args, **kwargs): Examples -------- - >>> s = pd.Series([1, 2, 3, 4, 5], - ... index=pd.date_range('20130101', periods=5, freq='s')) + >>> s = pd.Series( + ... 
[1, 2, 3, 4, 5], index=pd.date_range("20130101", periods=5, freq="s") + ... ) >>> s 2013-01-01 00:00:00 1 2013-01-01 00:00:01 2 @@ -465,7 +410,7 @@ def aggregate(self, func=None, *args, **kwargs): 2013-01-01 00:00:04 5 Freq: s, dtype: int64 - >>> r = s.resample('2s') + >>> r = s.resample("2s") >>> r.agg("sum") 2013-01-01 00:00:00 3 @@ -473,14 +418,13 @@ def aggregate(self, func=None, *args, **kwargs): 2013-01-01 00:00:04 5 Freq: 2s, dtype: int64 - >>> r.agg(['sum', 'mean', 'max']) + >>> r.agg(["sum", "mean", "max"]) sum mean max 2013-01-01 00:00:00 3 1.5 2 2013-01-01 00:00:02 7 3.5 4 2013-01-01 00:00:04 5 5.0 5 - >>> r.agg({'result': lambda x: x.mean() / x.std(), - ... 'total': "sum"}) + >>> r.agg({"result": lambda x: x.mean() / x.std(), "total": "sum"}) result total 2013-01-01 00:00:00 2.121320 3 2013-01-01 00:00:02 4.949747 7 @@ -1402,10 +1346,12 @@ def first( Examples -------- - >>> s = pd.Series([1, 2, 3, 4], - ... index=pd.DatetimeIndex( - ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] - ... )) + >>> s = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> s 2023-01-01 1 2023-01-15 2 @@ -1453,10 +1399,12 @@ def last( Examples -------- - >>> s = pd.Series([1, 2, 3, 4], - ... index=pd.DatetimeIndex( - ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] - ... )) + >>> s = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> s 2023-01-01 1 2023-01-15 2 @@ -1859,7 +1807,7 @@ def size(self): -------- Series.groupby : Apply a function groupby to a Series. DataFrame.groupby : Apply a function groupby to each row - or column of a DataFrame. + or column of a DataFrame. Examples -------- @@ -1908,7 +1856,7 @@ def count(self): -------- Series.groupby : Apply a function groupby to a Series. 
DataFrame.groupby : Apply a function groupby to each row - or column of a DataFrame. + or column of a DataFrame. Examples -------- From 4292bc20b23a705e7c59a547eb7fb376d94029eb Mon Sep 17 00:00:00 2001 From: aokizy <14817191+aokizy2@user.noreply.gitee.com> Date: Tue, 14 Oct 2025 17:11:28 +0800 Subject: [PATCH 5/5] DOC: Remove unused variables and fix formatting in resample.py --- pandas/core/resample.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 77aa1695c02b0..44304bcc7f388 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1,7 +1,6 @@ from __future__ import annotations import copy - from typing import ( TYPE_CHECKING, Concatenate, @@ -28,7 +27,6 @@ ) from pandas._typing import NDFrameT from pandas.errors import AbstractMethodError - from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.dtypes import ( @@ -108,7 +106,6 @@ DataFrame, Series, ) - from pandas.core.generic import NDFrame _shared_docs_kwargs: dict[str, str] = {}