From cb134518ac9da9efc6750a3d3a8ca4f7f8657f16 Mon Sep 17 00:00:00 2001 From: wdyy20041223 <2795352227@qq,com> Date: Mon, 13 Oct 2025 00:27:06 +0800 Subject: [PATCH 1/5] remove-doc-decorators-pandas\core\generic.py --- pandas/core/generic.py | 1577 ++++++++++++++++++++++++++++++++++------ 1 file changed, 1374 insertions(+), 203 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0154087b18399..3c558ec6ac27d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -98,7 +98,6 @@ from pandas.errors.cow import _chained_assignment_method_msg from pandas.util._decorators import ( deprecate_kwarg, - doc, ) from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( @@ -209,7 +208,6 @@ from pandas.core.indexers.objects import BaseIndexer from pandas.core.resample import Resampler -import textwrap # goal is to be able to define the docs close to function, while still being # able to share @@ -775,10 +773,9 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None: self._mgr.set_axis(axis, labels) @final - @doc(klass=_shared_doc_kwargs["klass"]) def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: """ - Return {klass} with requested index / column level(s) removed. + Return Series/DataFrame with requested index / column level(s) removed. Parameters ---------- @@ -787,7 +784,7 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: If list-like, elements must be names or positional indexes of levels. - axis : {{0 or 'index', 1 or 'columns'}}, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 Axis along which the level(s) is removed: * 0 or 'index': remove level(s) in column. @@ -797,8 +794,8 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: Returns ------- - {klass} - {klass} with requested index / column level(s) removed. + Series/DataFrame + Series/DataFrame with requested index / column level(s) removed. See Also -------- @@ -2144,19 +2141,6 @@ def _repr_data_resource_(self): # I/O Methods @final - @doc( - klass="object", - storage_options=_shared_docs["storage_options"], - storage_options_versionadded="1.2.0", - encoding_parameter="", - verbose_parameter="", - extra_parameters=textwrap.dedent( - """\ - engine_kwargs : dict, optional - Arbitrary keyword arguments passed to excel engine. - """ - ), - ) def to_excel( self, excel_writer: FilePath | WriteExcelBuffer | ExcelWriter, @@ -2178,9 +2162,9 @@ def to_excel( engine_kwargs: dict[str, Any] | None = None, ) -> None: """ - Write {klass} to an Excel sheet. + Write object to an Excel sheet. - To write a single {klass} to an Excel .xlsx file it is only necessary to + To write a single object to an Excel .xlsx file it is only necessary to specify a target file name. To write to multiple sheets it is necessary to create an `ExcelWriter` object with a target file name, and specify a sheet in the file to write to. @@ -2224,18 +2208,26 @@ def to_excel( merge_cells : bool or 'columns', default False If True, write MultiIndex index and columns as merged cells. If 'columns', merge MultiIndex column cells only. - {encoding_parameter} inf_rep : str, default 'inf' Representation for infinity (there is no native representation for infinity in Excel). - {verbose_parameter} freeze_panes : tuple of int (length 2), optional Specifies the one-based bottommost row and rightmost column that is to be frozen. - {storage_options} - - .. 
versionadded:: {storage_options_versionadded} - {extra_parameters} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. + + .. versionadded:: 1.2.0 + engine_kwargs : dict, optional + Arbitrary keyword arguments passed to excel engine. + See Also -------- to_csv : Write DataFrame to a comma-separated values (csv) file. @@ -2321,10 +2313,6 @@ def to_excel( ) @final - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path_or_buf", - ) def to_json( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -2409,7 +2397,21 @@ def to_json( If 'orient' is 'records' write out line-delimited json format. Will throw ValueError if incorrect 'orient' since others are not list-like. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and 'path_or_buf' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and + other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression and to create + a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. .. versionchanged:: 1.4.0 Zstandard support. @@ -2422,7 +2424,15 @@ def to_json( indent : int, optional Length of whitespace used to indent each record. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. mode : str, default 'w' (writing) Specify the IO mode for output when supplying a path_or_buf. @@ -3047,10 +3057,6 @@ def to_sql( ) @final - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path", - ) def to_pickle( self, path: FilePath | WriteBuffer[bytes], @@ -3068,7 +3074,21 @@ def to_pickle( String, path object (implementing ``os.PathLike[str]``), or file-like object implementing a binary ``write()`` function. File path where the pickled object will be stored. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. 
If 'infer' and 'path' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and + other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression and to create + a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. protocol : int Int which indicates which protocol should be used by the pickler, default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible @@ -3077,7 +3097,15 @@ def to_pickle( .. [1] https://docs.python.org/3/library/pickle.html. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. See Also -------- @@ -3756,10 +3784,6 @@ def to_csv( ) -> None: ... @final - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path_or_buf", - ) def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3828,7 +3852,21 @@ def to_csv( A string representing the encoding to use in the output file, defaults to 'utf-8'. `encoding` is not supported if `path_or_buf` is a non-binary file object. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and 'path_or_buf' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and + other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression and to create + a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. May be a dict with key 'method' as compression mode and other entries as additional compression options if @@ -3869,7 +3907,15 @@ def to_csv( See the errors argument for :func:`open` for a full list of options. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. 
Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. Returns ------- @@ -5150,10 +5196,6 @@ def sort_index( else: return result.__finalize__(self, method="sort_index") - @doc( - klass=_shared_doc_kwargs["klass"], - optional_reindex="", - ) def reindex( self, labels=None, @@ -5169,7 +5211,7 @@ def reindex( tolerance=None, ) -> Self: """ - Conform {klass} to new index with optional filling logic. + Conform Series/DataFrame to new index with optional filling logic. Places NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and @@ -5177,8 +5219,7 @@ def reindex( Parameters ---------- - {optional_reindex} - method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} Method to use for filling holes in reindexed DataFrame. Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. @@ -5226,8 +5267,8 @@ def reindex( Returns ------- - {klass} - {klass} with changed index. + Series/DataFrame + Series/DataFrame with changed index. See Also -------- @@ -5990,7 +6031,6 @@ def pipe( ) -> T: ... @final - @doc(klass=_shared_doc_kwargs["klass"]) def pipe( self, func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str], @@ -6003,11 +6043,11 @@ def pipe( Parameters ---------- func : function - Function to apply to the {klass}. + Function to apply to the Series/DataFrame. ``args``, and ``kwargs`` are passed into ``func``. Alternatively a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a string indicating the keyword of - ``callable`` that expects the {klass}. + ``callable`` that expects the Series/DataFrame. *args : iterable, optional Positional arguments passed into ``func``. **kwargs : mapping, optional @@ -6964,10 +7004,6 @@ def fillna( ) -> Self | None: ... @final - @doc( - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], - ) def fillna( self, value: Hashable | Mapping | Series | DataFrame, @@ -6987,7 +7023,7 @@ def fillna( each index (for a Series) or column (for a DataFrame). Values not in the dict/Series/DataFrame will not be filled. This value cannot be a list. - axis : {axes_single_arg} + axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame Axis along which to fill missing values. For `Series` this parameter is unused and defaults to 0. inplace : bool, default False @@ -7000,7 +7036,7 @@ def fillna( Returns ------- - {klass} or None + Series/DataFrame or None Object with missing values filled or None if ``inplace=True``. See Also @@ -7239,10 +7275,6 @@ def ffill( ) -> Self | None: ... @final - @doc( - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], - ) def ffill( self, *, @@ -7256,7 +7288,7 @@ def ffill( Parameters ---------- - axis : {axes_single_arg} + axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame Axis along which to fill missing values. For `Series` this parameter is unused and defaults to 0. inplace : bool, default False @@ -7270,7 +7302,7 @@ def ffill( be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. 
- limit_area : {{`None`, 'inside', 'outside'}}, default None + limit_area : {None, 'inside', 'outside'}, default None If limit is specified, consecutive NaNs will be filled with this restriction. @@ -7283,7 +7315,7 @@ def ffill( Returns ------- - {klass} or None + Series/DataFrame or None Object with missing values filled or None if ``inplace=True``. See Also @@ -7372,10 +7404,6 @@ def bfill( ) -> Self | None: ... @final - @doc( - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], - ) def bfill( self, *, @@ -7389,7 +7417,7 @@ def bfill( Parameters ---------- - axis : {axes_single_arg} + axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame Axis along which to fill missing values. For `Series` this parameter is unused and defaults to 0. inplace : bool, default False @@ -7403,7 +7431,7 @@ def bfill( be partially filled. If method is not specified, this is the maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. - limit_area : {{`None`, 'inside', 'outside'}}, default None + limit_area : {None, 'inside', 'outside'}, default None If limit is specified, consecutive NaNs will be filled with this restriction. @@ -7416,7 +7444,7 @@ def bfill( Returns ------- - {klass} or None + Series/DataFrame or None Object with missing values filled or None if ``inplace=True``. See Also @@ -7513,11 +7541,6 @@ def replace( ) -> Self | None: ... @final - @doc( - _shared_docs["replace"], - klass=_shared_doc_kwargs["klass"], - inplace=_shared_doc_kwargs["inplace"], - ) def replace( self, to_replace=None, @@ -7526,6 +7549,314 @@ def replace( inplace: bool = False, regex: bool = False, ) -> Self | None: + """ + Replace values given in `to_replace` with `value`. + + Values of the Series/DataFrame are replaced with other values dynamically. + This differs from updating with ``.loc`` or ``.iloc``, which require + you to specify a location to update with some value. + + Parameters + ---------- + to_replace : str, regex, list, dict, Series, int, float, or None + How to find the values that will be replaced. + + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexes matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexes otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way, the optional `value` + parameter should not be given. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. 
You can treat this as a
+            special case of passing two lists except that you are
+            specifying the column to search in.
+          - For a DataFrame nested dictionaries, e.g.,
+            ``{'a': {'b': np.nan}}``, are read as follows: look in column
+            'a' for the value 'b' and replace it with NaN. The optional `value`
+            parameter should not be specified to use a nested dict in this
+            way. You can nest regular expressions as well. Note that
+            column names (the top-level dictionary keys in a nested
+            dictionary) **cannot** be regular expressions.
+
+        * None:
+
+            - This means that the `regex` argument must be a string,
+              compiled regular expression, or list, dict, ndarray or
+              Series of such elements. If `value` is also ``None`` then
+              this **must** be a nested dictionary or Series.
+
+        See the examples section for examples of each of these.
+        value : scalar, dict, list, str, regex, default None
+            Value to replace any values matching `to_replace` with.
+            For a DataFrame a dict of values can be used to specify which
+            value to use for each column (columns not in the dict will not be
+            filled). Regular expressions, strings and lists or dicts of such
+            objects are also allowed.
+        inplace : bool, default False
+            If True, performs the operation in place and returns None.
+        regex : bool or same types as `to_replace`, default False
+            Whether to interpret `to_replace` and/or `value` as regular
+            expressions. Alternatively, this could be a regular expression or a
+            list, dict, or array of regular expressions in which case
+            `to_replace` must be ``None``.
+
+        Returns
+        -------
+        Series/DataFrame
+            Object after replacement.
+
+        Raises
+        ------
+        AssertionError
+            * If `regex` is not a ``bool`` and `to_replace` is not
+              ``None``.
+
+        TypeError
+            * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``.
+            * If `to_replace` is a ``dict`` and `value` is not a ``list``,
+              ``dict``, ``ndarray``, or ``Series``.
+            * If `to_replace` is ``None`` and `regex` is not compilable
+              into a regular expression or is a list, dict, ndarray, or
+              Series.
+            * When replacing multiple ``bool`` or ``datetime64`` objects and
+              the arguments to `to_replace` do not match the type of the
+              value being replaced.
+
+        ValueError
+            * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
+              `value` but they are not the same length.
+
+        See Also
+        --------
+        Series.fillna : Fill NA values.
+        DataFrame.fillna : Fill NA values.
+        Series.where : Replace values based on boolean condition.
+        DataFrame.where : Replace values based on boolean condition.
+        DataFrame.map : Apply a function to a DataFrame elementwise.
+        Series.map : Map values of Series according to an input mapping or function.
+        Series.str.replace : Simple string replacement.
+
+        Notes
+        -----
+        * Regex substitution is performed under the hood with ``re.sub``. The
+          rules for substitution for ``re.sub`` are the same.
+        * Regular expressions will only substitute on strings, meaning you
+          cannot provide, for example, a regular expression matching floating
+          point numbers and expect the columns in your frame that have a
+          numeric dtype to be matched. However, if those floating point
+          numbers *are* strings, then you can do this.
+        * This method has *a lot* of options. You are encouraged to experiment
+          and play with this method to gain intuition about how it works.
+        * When a dict is used as the `to_replace` value, its key(s) act as
+          the `to_replace` argument and its value(s) act as the `value`
+          argument, as the short example below shows.
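+          For instance, ``s.replace({'a': 'b'})`` behaves the same as
+          ``s.replace(to_replace='a', value='b')``.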
+ + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s.replace(1, 5) + 0 5 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], inplace=True) + >>> s + 0 0 + 1 5 + 2 5 + 3 3 + 4 4 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({0: 10, 1: 100}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({'A': 0, 'B': 5}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({'A': {0: 100, 4: 400}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expressions** + + >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple bool or datetime64 objects, + the data types in the `to_replace` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({'A': [True, False, True], + ... 'B': [False, True, False]}) + >>> df.replace({'A': True, 'B': False}, value=False) + A B + 0 False False + 1 False True + 2 False False + + To avoid this, we can use string values instead: + + ``s.replace(to_replace={'a': None}, value=None)``: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({'a': None})`` is equivalent to + ``s.replace(to_replace={'a': None}, value=None, method=None)``: + + >>> s.replace({'a': None}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value`` is not explicitly passed and `to_replace` is a scalar, list + or tuple, `replace` will raise a ``TypeError``: + + >>> s.replace('a') + Traceback (most recent call last): + ... + TypeError: replace() missing 1 required positional argument: 'value' + + However, `replace` will raise an error when a list or dict is passed. + + >>> s.replace([1, 2, 3]) + Traceback (most recent call last): + ... + TypeError: Replace expects at most 2 positional arguments, + but 3 were given + + >>> s.replace({1: 2, 3: 4}) + Traceback (most recent call last): + ... + TypeError: replace() missing 1 required positional argument: 'value' + + **Nested dictionaries or other mappings** + + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c']}) + >>> df.replace(to_replace='^[a-g]', value='e', regex=True) + A B + 0 e e + 1 e e + 2 e e + + This however is deprecated and will be removed in a future version. 
+ + >>> df.replace(to_replace={'B': '^[a-c]', 'C': '^[h-j]'}, value='e', regex=True) + A B C + 0 a e c + 1 b e c + 2 a e c + """ if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") @@ -8145,7 +8476,6 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - @doc(klass=_shared_doc_kwargs["klass"]) def isna(self) -> Self: """ Detect missing values. @@ -8158,15 +8488,18 @@ def isna(self) -> Self: Returns ------- - {klass} - Mask of bool values for each element in {klass} that + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that indicates whether an element is an NA value. See Also -------- - {klass}.isnull : Alias of isna. - {klass}.notna : Boolean inverse of isna. - {klass}.dropna : Omit axes labels with missing values. + Series.isnull : Alias of isna. + DataFrame.isnull : Alias of isna. + Series.notna : Boolean inverse of isna. + DataFrame.notna : Boolean inverse of isna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. isna : Top-level isna. Examples @@ -8214,11 +8547,77 @@ def isna(self) -> Self: """ return isna(self).__finalize__(self, method="isna") - @doc(isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> Self: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that + indicates whether an element is an NA value. + + See Also + -------- + Series.isna : Detect missing values. + DataFrame.isna : Detect missing values. + Series.notna : Boolean inverse of isna. + DataFrame.notna : Boolean inverse of isna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ return isna(self).__finalize__(self, method="isnull") - @doc(klass=_shared_doc_kwargs["klass"]) def notna(self) -> Self: """ Detect existing (non-missing) values. @@ -8231,15 +8630,18 @@ def notna(self) -> Self: Returns ------- - {klass} - Mask of bool values for each element in {klass} that + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that indicates whether an element is not an NA value. See Also -------- - {klass}.notnull : Alias of notna. - {klass}.isna : Boolean inverse of notna. - {klass}.dropna : Omit axes labels with missing values. 
+ Series.notnull : Alias of notna. + DataFrame.notnull : Alias of notna. + Series.isna : Boolean inverse of notna. + DataFrame.isna : Boolean inverse of notna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. notna : Top-level notna. Examples @@ -8287,8 +8689,75 @@ def notna(self) -> Self: """ return notna(self).__finalize__(self, method="notna") - @doc(notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> Self: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that + indicates whether an element is not an NA value. + + See Also + -------- + Series.notna : Detect existing (non-missing) values. + DataFrame.notna : Detect existing (non-missing) values. + Series.isna : Boolean inverse of notna. + DataFrame.isna : Boolean inverse of notna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ return notna(self).__finalize__(self, method="notnull") @final @@ -8556,7 +9025,6 @@ def clip( return result @final - @doc(klass=_shared_doc_kwargs["klass"]) def asfreq( self, freq: Frequency, @@ -8571,7 +9039,7 @@ def asfreq( Returns the original data conformed to a new index with the specified frequency. - If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index + If the index of this Series/DataFrame is a :class:`~pandas.PeriodIndex`, the new index is the result of transforming the original index with :meth:`PeriodIndex.asfreq ` (so the original index will map one-to-one to the new index). @@ -8608,8 +9076,8 @@ def asfreq( Returns ------- - {klass} - {klass} object reindexed to the specified frequency. + Series/DataFrame + Series/DataFrame object reindexed to the specified frequency. See Also -------- @@ -8827,7 +9295,6 @@ def between_time( return self.take(indexer, axis=axis) @final - @doc(klass=_shared_doc_kwargs["klass"]) def resample( self, rule, @@ -8913,8 +9380,8 @@ def resample( -------- Series.resample : Resample a Series. DataFrame.resample : Resample a DataFrame. - groupby : Group {klass} by mapping, function, label, or list of labels. - asfreq : Reindex a {klass} with the given frequency without grouping. + groupby : Group Series/DataFrame by mapping, function, label, or list of labels. + asfreq : Reindex a Series/DataFrame with the given frequency without grouping. 
        Notes
        -----
@@ -9376,7 +9843,6 @@ def ranker(data):
 
         return ranker(data)
 
-    @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
     def compare(
         self,
         other: Self,
@@ -9385,6 +9851,35 @@ def compare(
         keep_equal: bool = False,
         result_names: Suffixes = ("self", "other"),
     ):
+        """
+        Compare to another Series/DataFrame and show the differences.
+
+        Parameters
+        ----------
+        other : Series/DataFrame
+            Object to compare with.
+
+        align_axis : {0 or 'index', 1 or 'columns'}, default 1
+            Determine which axis to align the comparison on.
+
+            * 0, or 'index' : Resulting differences are stacked vertically
+                with rows drawn alternately from self and other.
+            * 1, or 'columns' : Resulting differences are aligned horizontally
+                with columns drawn alternately from self and other.
+
+        keep_shape : bool, default False
+            If true, all rows and columns are kept.
+            Otherwise, only the ones with different values are kept.
+
+        keep_equal : bool, default False
+            If true, the result keeps values that are equal.
+            Otherwise, equal values are shown as NaNs.
+
+        result_names : tuple, default ('self', 'other')
+            Set the names of the two compared objects.
+
+            .. versionadded:: 1.5.0
+        """
         if type(self) is not type(other):
             cls_self, cls_other = type(self).__name__, type(other).__name__
             raise TypeError(
@@ -9459,10 +9954,6 @@ def compare(
         return diff
 
     @final
-    @doc(
-        klass=_shared_doc_kwargs["klass"],
-        axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
-    )
     def align(
         self,
         other: NDFrameT,
@@ -9490,7 +9981,7 @@ def align(
             * inner: use intersection of keys from both frames, preserve the order
               of the left keys.
 
-        axis : allowed axis of the other object, default None
+        axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame, default None
             Align on index (0), columns (1), or both (None).
         level : int or level name, default None
             Broadcast across a level, matching Index values on the
@@ -9518,7 +10009,7 @@ def align(
 
         Returns
         -------
-        tuple of ({klass}, type of other)
+        tuple of (Series/DataFrame, type of other)
             Aligned objects.
 
         See Also
@@ -9981,13 +10472,6 @@ def where(
     ) -> Self | None: ...
 
     @final
-    @doc(
-        klass=_shared_doc_kwargs["klass"],
-        cond="True",
-        cond_rev="False",
-        name="where",
-        name_other="mask",
-    )
     def where(
         self,
         cond,
@@ -9998,22 +10482,22 @@ def where(
         level: Level | None = None,
     ) -> Self | None:
         """
-        Replace values where the condition is {cond_rev}.
+        Replace values where the condition is False.
 
         Parameters
         ----------
-        cond : bool {klass}, array-like, or callable
-            Where `cond` is {cond}, keep the original value. Where
-            {cond_rev}, replace with corresponding value from `other`.
-            If `cond` is callable, it is computed on the {klass} and
-            should return boolean {klass} or array. The callable must
-            not change input {klass} (though pandas doesn't check it).
-        other : scalar, {klass}, or callable
-            Entries where `cond` is {cond_rev} are replaced with
+        cond : bool Series/DataFrame, array-like, or callable
+            Where `cond` is True, keep the original value. Where
+            False, replace with corresponding value from `other`.
+            If `cond` is callable, it is computed on the Series/DataFrame and
+            should return boolean Series/DataFrame or array. The callable must
+            not change input Series/DataFrame (though pandas doesn't check it).
+        other : scalar, Series/DataFrame, or callable
+            Entries where `cond` is False are replaced with
             corresponding value from `other`.
-            If other is callable, it is computed on the {klass} and
-            should return scalar or {klass}. 
The callable must not - change input {klass} (though pandas doesn't check it). + If other is callable, it is computed on the Series/DataFrame and + should return scalar or Series/DataFrame. The callable must not + change input Series/DataFrame (though pandas doesn't check it). If not specified, entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). @@ -10034,25 +10518,25 @@ def where( See Also -------- - :func:`DataFrame.{name_other}` : Return an object of same shape as + :func:`DataFrame.mask` : Return an object of same shape as caller. - :func:`Series.{name_other}` : Return an object of same shape as + :func:`Series.mask` : Return an object of same shape as caller. Notes ----- - The {name} method is an application of the if-then idiom. For each - element in the caller, if ``cond`` is ``{cond}`` the + The where method is an application of the if-then idiom. For each + element in the caller, if ``cond`` is ``True`` the element is used; otherwise the corresponding element from ``other`` is used. If the axis of ``other`` does not align with axis of - ``cond`` {klass}, the values of ``cond`` on misaligned index positions - will be filled with {cond_rev}. + ``cond`` Series/DataFrame, the values of ``cond`` on misaligned index positions + will be filled with False. The signature for :func:`Series.where` or :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to ``np.where(m, df1, df2)``. - For further details and examples see the ``{name}`` documentation in + For further details and examples see the ``where`` documentation in :ref:`indexing `. The dtype of the object takes precedence. The fill value is casted to @@ -10186,14 +10670,6 @@ def mask( ) -> Self | None: ... @final - @doc( - where, - klass=_shared_doc_kwargs["klass"], - cond="False", - cond_rev="True", - name="mask", - name_other="where", - ) def mask( self, cond, @@ -10203,53 +10679,194 @@ def mask( axis: Axis | None = None, level: Level | None = None, ) -> Self | None: - inplace = validate_bool_kwarg(inplace, "inplace") - if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: - if sys.getrefcount(self) <= REF_COUNT: - warnings.warn( - _chained_assignment_method_msg, - ChainedAssignmentError, - stacklevel=2, - ) - - cond = common.apply_if_callable(cond, self) - other = common.apply_if_callable(other, self) - - # see gh-21891 - if not hasattr(cond, "__invert__"): - cond = np.array(cond) - - return self._where( - ~cond, - other=other, - inplace=inplace, - axis=axis, - level=level, - ) - - @doc(klass=_shared_doc_kwargs["klass"]) - def shift( - self, - periods: int | Sequence[int] = 1, - freq=None, - axis: Axis = 0, - fill_value: Hashable = lib.no_default, - suffix: str | None = None, - ) -> Self | DataFrame: """ - Shift index by desired number of periods with an optional time `freq`. - - When `freq` is not passed, shift the index without realigning the data. - If `freq` is passed (in this case, the index must be date or datetime, - or it will raise a `NotImplementedError`), the index will be - increased using the periods and the `freq`. `freq` can be inferred - when specified as "infer" as long as either freq or inferred_freq - attribute is set in the index. + Replace values where the condition is True. Parameters ---------- - periods : int or Sequence + cond : bool Series/DataFrame, array-like, or callable + Where `cond` is False, keep the original value. Where + True, replace with corresponding value from `other`. 
+ If `cond` is callable, it is computed on the Series/DataFrame and + should return boolean Series/DataFrame or array. The callable must + not change input Series/DataFrame (though pandas doesn't check it). + other : scalar, Series/DataFrame, or callable + Entries where `cond` is True are replaced with + corresponding value from `other`. + If other is callable, it is computed on the Series/DataFrame and + should return scalar or Series/DataFrame. The callable must not + change input Series/DataFrame (though pandas doesn't check it). + If not specified, entries will be filled with the corresponding + NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension + dtypes). + inplace : bool, default False + Whether to perform the operation in place on the data. + axis : int, default None + Alignment axis if needed. For `Series` this parameter is + unused and defaults to 0. + level : int, default None + Alignment level if needed. + + Returns + ------- + Series or DataFrame or None + When applied to a Series, the function will return a Series, + and when applied to a DataFrame, it will return a DataFrame; + if ``inplace=True``, it will return None. + + See Also + -------- + :func:`DataFrame.where` : Return an object of same shape as + caller. + :func:`Series.where` : Return an object of same shape as + caller. + + Notes + ----- + The mask method is an application of the if-then idiom. For each + element in the caller, if ``cond`` is ``False`` the + element is used; otherwise the corresponding element from + ``other`` is used. If the axis of ``other`` does not align with axis of + ``cond`` Series/DataFrame, the values of ``cond`` on misaligned index positions + will be filled with True. + + The signature for :func:`Series.mask` or + :func:`DataFrame.mask` differs from :func:`numpy.where`. + Roughly ``df1.mask(m, df2)`` is equivalent to ``np.where(m, df2, df1)``. + + For further details and examples see the ``mask`` documentation in + :ref:`indexing `. + + The dtype of the object takes precedence. The fill value is casted to + the object's dtype, if this can be done losslessly. 
+ + Examples + -------- + >>> s = pd.Series(range(5)) + >>> s.where(s > 0) + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + 4 4.0 + dtype: float64 + >>> s.mask(s > 0) + 0 0.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + >>> s = pd.Series(range(5)) + >>> t = pd.Series([True, False]) + >>> s.where(t, 99) + 0 0 + 1 99 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.mask(t, 99) + 0 99 + 1 1 + 2 99 + 3 99 + 4 99 + dtype: int64 + + >>> s.where(s > 1, 10) + 0 10 + 1 10 + 2 2 + 3 3 + 4 4 + dtype: int64 + >>> s.mask(s > 1, 10) + 0 0 + 1 1 + 2 10 + 3 10 + 4 10 + dtype: int64 + + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]) + >>> df + A B + 0 0 1 + 1 2 3 + 2 4 5 + 3 6 7 + 4 8 9 + >>> m = df % 3 == 0 + >>> df.where(m, -df) + A B + 0 0 -1 + 1 -2 3 + 2 -4 -5 + 3 6 -7 + 4 -8 9 + >>> df.where(m, -df) == np.where(m, df, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + >>> df.where(m, -df) == df.mask(~m, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if inplace: + if not PYPY and not WARNING_CHECK_DISABLED: + if sys.getrefcount(self) <= REF_COUNT: + warnings.warn( + _chained_assignment_method_msg, + ChainedAssignmentError, + stacklevel=2, + ) + + cond = common.apply_if_callable(cond, self) + other = common.apply_if_callable(other, self) + + # see gh-21891 + if not hasattr(cond, "__invert__"): + cond = np.array(cond) + + return self._where( + ~cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + def shift( + self, + periods: int | Sequence[int] = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = lib.no_default, + suffix: str | None = None, + ) -> Self | DataFrame: + """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. + + Parameters + ---------- + periods : int or Sequence Number of periods to shift. Can be positive or negative. If an iterable of ints, the data will be shifted once by each int. This is equivalent to shifting by one value at a time and @@ -10279,7 +10896,7 @@ def shift( Returns ------- - {klass} + Series/DataFrame Copy of input object, shifted. See Also @@ -10602,7 +11219,6 @@ def truncate( return result @final - @doc(klass=_shared_doc_kwargs["klass"]) def tz_convert( self, tz, @@ -10642,7 +11258,7 @@ def tz_convert( Returns ------- - {klass} + Series/DataFrame Object with time zone converted axis. Raises @@ -10706,7 +11322,6 @@ def _tz_convert(ax, tz): return result.__finalize__(self, method="tz_convert") @final - @doc(klass=_shared_doc_kwargs["klass"]) def tz_localize( self, tz, @@ -10727,7 +11342,7 @@ def tz_localize( tz : str or tzinfo or None Time zone to localize. Passing ``None`` will remove the time zone information and preserve local time. - axis : {{0 or 'index', 1 or 'columns'}}, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to localize level : int, str, default None If axis ia a MultiIndex, localize a specific level. 
Otherwise @@ -10779,7 +11394,7 @@ def tz_localize( Returns ------- - {klass} + Series/DataFrame Same type as the input, with time zone naive or aware index, depending on ``tz``. @@ -11680,7 +12295,6 @@ def prod( product = prod @final - @doc(Rolling) def rolling( self, window: int | dt.timedelta | str | BaseOffset | BaseIndexer, @@ -11692,6 +12306,232 @@ def rolling( step: int | None = None, method: str = "single", ) -> Window | Rolling: + """ + Provide rolling window calculations. + + Parameters + ---------- + window : int, timedelta, str, offset, or BaseIndexer subclass + Interval of the moving window. + + If an integer, the delta between the start and end of each window. + The number of points in the window depends on the ``closed`` argument. + + If a timedelta, str, or offset, the time period of each window. Each + window will be a variable sized based on the observations included in + the time-period. This is only valid for datetimelike indexes. + To learn more about the offsets & frequency strings, please see + :ref:`this link`. + + If a BaseIndexer subclass, the window boundaries + based on the defined ``get_window_bounds`` method. Additional rolling + keyword arguments, namely ``min_periods``, ``center``, ``closed`` and + ``step`` will be passed to ``get_window_bounds``. + + min_periods : int, default None + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + For a window that is specified by an offset, ``min_periods`` will default to 1. + + For a window that is specified by an integer, ``min_periods`` will default + to the size of the window. + + center : bool, default False + If False, set the window labels as the right edge of the window index. + + If True, set the window labels as the center of the window index. + + win_type : str, default None + If ``None``, all points are evenly weighted. + + If a string, it must be a valid `scipy.signal window function + `__. + + Certain Scipy window types require additional parameters to be passed + in the aggregation function. The additional parameters must match + the keywords specified in the Scipy window type method signature. + + on : str, optional + For a DataFrame, a column label or Index level on which + to calculate the rolling window, rather than the DataFrame's index. + + Provided integer column is ignored and excluded from result since + an integer index is not used to calculate the rolling window. + + closed : str, default None + Determines the inclusivity of points in the window + + If ``'right'``, uses the window (first, last] meaning the last point + is included in the calculations. + + If ``'left'``, uses the window [first, last) meaning the first point + is included in the calculations. + + If ``'both'``, uses the window [first, last] meaning all points in + the window are included in the calculations. + + If ``'neither'``, uses the window (first, last) meaning the first + and last points in the window are excluded from calculations. + + () and [] are referencing open and closed set + notation respetively. + + Default ``None`` (``'right'``). + + step : int, default None + Evaluate the window at every ``step`` result, equivalent to slicing as + ``[::step]``. ``window`` must be an integer. Using a step argument other + than None or 1 will produce a result with a different shape than the input. + + .. versionadded:: 1.5.0 + + method : str {'single', 'table'}, default 'single' + + .. 
versionadded:: 1.3.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Returns + ------- + pandas.api.typing.Window or pandas.api.typing.Rolling + An instance of Window is returned if ``win_type`` is passed. Otherwise, + an instance of Rolling is returned. + + See Also + -------- + expanding : Provides expanding transformations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **window** + + Rolling sum with a window length of 2 observations. + + >>> df.rolling(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 NaN + 4 NaN + + Rolling sum with a window span of 2 seconds. + + >>> df_time = pd.DataFrame( + ... {"B": [0, 1, 2, np.nan, 4]}, + ... index=[ + ... pd.Timestamp("20130101 09:00:00"), + ... pd.Timestamp("20130101 09:00:02"), + ... pd.Timestamp("20130101 09:00:03"), + ... pd.Timestamp("20130101 09:00:05"), + ... pd.Timestamp("20130101 09:00:06"), + ... ], + ... ) + + >>> df_time + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 2.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + >>> df_time.rolling("2s").sum() + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 3.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + Rolling sum with forward looking windows with 2 observations. + + >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2) + >>> df.rolling(window=indexer, min_periods=1).sum() + B + 0 1.0 + 1 3.0 + 2 2.0 + 3 4.0 + 4 4.0 + + **min_periods** + + Rolling sum with a window length of 2 observations, but only needs a minimum of 1 + observation to calculate a value. + + >>> df.rolling(2, min_periods=1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 2.0 + 4 4.0 + + **center** + + Rolling sum with the result assigned to the center of the window index. + + >>> df.rolling(3, min_periods=1, center=True).sum() + B + 0 1.0 + 1 3.0 + 2 3.0 + 3 6.0 + 4 4.0 + + >>> df.rolling(3, min_periods=1, center=False).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 6.0 + + **step** + + Rolling sum with a window length of 2 observations, minimum of 1 observation to + calculate a value, and a step of 2. + + >>> df.rolling(2, min_periods=1, step=2).sum() + B + 0 0.0 + 2 3.0 + 4 4.0 + + **win_type** + + Rolling sum with a window length of 2, using the Scipy ``'gaussian'`` + window type. ``std`` is required in the aggregation function. + + >>> df.rolling(2, win_type="gaussian").sum(std=3) + B + 0 NaN + 1 0.986207 + 2 2.958621 + 3 NaN + 4 3.943414 + """ if win_type is not None: return Window( self, @@ -11718,16 +12558,81 @@ def rolling( ) @final - @doc(Expanding) def expanding( self, min_periods: int = 1, method: Literal["single", "table"] = "single", ) -> Expanding: + """ + Provide expanding window calculations. + + An expanding window yields the value of an aggregation statistic with all the data + available up to that point in time. + + Parameters + ---------- + min_periods : int, default 1 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. 
+ + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + .. versionadded:: 1.3.0 + + Returns + ------- + pandas.api.typing.Expanding + An instance of Expanding for further expanding window calculations, + e.g. using the ``sum`` method. + + See Also + -------- + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **min_periods** + + Expanding sum with 1 vs 3 observations needed to calculate a value. + + >>> df.expanding(1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 7.0 + >>> df.expanding(3).sum() + B + 0 NaN + 1 NaN + 2 3.0 + 3 3.0 + 4 7.0 + """ return Expanding(self, min_periods=min_periods, method=method) @final - @doc(ExponentialMovingWindow) def ewm( self, com: float | None = None, @@ -11740,6 +12645,191 @@ def ewm( times: np.ndarray | DataFrame | Series | None = None, method: Literal["single", "table"] = "single", ) -> ExponentialMovingWindow: + r""" + Provide exponentially weighted (EW) calculations. + + Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided if ``times`` is not provided. If ``times`` is provided and ``adjust=True``, + ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + If ``times`` is provided and ``adjust=False``, ``halflife`` must be the only + provided decay-specification parameter. + + Parameters + ---------- + com : float, optional + Specify decay in terms of center of mass + + :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`. + + span : float, optional + Specify decay in terms of span + + :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`. + + halflife : float, str, timedelta, optional + Specify decay in terms of half-life + + :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for + :math:`halflife > 0`. + + If ``times`` is specified, a timedelta convertible unit over which an + observation decays to half its value. Only applicable to ``mean()``, + and halflife value will not apply to the other functions. + + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly + + :math:`0 < \alpha \leq 1`. + + min_periods : int, default 0 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + adjust : bool, default True + Divide by decaying adjustment factor in beginning periods to account + for imbalance in relative weightings (viewing EWMA as a moving average). + + - When ``adjust=True`` (default), the EW function is calculated using weights + :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series + [:math:`x_0, x_1, ..., x_t`] would be: + + .. math:: + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 - + \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t} + + - When ``adjust=False``, the exponentially weighted function is calculated + recursively: + + .. math:: + \begin{split} + y_0 &= x_0\\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + \end{split} + ignore_na : bool, default False + Ignore missing values when calculating weights. 
+ + - When ``ignore_na=False`` (default), weights are based on absolute positions. + For example, the weights of :math:`x_0` and :math:`x_2` used in calculating + the final weighted average of [:math:`x_0`, None, :math:`x_2`] are + :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and + :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``. + + - When ``ignore_na=True``, weights are based + on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` + used in calculating the final weighted average of + [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if + ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. + + times : np.ndarray, Series, default None + + Only applicable to ``mean()``. + + Times corresponding to the observations. Must be monotonically increasing and + ``datetime64[ns]`` dtype. + + If 1-D array like, a sequence with the same shape as the observations. + + method : str {'single', 'table'}, default 'single' + .. versionadded:: 1.4.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Only applicable to ``mean()`` + + Returns + ------- + pandas.api.typing.ExponentialMovingWindow + An instance of ExponentialMovingWindow for further exponentially weighted (EW) + calculations, e.g. using the ``mean`` method. + + See Also + -------- + rolling : Provides rolling window calculations. + expanding : Provides expanding transformations. + + Notes + ----- + See :ref:`Windowing Operations ` + for further usage details and examples. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.ewm(com=0.5).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(alpha=2 / 3).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + + **adjust** + + >>> df.ewm(com=0.5, adjust=True).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(com=0.5, adjust=False).mean() + B + 0 0.000000 + 1 0.666667 + 2 1.555556 + 3 1.555556 + 4 3.650794 + + **ignore_na** + + >>> df.ewm(com=0.5, ignore_na=True).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.225000 + >>> df.ewm(com=0.5, ignore_na=False).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + + **times** + + Exponentially weighted mean with weights calculated with a timedelta ``halflife`` + relative to ``times``. + + >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17'] + >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean() + B + 0 0.000000 + 1 0.585786 + 2 1.523889 + 3 1.523889 + 4 3.233686 + """ return ExponentialMovingWindow( self, com=com, @@ -11847,10 +12937,9 @@ def _find_valid_index(self, *, how: str) -> Hashable: return self.index[idxpos] @final - @doc(position="first", klass=_shared_doc_kwargs["klass"]) def first_valid_index(self) -> Hashable: """ - Return index for {position} non-missing value or None, if no value is found. + Return index for first non-missing value or None, if no value is found. See the :ref:`User Guide ` for more information on which values are considered missing. @@ -11858,7 +12947,7 @@ def first_valid_index(self) -> Hashable: Returns ------- type of index - Index of {position} non-missing value. + Index of first non-missing value. 
 
         See Also
         --------
@@ -11896,7 +12985,7 @@ def first_valid_index(self) -> Hashable:
 
         For DataFrame:
 
-        >>> df = pd.DataFrame({{"A": [None, None, 2], "B": [None, 3, 4]}})
+        >>> df = pd.DataFrame({"A": [None, None, 2], "B": [None, 3, 4]})
         >>> df
              A    B
         0  NaN  NaN
         1  NaN  3.0
         2  2.0  4.0
         >>> df.first_valid_index()
         1
         >>> df.last_valid_index()
         2
 
-        >>> df = pd.DataFrame({{"A": [None, None, None], "B": [None, None, None]}})
+        >>> df = pd.DataFrame({"A": [None, None, None], "B": [None, None, None]})
         >>> df
               A     B
         0  None  None
         1  None  None
         2  None  None
@@ -11935,8 +13024,90 @@ def first_valid_index(self) -> Hashable:
         return self._find_valid_index(how="first")
 
     @final
-    @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"])
     def last_valid_index(self) -> Hashable:
+        """
+        Return index for last non-missing value or None, if no value is found.
+
+        See the :ref:`User Guide <missing_data>` for more information
+        on which values are considered missing.
+
+        Returns
+        -------
+        type of index
+            Index of last non-missing value.
+
+        See Also
+        --------
+        DataFrame.first_valid_index : Return index for first non-NA value or None, if
+            no non-NA value is found.
+        Series.first_valid_index : Return index for first non-NA value or None, if no
+            non-NA value is found.
+        DataFrame.isna : Detect missing values.
+
+        Examples
+        --------
+        For Series:
+
+        >>> s = pd.Series([None, 3, 4])
+        >>> s.first_valid_index()
+        1
+        >>> s.last_valid_index()
+        2
+
+        >>> s = pd.Series([None, None])
+        >>> print(s.first_valid_index())
+        None
+        >>> print(s.last_valid_index())
+        None
+
+        If all elements in Series are NA/null, returns None.
+
+        >>> s = pd.Series()
+        >>> print(s.first_valid_index())
+        None
+        >>> print(s.last_valid_index())
+        None
+
+        If Series is empty, returns None.
+
+        For DataFrame:
+
+        >>> df = pd.DataFrame({"A": [None, None, 2], "B": [None, 3, 4]})
+        >>> df
+             A    B
+        0  NaN  NaN
+        1  NaN  3.0
+        2  2.0  4.0
+        >>> df.first_valid_index()
+        1
+        >>> df.last_valid_index()
+        2
+
+        >>> df = pd.DataFrame({"A": [None, None, None], "B": [None, None, None]})
+        >>> df
+              A     B
+        0  None  None
+        1  None  None
+        2  None  None
+        >>> print(df.first_valid_index())
+        None
+        >>> print(df.last_valid_index())
+        None
+
+        If all elements in DataFrame are NA/null, returns None.
+
+        >>> df = pd.DataFrame()
+        >>> df
+        Empty DataFrame
+        Columns: []
+        Index: []
+        >>> print(df.first_valid_index())
+        None
+        >>> print(df.last_valid_index())
+        None
+
+        If DataFrame is empty, returns None.
+        """
         return self._find_valid_index(how="last")

From b7b80d4815dc587b9ee48bea159a3cff435841fe Mon Sep 17 00:00:00 2001
From: wdyy20041223 <2795352227@qq.com>
Date: Mon, 13 Oct 2025 14:17:36 +0800
Subject: [PATCH 2/5] fix: format issues for ruff compliance

---
 pandas/core/generic.py | 71 ++++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 33 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 3c558ec6ac27d..42b39f1ea3c68 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2227,7 +2227,7 @@ def to_excel(
             .. versionadded:: 1.2.0
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
-
+
         See Also
         --------
         to_csv : Write DataFrame to a comma-separated values (csv) file.
@@ -2398,19 +2398,19 @@ def to_json(
             throw ValueError if incorrect 'orient' since others are not
             list-like.
         compression : str or dict, default 'infer'
-            For on-the-fly compression of the output data. If 'infer' and 'path_or_buf' is
-            path-like, then detect compression from the following extensions: '.gz',
+            For on-the-fly compression of the output data. If 'infer' and 'path_or_buf'
+            is path-like, then detect compression from the following extensions: '.gz',
             '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
             (otherwise no compression).
             Set to ``None`` for no compression.
             Can also be a dict with key ``'method'`` set
-            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
-            other key-value pairs are forwarded to
+            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+            ``'tar'``} and other key-value pairs are forwarded to
             ``zipfile.ZipFile``, ``gzip.GzipFile``,
             ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
             ``tarfile.TarFile``, respectively.
-            As an example, the following could be passed for faster compression and to create
-            a reproducible gzip archive:
+            As an example, the following could be passed for faster compression and
+            to create a reproducible gzip archive:
             ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
 
             .. versionchanged:: 1.4.0
                Zstandard support.
@@ -3081,13 +3081,13 @@ def to_pickle(
             (otherwise no compression).
             Set to ``None`` for no compression.
             Can also be a dict with key ``'method'`` set
-            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
-            other key-value pairs are forwarded to
+            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+            ``'tar'``} and other key-value pairs are forwarded to
             ``zipfile.ZipFile``, ``gzip.GzipFile``,
             ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
             ``tarfile.TarFile``, respectively.
-            As an example, the following could be passed for faster compression and to create
-            a reproducible gzip archive:
+            As an example, the following could be passed for faster compression and
+            to create a reproducible gzip archive:
             ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
         protocol : int
             Int which indicates which protocol should be used by the pickler,
@@ -3853,19 +3853,19 @@ def to_csv(
             defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
             is a non-binary file object.
         compression : str or dict, default 'infer'
-            For on-the-fly compression of the output data. If 'infer' and 'path_or_buf' is
-            path-like, then detect compression from the following extensions: '.gz',
+            For on-the-fly compression of the output data. If 'infer' and 'path_or_buf'
+            is path-like, then detect compression from the following extensions: '.gz',
             '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
             (otherwise no compression).
             Set to ``None`` for no compression.
             Can also be a dict with key ``'method'`` set
-            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
-            other key-value pairs are forwarded to
+            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+            ``'tar'``} and other key-value pairs are forwarded to
             ``zipfile.ZipFile``, ``gzip.GzipFile``,
             ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
             ``tarfile.TarFile``, respectively.
-            As an example, the following could be passed for faster compression and to create
-            a reproducible gzip archive:
+            As an example, the following could be passed for faster compression and
+            to create a reproducible gzip archive:
             ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
             May be a dict with key 'method' as compression mode
@@ -9039,8 +9039,8 @@ def asfreq(
         Returns the original data conformed to a new index with the specified
         frequency.
 
-        If the index of this Series/DataFrame is a :class:`~pandas.PeriodIndex`, the new index
-        is the result of transforming the original index with
+        If the index of this Series/DataFrame is a :class:`~pandas.PeriodIndex`, the
+        new index is the result of transforming the original index with
         :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index
         will map one-to-one to the new index).
@@ -9981,7 +9981,8 @@ def align(
         * inner: use intersection of keys from both frames, preserve the order
           of the left keys.
 
-        axis : {0 or 'index', 1 or 'columns'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame, default None
+        axis : {0 or 'index', 1 or 'columns'} for Series,
+            {0 or 'index', 1 or 'columns'} for DataFrame, default None
             Align on index (0), columns (1), or both (None).
         level : int or level name, default None
             Broadcast across a level, matching Index values on the
@@ -12332,7 +12333,8 @@ def rolling(
             Minimum number of observations in window required to have a value;
             otherwise, result is ``np.nan``.
 
-            For a window that is specified by an offset, ``min_periods`` will default to 1.
+            For a window that is specified by an offset, ``min_periods`` will default
+            to 1.
 
             For a window that is specified by an integer, ``min_periods`` will default
             to the size of the window.
@@ -12477,8 +12479,8 @@ def rolling(
 
         **min_periods**
 
-        Rolling sum with a window length of 2 observations, but only needs a minimum of 1
-        observation to calculate a value.
+        Rolling sum with a window length of 2 observations, but only needs a minimum
+        of 1 observation to calculate a value.
 
         >>> df.rolling(2, min_periods=1).sum()
              B
@@ -12566,8 +12568,8 @@ def expanding(
         """
         Provide expanding window calculations.
 
-        An expanding window yields the value of an aggregation statistic with all the data
-        available up to that point in time.
+        An expanding window yields the value of an aggregation statistic with all the
+        data available up to that point in time.
 
         Parameters
         ----------
@@ -12649,9 +12651,10 @@ def ewm(
         Provide exponentially weighted (EW) calculations.
 
         Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be
-        provided if ``times`` is not provided. If ``times`` is provided and ``adjust=True``,
-        ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided.
-        If ``times`` is provided and ``adjust=False``, ``halflife`` must be the only
+        provided if ``times`` is not provided. If ``times`` is provided and
+        ``adjust=True``, ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be
+        provided. If ``times`` is provided and ``adjust=False``, ``halflife`` must be
+        the only
         provided decay-specification parameter.
 
         Parameters
@@ -12689,13 +12692,14 @@ def ewm(
             Divide by decaying adjustment factor in beginning periods to account
             for imbalance in relative weightings (viewing EWMA as a moving average).
 
-            - When ``adjust=True`` (default), the EW function is calculated using weights
-              :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series
+            - When ``adjust=True`` (default), the EW function is calculated using
+              weights :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average
+              of the series
              [:math:`x_0, x_1, ..., x_t`] would be:
 
             .. math::
-                y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 -
-                \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t}
+                y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... +
+                (1 - \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t}
 
             - When ``adjust=False``, the exponentially weighted function is calculated
               recursively:
@@ -12708,7 +12712,8 @@ def ewm(
         ignore_na : bool, default False
             Ignore missing values when calculating weights.
 
-            - When ``ignore_na=False`` (default), weights are based on absolute positions.
+            - When ``ignore_na=False`` (default), weights are based on absolute
+              positions.
             For example, the weights of :math:`x_0` and :math:`x_2` used in calculating
             the final weighted average of [:math:`x_0`, None, :math:`x_2`] are
             :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and

From 4f1313bee69fbad9292dc016f55b478a751e60d1 Mon Sep 17 00:00:00 2001
From: wdyy20041223 <2795352227@qq.com>
Date: Mon, 13 Oct 2025 14:25:31 +0800
Subject: [PATCH 3/5] fix: resolve all ruff format issues - remove trailing
 whitespace and fix line lengths

---
 pandas/core/generic.py | 35 +++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 42b39f1ea3c68..427d100059b20 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -9981,7 +9981,7 @@ def align(
         * inner: use intersection of keys from both frames, preserve the order
           of the left keys.
 
-        axis : {0 or 'index', 1 or 'columns'} for Series,
+        axis : {0 or 'index', 1 or 'columns'} for Series,
             {0 or 'index', 1 or 'columns'} for DataFrame, default None
             Align on index (0), columns (1), or both (None).
         level : int or level name, default None
             Broadcast across a level, matching Index values on the
@@ -12698,8 +12698,9 @@ def ewm(
             [:math:`x_0, x_1, ..., x_t`] would be:
 
             .. math::
-                y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... +
-                (1 - \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t}
+                y_t = \\frac{x_t + (1 - \\alpha)x_{t-1} + (1 - \\alpha)^2 x_{t-2} + ... +
+                (1 - \\alpha)^t x_0}{1 + (1 - \\alpha) + (1 - \\alpha)^2 + ... +
+                (1 - \\alpha)^t}
 
             - When ``adjust=False``, the exponentially weighted function is calculated
               recursively:
@@ -12714,23 +12715,24 @@ def ewm(
 
             - When ``ignore_na=False`` (default), weights are based on absolute
               positions.
-            For example, the weights of :math:`x_0` and :math:`x_2` used in calculating
-            the final weighted average of [:math:`x_0`, None, :math:`x_2`] are
-            :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and
+              For example, the weights of :math:`x_0` and :math:`x_2` used in
+              calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`]
+              are :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and
              :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``.
 
             - When ``ignore_na=True``, weights are based
-              on relative positions. For example, the weights of :math:`x_0` and :math:`x_2`
-              used in calculating the final weighted average of
+              on relative positions. For example, the weights of :math:`x_0` and
+              :math:`x_2` used in calculating the final weighted average of
               [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if
-              ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``.
+              ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if
+              ``adjust=False``.
 
         times : np.ndarray, Series, default None
 
             Only applicable to ``mean()``.
 
-            Times corresponding to the observations. Must be monotonically increasing and
-            ``datetime64[ns]`` dtype.
+            Times corresponding to the observations. Must be monotonically increasing
+            and ``datetime64[ns]`` dtype.
 
             If 1-D array like, a sequence with the same shape as the observations.
 
@@ -12748,8 +12750,8 @@ def ewm(
         Returns
         -------
         pandas.api.typing.ExponentialMovingWindow
-            An instance of ExponentialMovingWindow for further exponentially weighted (EW)
-            calculations, e.g. using the ``mean`` method.
+            An instance of ExponentialMovingWindow for further exponentially weighted
+            (EW) calculations, e.g. using the ``mean`` method.
 
         See Also
         --------
@@ -12823,10 +12825,11 @@ def ewm(
 
         **times**
 
-        Exponentially weighted mean with weights calculated with a timedelta ``halflife``
-        relative to ``times``.
+        Exponentially weighted mean with weights calculated with a timedelta
+        ``halflife`` relative to ``times``.
 
-        >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
+        >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15',
+        ...          '2020-01-17']
         >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
                   B
         0  0.000000

From 9d05b222c4a941ba9b2a850aafb5d15b82645e83 Mon Sep 17 00:00:00 2001
From: wdyy20041223 <2795352227@qq.com>
Date: Mon, 13 Oct 2025 14:28:18 +0800
Subject: [PATCH 4/5] fix: split long math formula line to meet 88 char limit

---
 pandas/core/generic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 427d100059b20..f9f95a9669ad4 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -12698,9 +12698,9 @@ def ewm(
             [:math:`x_0, x_1, ..., x_t`] would be:
 
             .. math::
-                y_t = \\frac{x_t + (1 - \\alpha)x_{t-1} + (1 - \\alpha)^2 x_{t-2} + ... +
-                (1 - \\alpha)^t x_0}{1 + (1 - \\alpha) + (1 - \\alpha)^2 + ... +
-                (1 - \\alpha)^t}
+                y_t = \\frac{x_t + (1 - \\alpha)x_{t-1} + (1 - \\alpha)^2 x_{t-2} +
+                ... + (1 - \\alpha)^t x_0}{1 + (1 - \\alpha) + (1 - \\alpha)^2 +
+                ... + (1 - \\alpha)^t}
 
             - When ``adjust=False``, the exponentially weighted function is calculated
               recursively:

From 26cb1c1be1e76003068e89883d50266e5a1eb7f5 Mon Sep 17 00:00:00 2001
From: wdyy20041223 <2795352227@qq.com>
Date: Mon, 13 Oct 2025 14:31:31 +0800
Subject: [PATCH 5/5] style: apply ruff format and isort auto-formatting

---
 pandas/core/generic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f9f95a9669ad4..427d100059b20 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -12698,9 +12698,9 @@ def ewm(
             [:math:`x_0, x_1, ..., x_t`] would be:
 
             .. math::
-                y_t = \\frac{x_t + (1 - \\alpha)x_{t-1} + (1 - \\alpha)^2 x_{t-2} +
-                ... + (1 - \\alpha)^t x_0}{1 + (1 - \\alpha) + (1 - \\alpha)^2 +
-                ... + (1 - \\alpha)^t}
+                y_t = \\frac{x_t + (1 - \\alpha)x_{t-1} + (1 - \\alpha)^2 x_{t-2} + ... +
+                (1 - \\alpha)^t x_0}{1 + (1 - \\alpha) + (1 - \\alpha)^2 + ... +
+                (1 - \\alpha)^t}
 
             - When ``adjust=False``, the exponentially weighted function is calculated
               recursively:
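As a quick numerical cross-check of the two ``adjust`` formulas documented in the
inlined ``ewm`` docstring above, the following standalone Python sketch reproduces
pandas' own output for both settings. The series and smoothing factor are
illustrative assumptions, not values taken from the patch:

    # Sketch only: verify the documented adjust=True / adjust=False math
    # against pandas itself.
    import numpy as np
    import pandas as pd

    x = pd.Series([0.0, 1.0, 2.0, 4.0])
    alpha = 2 / 3  # equivalent to com=0.5, since alpha = 1 / (1 + com)

    # adjust=True: y_t = sum_i (1 - alpha)^i x_{t-i} / sum_i (1 - alpha)^i
    w = (1 - alpha) ** np.arange(len(x))[::-1]  # weights for the final row
    adjusted = (w * x).sum() / w.sum()

    # adjust=False: y_0 = x_0; y_t = (1 - alpha) * y_{t-1} + alpha * x_t
    y = x.iloc[0]
    for obs in x.iloc[1:]:
        y = (1 - alpha) * y + alpha * obs

    assert np.isclose(adjusted, x.ewm(alpha=alpha, adjust=True).mean().iloc[-1])
    assert np.isclose(y, x.ewm(alpha=alpha, adjust=False).mean().iloc[-1])

Note the series here deliberately contains no NaN: with missing values the two
``ignore_na`` weighting schemes described in the docstring diverge, and the simple
recursion above only matches the ``ignore_na=True`` case.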
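The ``halflife``/``times`` behaviour documented above can likewise be reproduced by
hand: with ``adjust=True``, each observation's weight decays as
``0.5 ** (elapsed / halflife)`` relative to the newest timestamp. The two-point
series below is an assumed example that recovers the 0.585786 value shown in the
docstring's **times** doctest:

    # Sketch only: recompute the second value of the times-based EWM example.
    import numpy as np
    import pandas as pd

    times = pd.DatetimeIndex(["2020-01-01", "2020-01-03"])
    x = np.array([0.0, 1.0])
    halflife = pd.Timedelta("4 days")

    # Weights relative to the most recent observation.
    w = 0.5 ** ((times[-1] - times) / halflife).to_numpy()
    print((w * x).sum() / w.sum())  # 0.5857864376269049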