From a3f2e0a2c35e59d6dc07d6e5bea3070bc2e4c16a Mon Sep 17 00:00:00 2001
From: wdyy20041223 <2795352227@qq.com>
Date: Sun, 12 Oct 2025 21:01:39 +0800
Subject: [PATCH] Replace dynamic docstring decorators with static docstrings
 in pandas/core/generic.py

---
 pandas/core/generic.py | 1621 ++++++++++++++++++++++++++++++++++------
 1 file changed, 1397 insertions(+), 224 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0154087b18399..a3292d555c3da 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -209,8 +209,6 @@
 from pandas.core.indexers.objects import BaseIndexer
 from pandas.core.resample import Resampler

-import textwrap
-
 # goal is to be able to define the docs close to function, while still being
 # able to share
 _shared_docs = {**_shared_docs}
@@ -775,10 +773,9 @@ def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
         self._mgr.set_axis(axis, labels)

     @final
-    @doc(klass=_shared_doc_kwargs["klass"])
     def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
         """
-        Return {klass} with requested index / column level(s) removed.
+        Return Series/DataFrame with requested index / column level(s) removed.

         Parameters
         ----------
@@ -787,7 +784,7 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
             If list-like, elements must be names or positional indexes
             of levels.

-        axis : {{0 or 'index', 1 or 'columns'}}, default 0
+        axis : {0 or 'index', 1 or 'columns'}, default 0
             Axis along which the level(s) is removed:

             * 0 or 'index': remove level(s) in column.
@@ -797,8 +794,8 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:

         Returns
         -------
-        {klass}
-            {klass} with requested index / column level(s) removed.
+        Series/DataFrame
+            Series/DataFrame with requested index / column level(s) removed.

         See Also
         --------
@@ -2144,19 +2141,6 @@ def _repr_data_resource_(self):
     # I/O Methods

     @final
-    @doc(
-        klass="object",
-        storage_options=_shared_docs["storage_options"],
-        storage_options_versionadded="1.2.0",
-        encoding_parameter="",
-        verbose_parameter="",
-        extra_parameters=textwrap.dedent(
-            """\
-            engine_kwargs : dict, optional
-                Arbitrary keyword arguments passed to excel engine.
-            """
-        ),
-    )
     def to_excel(
         self,
         excel_writer: FilePath | WriteExcelBuffer | ExcelWriter,
@@ -2178,9 +2162,9 @@ def to_excel(
         engine_kwargs: dict[str, Any] | None = None,
     ) -> None:
         """
-        Write {klass} to an Excel sheet.
+        Write object to an Excel sheet.

-        To write a single {klass} to an Excel .xlsx file it is only necessary to
+        To write a single object to an Excel .xlsx file it is only necessary to
         specify a target file name. To write to multiple sheets it is necessary to
         create an `ExcelWriter` object with a target file name, and specify a sheet
         in the file to write to.
@@ -2224,18 +2208,25 @@ def to_excel(
         merge_cells : bool or 'columns', default False
             If True, write MultiIndex index and columns as merged cells.
             If 'columns', merge MultiIndex column cells only.
-        {encoding_parameter}
         inf_rep : str, default 'inf'
             Representation for infinity (there is no native representation for
             infinity in Excel).
-        {verbose_parameter}
         freeze_panes : tuple of int (length 2), optional
             Specifies the one-based bottommost row and rightmost column that
             is to be frozen.
-        {storage_options}
-
-            .. versionadded:: {storage_options_versionadded}
-        {extra_parameters}
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+            are forwarded to ``urllib.request.Request`` as header options. For other
+            URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+            forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+            details, and for more examples on storage options refer `here
+            <https://pandas.pydata.org/docs/user_guide/io.html#reading-writing-remote-files>`_.
+
+            .. versionadded:: 1.2.0
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.

         See Also
         --------
         to_csv : Write DataFrame to a comma-separated values (csv) file.
@@ -2321,10 +2312,6 @@ def to_excel(
         )

     @final
-    @doc(
-        storage_options=_shared_docs["storage_options"],
-        compression_options=_shared_docs["compression_options"] % "path_or_buf",
-    )
     def to_json(
         self,
         path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
@@ -2361,27 +2348,27 @@ def to_json(

         * Series:

             - default is 'index'
-            - allowed values are: {{'split', 'records', 'index', 'table'}}.
+            - allowed values are: {'split', 'records', 'index', 'table'}.

         * DataFrame:

             - default is 'columns'
-            - allowed values are: {{'split', 'records', 'index', 'columns',
-              'values', 'table'}}.
+            - allowed values are: {'split', 'records', 'index', 'columns',
+              'values', 'table'}.

         * The format of the JSON string:

-            - 'split' : dict like {{'index' -> [index], 'columns' -> [columns],
-              'data' -> [values]}}
-            - 'records' : list like [{{column -> value}}, ... , {{column -> value}}]
-            - 'index' : dict like {{index -> {{column -> value}}}}
-            - 'columns' : dict like {{column -> {{index -> value}}}}
+            - 'split' : dict like {'index' -> [index], 'columns' -> [columns],
+              'data' -> [values]}
+            - 'records' : list like [{column -> value}, ... , {column -> value}]
+            - 'index' : dict like {index -> {column -> value}}
+            - 'columns' : dict like {column -> {index -> value}}
             - 'values' : just the values array
-            - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}}
+            - 'table' : dict like {'schema': {schema}, 'data': {data}}

             Describing the data, where data component is like ``orient='records'``.

-        date_format : {{None, 'epoch', 'iso'}}
+        date_format : {None, 'epoch', 'iso'}
             Type of date conversion. 'epoch' = epoch milliseconds,
             'iso' = ISO8601. The default depends on the `orient`. For
             ``orient='table'``, the default is 'iso'. For all other orients,
@@ -2409,7 +2396,21 @@ def to_json(
             If 'orient' is 'records' write out line-delimited json format.
             Will throw ValueError if incorrect 'orient' since others are not
             list-like.
-        {compression_options}
+        compression : str or dict, default 'infer'
+            For on-the-fly compression of the output data. If 'infer' and 'path_or_buf' is
+            path-like, then detect compression from the following extensions: '.gz',
+            '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
+            (otherwise no compression).
+            Set to ``None`` for no compression.
+            Can also be a dict with key ``'method'`` set
+            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
+            other key-value pairs are forwarded to
+            ``zipfile.ZipFile``, ``gzip.GzipFile``,
+            ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
+            ``tarfile.TarFile``, respectively.
+            As an example, the following could be passed for faster compression and to create
+            a reproducible gzip archive:
+            ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.

             .. versionchanged:: 1.4.0
                 Zstandard support.

         index : bool or None, default None
             Whether to include the index values in the JSON string. Not
             including the index (``index=False``) is only supported when
             orient is 'split' or 'table'.

         indent : int, optional
             Length of whitespace used to indent each record.
-        {storage_options}
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+            are forwarded to ``urllib.request.Request`` as header options. For other
+            URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+            forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+            details, and for more examples on storage options refer `here
+            <https://pandas.pydata.org/docs/user_guide/io.html#reading-writing-remote-files>`_.

         mode : str, default 'w' (writing)
             Specify the IO mode for output when supplying a path_or_buf.
@@ -3047,10 +3056,6 @@ def to_sql(
         )

     @final
-    @doc(
-        storage_options=_shared_docs["storage_options"],
-        compression_options=_shared_docs["compression_options"] % "path",
-    )
     def to_pickle(
         self,
         path: FilePath | WriteBuffer[bytes],
@@ -3068,7 +3073,26 @@ def to_pickle(
             String, path object (implementing ``os.PathLike[str]``), or file-like
             object implementing a binary ``write()`` function. File path where
             the pickled object will be stored.
-        {compression_options}
+        compression : str or dict, default 'infer'
+            For on-the-fly compression of the output data. If 'infer' and 'path' is
+            path-like, then detect compression from the following extensions: '.gz',
+            '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
+            (otherwise no compression).
+            Set to ``None`` for no compression.
+            Can also be a dict with key ``'method'`` set
+            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
+            other key-value pairs are forwarded to
+            ``zipfile.ZipFile``, ``gzip.GzipFile``,
+            ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
+            ``tarfile.TarFile``, respectively.
+            As an example, the following could be passed for faster compression and to create
+            a reproducible gzip archive:
+            ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+
+            .. versionadded:: 1.5.0
+                Added support for `.tar` files.
+
+            .. versionchanged:: 1.4.0
+                Zstandard support.
         protocol : int
             Int which indicates which protocol should be used by the pickler,
             default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible

            .. [1] https://docs.python.org/3/library/pickle.html.

-        {storage_options}
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+            are forwarded to ``urllib.request.Request`` as header options. For other
+            URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+            forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` documentation
+            for more details, and for more examples on storage options refer here:
+            https://pandas.pydata.org/docs/user_guide/io.html#reading-writing-remote-files

         See Also
         --------
@@ -3756,10 +3787,6 @@ def to_csv(
     ) -> None: ...

     @final
-    @doc(
-        storage_options=_shared_docs["storage_options"],
-        compression_options=_shared_docs["compression_options"] % "path_or_buf",
-    )
     def to_csv(
         self,
         path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
@@ -3828,7 +3855,24 @@ def to_csv(
             A string representing the encoding to use in the output file,
             defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
             is a non-binary file object.
-        {compression_options}
+        compression : str or dict, default 'infer'
+            For on-the-fly compression of the output data. If 'infer' and 'path_or_buf' is
+            path-like, then detect compression from the following extensions: '.gz',
+            '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
+            (otherwise no compression).
+            Set to ``None`` for no compression.
+            Can also be a dict with key ``'method'`` set
+            to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
+            other key-value pairs are forwarded to
+            ``zipfile.ZipFile``, ``gzip.GzipFile``,
+            ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
+            ``tarfile.TarFile``, respectively.
+            As an example, the following could be passed for faster compression and to create
+            a reproducible gzip archive:
+            ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+
+            .. versionadded:: 1.5.0
+                Added support for `.tar` files.

             May be a dict with key 'method' as compression mode
             and other entries as additional compression options if
@@ -3869,7 +3913,15 @@ def to_csv(

             See the errors argument for :func:`open` for a full list
             of options.

-        {storage_options}
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+            are forwarded to ``urllib.request.Request`` as header options. For other
+            URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+            forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+            details, and for more examples on storage options refer `here
+            <https://pandas.pydata.org/docs/user_guide/io.html#reading-writing-remote-files>`_.

         Returns
         -------
@@ -5150,10 +5202,6 @@ def sort_index(
         else:
             return result.__finalize__(self, method="sort_index")

-    @doc(
-        klass=_shared_doc_kwargs["klass"],
-        optional_reindex="",
-    )
     def reindex(
         self,
         labels=None,
@@ -5169,7 +5217,7 @@ def reindex(
         tolerance=None,
     ) -> Self:
         """
-        Conform {klass} to new index with optional filling logic.
+        Conform Series/DataFrame to new index with optional filling logic.

         Places NA/NaN in locations having no value in the previous index. A new object
         is produced unless the new index is equivalent to the current one and
@@ -5177,7 +5225,6 @@ def reindex(

         Parameters
         ----------
-        {optional_reindex}
-        method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}}
+        method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
             Method to use for filling holes in reindexed DataFrame.
             Please note: this is only applicable to DataFrames/Series with a
@@ -5226,8 +5273,8 @@ def reindex(

         Returns
         -------
-        {klass}
-            {klass} with changed index.
+        Series/DataFrame
+            Series/DataFrame with changed index.

         See Also
         --------
@@ -5990,7 +6037,6 @@ def pipe(
     ) -> T: ...

     @final
-    @doc(klass=_shared_doc_kwargs["klass"])
     def pipe(
         self,
         func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
@@ -6003,11 +6049,11 @@ def pipe(
         Parameters
         ----------
         func : function
-            Function to apply to the {klass}.
+            Function to apply to the Series/DataFrame.
             ``args``, and ``kwargs`` are passed into ``func``.
             Alternatively a ``(callable, data_keyword)`` tuple where
             ``data_keyword`` is a string indicating the keyword of
-            ``callable`` that expects the {klass}.
+            ``callable`` that expects the Series/DataFrame.
         *args : iterable, optional
             Positional arguments passed into ``func``.
         **kwargs : mapping, optional
@@ -6964,10 +7010,6 @@ def fillna(
     ) -> Self | None: ...

     @final
-    @doc(
-        klass=_shared_doc_kwargs["klass"],
-        axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
-    )
     def fillna(
         self,
         value: Hashable | Mapping | Series | DataFrame,
         *,
         axis: Axis | None = None,
         inplace: bool = False,
         limit: int | None = None,
     ) -> Self | None:
@@ -6987,7 +7029,7 @@ def fillna(
             each index (for a Series) or column (for a DataFrame).
             Values not in the dict/Series/DataFrame will not be filled.
             This value cannot be a list.
-        axis : {axes_single_arg}
+        axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame
             Axis along which to fill missing values. For `Series`
             this parameter is unused and defaults to 0.
         inplace : bool, default False
@@ -7000,7 +7042,7 @@ def fillna(

         Returns
         -------
-        {klass} or None
+        Series/DataFrame or None
             Object with missing values filled or None if ``inplace=True``.

         See Also
         --------
@@ -7239,10 +7281,6 @@ def ffill(
     ) -> Self | None: ...

     @final
-    @doc(
-        klass=_shared_doc_kwargs["klass"],
-        axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
-    )
     def ffill(
         self,
         *,
@@ -7256,7 +7294,7 @@ def ffill(

         Parameters
         ----------
-        axis : {axes_single_arg}
+        axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame
             Axis along which to fill missing values. For `Series`
             this parameter is unused and defaults to 0.
         inplace : bool, default False
@@ -7283,7 +7321,7 @@ def ffill(

         Returns
         -------
-        {klass} or None
+        Series/DataFrame or None
             Object with missing values filled or None if ``inplace=True``.

         See Also
         --------
@@ -7372,10 +7410,6 @@ def bfill(
     ) -> Self | None: ...

     @final
-    @doc(
-        klass=_shared_doc_kwargs["klass"],
-        axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
-    )
     def bfill(
         self,
         *,
@@ -7389,7 +7423,7 @@ def bfill(

         Parameters
         ----------
-        axis : {axes_single_arg}
+        axis : {0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame
             Axis along which to fill missing values. For `Series`
             this parameter is unused and defaults to 0.
         inplace : bool, default False
@@ -7416,7 +7450,7 @@ def bfill(

         Returns
         -------
-        {klass} or None
+        Series/DataFrame or None
             Object with missing values filled or None if ``inplace=True``.

         See Also
         --------
@@ -7513,11 +7547,6 @@ def replace(
     ) -> Self | None: ...

     @final
-    @doc(
-        _shared_docs["replace"],
-        klass=_shared_doc_kwargs["klass"],
-        inplace=_shared_doc_kwargs["inplace"],
-    )
     def replace(
         self,
         to_replace=None,
@@ -7526,6 +7555,294 @@ def replace(
         inplace: bool = False,
         regex: bool = False,
     ) -> Self | None:
+        """
+        Replace values given in `to_replace` with `value`.
+
+        Values of the Series/DataFrame are replaced with other values dynamically.
+        This differs from updating with ``.loc`` or ``.iloc``, which require
+        you to specify a location to update with some value.
+
+        Parameters
+        ----------
+        to_replace : str, regex, list, dict, Series, int, float, or None
+            How to find the values that will be replaced.
+
+            * numeric, str or regex:
+
+                - numeric: numeric values equal to `to_replace` will be
+                  replaced with `value`
+                - str: string exactly matching `to_replace` will be replaced
+                  with `value`
+                - regex: regexes matching `to_replace` will be replaced with
+                  `value`
+
+            * list of str, regex, or numeric:
+
+                - First, if `to_replace` and `value` are both lists, they
+                  **must** be the same length.
+                - Second, if ``regex=True`` then all of the strings in **both**
+                  lists will be interpreted as regexes otherwise they will match
+                  directly. This doesn't matter much for `value` since there
+                  are only a few possible substitution regexes you can use.
+                - str, regex and numeric rules apply as above.
+
+            * dict:
+
+                - Dicts can be used to specify different replacement values
+                  for different existing values. For example,
+                  ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
+                  'y' with 'z'. To use a dict in this way, the optional `value`
+                  parameter should not be given.
+                - For a DataFrame a dict can specify that different values
+                  should be replaced in different columns. For example,
+                  ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
+                  and the value 'z' in column 'b' and replaces these values
+                  with whatever is specified in `value`. The `value` parameter
+                  should not be ``None`` in this case. You can treat this as a
+                  special case of passing two lists except that you are
+                  specifying the column to search in.
+                - For a DataFrame nested dictionaries, e.g.,
+                  ``{'a': {'b': np.nan}}``, are read as follows: look in column
+                  'a' for the value 'b' and replace it with NaN. The optional `value`
+                  parameter should not be specified to use a nested dict in this
+                  way. You can nest regular expressions as well. Note that
+                  column names (the top-level dictionary keys in a nested
+                  dictionary) **cannot** be regular expressions.
+
+            * None:
+
+                - This means that the `regex` argument must be a string,
+                  compiled regular expression, or list, dict, ndarray or
+                  Series of such elements. If `value` is also ``None`` then
+                  this **must** be a nested dictionary or Series.
+
+            See the examples section for examples of each of these.
+        value : scalar, dict, list, str, regex, default None
+            Value to replace any values matching `to_replace` with.
+            For a DataFrame a dict of values can be used to specify which
+            value to use for each column (columns not in the dict will not be
+            filled). Regular expressions, strings and lists or dicts of such
+            objects are also allowed.
+        inplace : bool, default False
+            If True, performs operation inplace and returns None.
+        regex : bool or same types as `to_replace`, default False
+            Whether to interpret `to_replace` and/or `value` as regular
+            expressions. Alternatively, this could be a regular expression or a
+            list, dict, or array of regular expressions in which case
+            `to_replace` must be ``None``.
+
+        Returns
+        -------
+        Series/DataFrame
+            Object after replacement.
+
+        Raises
+        ------
+        AssertionError
+            * If `regex` is not a ``bool`` and `to_replace` is not
+              ``None``.
+
+        TypeError
+            * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
+            * If `to_replace` is a ``dict`` and `value` is not a ``list``,
+              ``dict``, ``ndarray``, or ``Series``
+            * If `to_replace` is ``None`` and `regex` is not compilable
+              into a regular expression or is a list, dict, ndarray, or
+              Series.
+            * When replacing multiple ``bool`` or ``datetime64`` objects and
+              the arguments to `to_replace` do not match the type of the
+              value being replaced
+
+        ValueError
+            * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
+              `value` but they are not the same length.
+
+        See Also
+        --------
+        Series.fillna : Fill NA values.
+        DataFrame.fillna : Fill NA values.
+        Series.where : Replace values based on boolean condition.
+        DataFrame.where : Replace values based on boolean condition.
+        DataFrame.map : Apply a function to a DataFrame elementwise.
+        Series.map : Map values of Series according to an input mapping or function.
+        Series.str.replace : Simple string replacement.
+
+        Notes
+        -----
+        * Regex substitution is performed under the hood with ``re.sub``. The
+          rules for substitution for ``re.sub`` are the same.
+        * Regular expressions will only substitute on strings, meaning you
+          cannot provide, for example, a regular expression matching floating
+          point numbers and expect the columns in your frame that have a
+          numeric dtype to be matched. However, if those floating point
+          numbers *are* strings, then you can do this.
+ * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s.replace(1, 5) + 0 5 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 'C': ['a', 'b', 'c', 'd', 'e']}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + **dict-like `to_replace`** + + >>> df.replace({0: 10, 1: 100}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({'A': 0, 'B': 5}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({'A': {0: 100, 4: 400}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Compare the behavior of ``s.replace({'a': None})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({'a': None})`` is equivalent to + ``s.replace(to_replace={'a': None}, value=None)``: + + >>> s.replace({'a': None}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + If ``None`` is explicitly passed for ``value``, it will be respected: + + >>> s.replace('a', None) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + .. versionchanged:: 1.4.0 + Previously the explicit ``None`` was silently ignored. + + When ``regex=True``, ``value`` is not ``None`` and `to_replace` is a string, + the replacement will be applied in all columns of the DataFrame. + + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': ['a', 'b', 'c', 'd', 'e'], + ... 'C': ['f', 'g', 'h', 'i', 'j']}) + + >>> df.replace(to_replace='^[a-g]', value='e', regex=True) + A B C + 0 0 e e + 1 1 e e + 2 2 e h + 3 3 e i + 4 4 e j + + If ``value`` is not ``None`` and `to_replace` is a dictionary, the dictionary + keys will be the DataFrame columns that the replacement will be applied. 
+ + >>> df.replace(to_replace={'B': '^[a-c]', 'C': '^[h-j]'}, value='e', regex=True) + A B C + 0 0 e f + 1 1 e g + 2 2 e e + 3 3 d e + 4 4 e e + """ if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") @@ -8145,7 +8462,6 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - @doc(klass=_shared_doc_kwargs["klass"]) def isna(self) -> Self: """ Detect missing values. @@ -8158,15 +8474,15 @@ def isna(self) -> Self: Returns ------- - {klass} - Mask of bool values for each element in {klass} that + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that indicates whether an element is an NA value. See Also -------- - {klass}.isnull : Alias of isna. - {klass}.notna : Boolean inverse of isna. - {klass}.dropna : Omit axes labels with missing values. + Series/DataFrame.isnull : Alias of isna. + Series/DataFrame.notna : Boolean inverse of isna. + Series/DataFrame.dropna : Omit axes labels with missing values. isna : Top-level isna. Examples @@ -8214,11 +8530,74 @@ def isna(self) -> Self: """ return isna(self).__finalize__(self, method="isna") - @doc(isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> Self: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that + indicates whether an element is an NA value. + + See Also + -------- + Series/DataFrame.isna : Detect missing values. + Series/DataFrame.notna : Boolean inverse of isna. + Series/DataFrame.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ return isna(self).__finalize__(self, method="isnull") - @doc(klass=_shared_doc_kwargs["klass"]) def notna(self) -> Self: """ Detect existing (non-missing) values. @@ -8231,15 +8610,15 @@ def notna(self) -> Self: Returns ------- - {klass} - Mask of bool values for each element in {klass} that + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that indicates whether an element is not an NA value. See Also -------- - {klass}.notnull : Alias of notna. - {klass}.isna : Boolean inverse of notna. - {klass}.dropna : Omit axes labels with missing values. + Series/DataFrame.notnull : Alias of notna. + Series/DataFrame.isna : Boolean inverse of notna. + Series/DataFrame.dropna : Omit axes labels with missing values. notna : Top-level notna. 
Examples @@ -8287,8 +8666,72 @@ def notna(self) -> Self: """ return notna(self).__finalize__(self, method="notna") - @doc(notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> Self: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame that + indicates whether an element is not an NA value. + + See Also + -------- + Series/DataFrame.notna : Detect existing (non-missing) values. + Series/DataFrame.isna : Boolean inverse of notna. + Series/DataFrame.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ return notna(self).__finalize__(self, method="notnull") @final @@ -8556,7 +8999,6 @@ def clip( return result @final - @doc(klass=_shared_doc_kwargs["klass"]) def asfreq( self, freq: Frequency, @@ -8571,7 +9013,7 @@ def asfreq( Returns the original data conformed to a new index with the specified frequency. - If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index + If the index of this Series/DataFrame is a :class:`~pandas.PeriodIndex`, the new index is the result of transforming the original index with :meth:`PeriodIndex.asfreq ` (so the original index will map one-to-one to the new index). @@ -8591,7 +9033,7 @@ def asfreq( ---------- freq : DateOffset or str Frequency DateOffset or string. - method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None + method : {'backfill'/'bfill', 'pad'/'ffill'}, default None Method to use for filling holes in reindexed Series (note this does not fill NaNs that already were present): @@ -8827,7 +9269,6 @@ def between_time( return self.take(indexer, axis=axis) @final - @doc(klass=_shared_doc_kwargs["klass"]) def resample( self, rule, @@ -8852,15 +9293,15 @@ def resample( ---------- rule : DateOffset, Timedelta or str The offset string or object representing target conversion. - closed : {{'right', 'left'}}, default None + closed : {'right', 'left'}, default None Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', and 'W' which all have a default of 'right'. - label : {{'right', 'left'}}, default None + label : {'right', 'left'}, default None Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 'BA', 'BQE', and 'W' which all have a default of 'right'. 
- convention : {{'start', 'end', 's', 'e'}}, default 'start' + convention : {'start', 'end', 's', 'e'}, default 'start' For `PeriodIndex` only, controls whether to use the start or end of `rule`. on : str, optional @@ -8913,8 +9354,8 @@ def resample( -------- Series.resample : Resample a Series. DataFrame.resample : Resample a DataFrame. - groupby : Group {klass} by mapping, function, label, or list of labels. - asfreq : Reindex a {klass} with the given frequency without grouping. + groupby : Group Series/DataFrame by mapping, function, label, or list of labels. + asfreq : Reindex a Series/DataFrame with the given frequency without grouping. Notes ----- @@ -9376,7 +9817,6 @@ def ranker(data): return ranker(data) - @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) def compare( self, other: Self, @@ -9385,6 +9825,35 @@ def compare( keep_equal: bool = False, result_names: Suffixes = ("self", "other"), ): + """ + Compare to another Series/DataFrame and show the differences. + + Parameters + ---------- + other : Series/DataFrame + Object to compare with. + + align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + + result_names : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + + .. versionadded:: 1.5.0 + """ if type(self) is not type(other): cls_self, cls_other = type(self).__name__, type(other).__name__ raise TypeError( @@ -9459,10 +9928,6 @@ def compare( return diff @final - @doc( - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], - ) def align( self, other: NDFrameT, @@ -9518,7 +9983,7 @@ def align( Returns ------- - tuple of ({klass}, type of other) + tuple of (Series/DataFrame, type of other) Aligned objects. See Also @@ -9981,13 +10446,6 @@ def where( ) -> Self | None: ... @final - @doc( - klass=_shared_doc_kwargs["klass"], - cond="True", - cond_rev="False", - name="where", - name_other="mask", - ) def where( self, cond, @@ -9998,22 +10456,22 @@ def where( level: Level | None = None, ) -> Self | None: """ - Replace values where the condition is {cond_rev}. + Replace values where the condition is False. Parameters ---------- - cond : bool {klass}, array-like, or callable - Where `cond` is {cond}, keep the original value. Where - {cond_rev}, replace with corresponding value from `other`. - If `cond` is callable, it is computed on the {klass} and - should return boolean {klass} or array. The callable must - not change input {klass} (though pandas doesn't check it). - other : scalar, {klass}, or callable - Entries where `cond` is {cond_rev} are replaced with + cond : bool Series/DataFrame, array-like, or callable + Where `cond` is True, keep the original value. Where + False, replace with corresponding value from `other`. + If `cond` is callable, it is computed on the Series/DataFrame and + should return boolean Series/DataFrame or array. The callable must + not change input Series/DataFrame (though pandas doesn't check it). 
+ other : scalar, Series/DataFrame, or callable + Entries where `cond` is False are replaced with corresponding value from `other`. - If other is callable, it is computed on the {klass} and - should return scalar or {klass}. The callable must not - change input {klass} (though pandas doesn't check it). + If other is callable, it is computed on the Series/DataFrame and + should return scalar or Series/DataFrame. The callable must not + change input Series/DataFrame (though pandas doesn't check it). If not specified, entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). @@ -10034,25 +10492,25 @@ def where( See Also -------- - :func:`DataFrame.{name_other}` : Return an object of same shape as + :func:`DataFrame.mask` : Return an object of same shape as caller. - :func:`Series.{name_other}` : Return an object of same shape as + :func:`Series.mask` : Return an object of same shape as caller. Notes ----- - The {name} method is an application of the if-then idiom. For each - element in the caller, if ``cond`` is ``{cond}`` the + The where method is an application of the if-then idiom. For each + element in the caller, if ``cond`` is ``True`` the element is used; otherwise the corresponding element from ``other`` is used. If the axis of ``other`` does not align with axis of - ``cond`` {klass}, the values of ``cond`` on misaligned index positions - will be filled with {cond_rev}. + ``cond`` Series/DataFrame, the values of ``cond`` on misaligned index positions + will be filled with False. The signature for :func:`Series.where` or :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to ``np.where(m, df1, df2)``. - For further details and examples see the ``{name}`` documentation in + For further details and examples see the ``where`` documentation in :ref:`indexing `. The dtype of the object takes precedence. The fill value is casted to @@ -10186,14 +10644,6 @@ def mask( ) -> Self | None: ... @final - @doc( - where, - klass=_shared_doc_kwargs["klass"], - cond="False", - cond_rev="True", - name="mask", - name_other="where", - ) def mask( self, cond, @@ -10203,68 +10653,209 @@ def mask( axis: Axis | None = None, level: Level | None = None, ) -> Self | None: - inplace = validate_bool_kwarg(inplace, "inplace") - if inplace: - if not PYPY and not WARNING_CHECK_DISABLED: - if sys.getrefcount(self) <= REF_COUNT: - warnings.warn( - _chained_assignment_method_msg, - ChainedAssignmentError, - stacklevel=2, - ) - - cond = common.apply_if_callable(cond, self) - other = common.apply_if_callable(other, self) - - # see gh-21891 - if not hasattr(cond, "__invert__"): - cond = np.array(cond) - - return self._where( - ~cond, - other=other, - inplace=inplace, - axis=axis, - level=level, - ) - - @doc(klass=_shared_doc_kwargs["klass"]) - def shift( - self, - periods: int | Sequence[int] = 1, - freq=None, - axis: Axis = 0, - fill_value: Hashable = lib.no_default, - suffix: str | None = None, - ) -> Self | DataFrame: """ - Shift index by desired number of periods with an optional time `freq`. - - When `freq` is not passed, shift the index without realigning the data. - If `freq` is passed (in this case, the index must be date or datetime, - or it will raise a `NotImplementedError`), the index will be - increased using the periods and the `freq`. `freq` can be inferred - when specified as "infer" as long as either freq or inferred_freq - attribute is set in the index. 
+ Replace values where the condition is True. Parameters ---------- - periods : int or Sequence - Number of periods to shift. Can be positive or negative. - If an iterable of ints, the data will be shifted once by each int. - This is equivalent to shifting by one value at a time and - concatenating all resulting frames. The resulting columns will have - the shift suffixed to their column names. For multiple periods, - axis must not be 1. - freq : DateOffset, tseries.offsets, timedelta, or str, optional - Offset to use from the tseries module or time rule (e.g. 'EOM'). - If `freq` is specified then the index values are shifted but the - data is not realigned. That is, use `freq` if you would like to - extend the index when shifting and preserve the original data. - If `freq` is specified as "infer" then it will be inferred from - the freq or inferred_freq attributes of the index. If neither of + cond : bool Series/DataFrame, array-like, or callable + Where `cond` is False, keep the original value. Where + True, replace with corresponding value from `other`. + If `cond` is callable, it is computed on the Series/DataFrame and + should return boolean Series/DataFrame or array. The callable must + not change input Series/DataFrame (though pandas doesn't check it). + other : scalar, Series/DataFrame, or callable + Entries where `cond` is True are replaced with + corresponding value from `other`. + If other is callable, it is computed on the Series/DataFrame and + should return scalar or Series/DataFrame. The callable must not + change input Series/DataFrame (though pandas doesn't check it). + If not specified, entries will be filled with the corresponding + NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension + dtypes). + inplace : bool, default False + Whether to perform the operation in place on the data. + axis : int, default None + Alignment axis if needed. For `Series` this parameter is + unused and defaults to 0. + level : int, default None + Alignment level if needed. + + Returns + ------- + Series or DataFrame or None + When applied to a Series, the function will return a Series, + and when applied to a DataFrame, it will return a DataFrame; + if ``inplace=True``, it will return None. + + See Also + -------- + :func:`DataFrame.where` : Return an object of same shape as + caller. + :func:`Series.where` : Return an object of same shape as + caller. + + Notes + ----- + The mask method is an application of the if-then idiom. For each + element in the caller, if ``cond`` is ``False`` the + element is used; otherwise the corresponding element from + ``other`` is used. If the axis of ``other`` does not align with axis of + ``cond`` Series/DataFrame, the values of ``cond`` on misaligned index positions + will be filled with True. + + The signature for :func:`Series.where` or + :func:`DataFrame.where` differs from :func:`numpy.where`. + Roughly ``df1.where(m, df2)`` is equivalent to ``np.where(m, df1, df2)``. + + For further details and examples see the ``mask`` documentation in + :ref:`indexing `. + + The dtype of the object takes precedence. The fill value is casted to + the object's dtype, if this can be done losslessly. 
+ + Examples + -------- + >>> s = pd.Series(range(5)) + >>> s.where(s > 0) + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + 4 4.0 + dtype: float64 + >>> s.mask(s > 0) + 0 0.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + >>> s = pd.Series(range(5)) + >>> t = pd.Series([True, False]) + >>> s.where(t, 99) + 0 0 + 1 99 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.mask(t, 99) + 0 99 + 1 1 + 2 99 + 3 99 + 4 99 + dtype: int64 + + >>> s.where(s > 1, 10) + 0 10 + 1 10 + 2 2 + 3 3 + 4 4 + dtype: int64 + >>> s.mask(s > 1, 10) + 0 0 + 1 1 + 2 10 + 3 10 + 4 10 + dtype: int64 + + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]) + >>> df + A B + 0 0 1 + 1 2 3 + 2 4 5 + 3 6 7 + 4 8 9 + >>> m = df % 3 == 0 + >>> df.where(m, -df) + A B + 0 0 -1 + 1 -2 3 + 2 -4 -5 + 3 6 -7 + 4 -8 9 + >>> df.where(m, -df) == np.where(m, df, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + >>> df.where(m, -df) == df.mask(~m, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if inplace: + if not PYPY and not WARNING_CHECK_DISABLED: + if sys.getrefcount(self) <= REF_COUNT: + warnings.warn( + _chained_assignment_method_msg, + ChainedAssignmentError, + stacklevel=2, + ) + + cond = common.apply_if_callable(cond, self) + other = common.apply_if_callable(other, self) + + # see gh-21891 + if not hasattr(cond, "__invert__"): + cond = np.array(cond) + + return self._where( + ~cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + ) + + def shift( + self, + periods: int | Sequence[int] = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = lib.no_default, + suffix: str | None = None, + ) -> Self | DataFrame: + """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. + + Parameters + ---------- + periods : int or Sequence + Number of periods to shift. Can be positive or negative. + If an iterable of ints, the data will be shifted once by each int. + This is equivalent to shifting by one value at a time and + concatenating all resulting frames. The resulting columns will have + the shift suffixed to their column names. For multiple periods, + axis must not be 1. + freq : DateOffset, tseries.offsets, timedelta, or str, optional + Offset to use from the tseries module or time rule (e.g. 'EOM'). + If `freq` is specified then the index values are shifted but the + data is not realigned. That is, use `freq` if you would like to + extend the index when shifting and preserve the original data. + If `freq` is specified as "infer" then it will be inferred from + the freq or inferred_freq attributes of the index. If neither of those attributes exist, a ValueError is thrown. - axis : {{0 or 'index', 1 or 'columns', None}}, default None + axis : {0 or 'index', 1 or 'columns', None}, default None Shift direction. For `Series` this parameter is unused and defaults to 0. fill_value : object, optional The scalar value to use for newly introduced missing values. @@ -10279,7 +10870,7 @@ def shift( Returns ------- - {klass} + Series/DataFrame Copy of input object, shifted. 
See Also @@ -10602,7 +11193,6 @@ def truncate( return result @final - @doc(klass=_shared_doc_kwargs["klass"]) def tz_convert( self, tz, @@ -10618,7 +11208,7 @@ def tz_convert( tz : str or tzinfo object or None Target time zone. Passing ``None`` will convert to UTC and remove the timezone information. - axis : {{0 or 'index', 1 or 'columns'}}, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to convert level : int, str, default None If axis is a MultiIndex, convert a specific level. Otherwise @@ -10642,7 +11232,7 @@ def tz_convert( Returns ------- - {klass} + Series/DataFrame Object with time zone converted axis. Raises @@ -10706,7 +11296,6 @@ def _tz_convert(ax, tz): return result.__finalize__(self, method="tz_convert") @final - @doc(klass=_shared_doc_kwargs["klass"]) def tz_localize( self, tz, @@ -10727,7 +11316,7 @@ def tz_localize( tz : str or tzinfo or None Time zone to localize. Passing ``None`` will remove the time zone information and preserve local time. - axis : {{0 or 'index', 1 or 'columns'}}, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to localize level : int, str, default None If axis ia a MultiIndex, localize a specific level. Otherwise @@ -10779,7 +11368,7 @@ def tz_localize( Returns ------- - {klass} + Series/DataFrame Same type as the input, with time zone naive or aware index, depending on ``tz``. @@ -11680,7 +12269,6 @@ def prod( product = prod @final - @doc(Rolling) def rolling( self, window: int | dt.timedelta | str | BaseOffset | BaseIndexer, @@ -11692,6 +12280,260 @@ def rolling( step: int | None = None, method: str = "single", ) -> Window | Rolling: + """ + Provide rolling window calculations. + + Parameters + ---------- + window : int, timedelta, str, offset, or BaseIndexer subclass + Interval of the moving window. + + If an integer, the delta between the start and end of each window. + The number of points in the window depends on the ``closed`` argument. + + If a timedelta, str, or offset, the time period of each window. Each + window will be a variable sized based on the observations included in + the time-period. This is only valid for datetimelike indexes. + To learn more about the offsets & frequency strings, please see + :ref:`this link`. + + If a BaseIndexer subclass, the window boundaries + based on the defined ``get_window_bounds`` method. Additional rolling + keyword arguments, namely ``min_periods``, ``center``, ``closed`` and + ``step`` will be passed to ``get_window_bounds``. + + min_periods : int, default None + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + For a window that is specified by an offset, ``min_periods`` will default to 1. + + For a window that is specified by an integer, ``min_periods`` will default + to the size of the window. + + center : bool, default False + If False, set the window labels as the right edge of the window index. + + If True, set the window labels as the center of the window index. + + win_type : str, default None + If ``None``, all points are evenly weighted. + + If a string, it must be a valid `scipy.signal window function + `__. + + Certain Scipy window types require additional parameters to be passed + in the aggregation function. The additional parameters must match + the keywords specified in the Scipy window type method signature. + + on : str, optional + For a DataFrame, a column label or Index level on which + to calculate the rolling window, rather than the DataFrame's index. 
+ + Provided integer column is ignored and excluded from result since + an integer index is not used to calculate the rolling window. + + closed : str, default None + Determines the inclusivity of points in the window + + If ``'right'``, uses the window (first, last] meaning the last point + is included in the calculations. + + If ``'left'``, uses the window [first, last) meaning the first point + is included in the calculations. + + If ``'both'``, uses the window [first, last] meaning all points in + the window are included in the calculations. + + If ``'neither'``, uses the window (first, last) meaning the first + and last points in the window are excluded from calculations. + + () and [] are referencing open and closed set + notation respetively. + + Default ``None`` (``'right'``). + + step : int, default None + Evaluate the window at every ``step`` result, equivalent to slicing as + ``[::step]``. ``window`` must be an integer. Using a step argument other + than None or 1 will produce a result with a different shape than the input. + + .. versionadded:: 1.5.0 + + method : str {'single', 'table'}, default 'single' + + .. versionadded:: 1.3.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Returns + ------- + pandas.api.typing.Window or pandas.api.typing.Rolling + An instance of Window is returned if ``win_type`` is passed. Otherwise, + an instance of Rolling is returned. + + See Also + -------- + expanding : Provides expanding transformations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **window** + + Rolling sum with a window length of 2 observations. + + >>> df.rolling(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 NaN + 4 NaN + + Rolling sum with a window span of 2 seconds. + + >>> df_time = pd.DataFrame( + ... {"B": [0, 1, 2, np.nan, 4]}, + ... index=[ + ... pd.Timestamp("20130101 09:00:00"), + ... pd.Timestamp("20130101 09:00:02"), + ... pd.Timestamp("20130101 09:00:03"), + ... pd.Timestamp("20130101 09:00:05"), + ... pd.Timestamp("20130101 09:00:06"), + ... ], + ... ) + + >>> df_time + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 2.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + >>> df_time.rolling("2s").sum() + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 3.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + Rolling sum with forward looking windows with 2 observations. + + >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2) + >>> df.rolling(window=indexer, min_periods=1).sum() + B + 0 1.0 + 1 3.0 + 2 2.0 + 3 4.0 + 4 4.0 + + **min_periods** + + Rolling sum with a window length of 2 observations, but only needs a minimum of 1 + observation to calculate a value. + + >>> df.rolling(2, min_periods=1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 2.0 + 4 4.0 + + **center** + + Rolling sum with the result assigned to the center of the window index. 
+ + >>> df.rolling(3, min_periods=1, center=True).sum() + B + 0 1.0 + 1 3.0 + 2 3.0 + 3 6.0 + 4 4.0 + + >>> df.rolling(3, min_periods=1, center=False).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 6.0 + + **step** + + Rolling sum with a window length of 2 observations, minimum of 1 observation to + calculate a value, and a step of 2. + + >>> df.rolling(2, min_periods=1, step=2).sum() + B + 0 0.0 + 2 3.0 + 4 4.0 + + **win_type** + + Rolling sum with a window length of 2, using the Scipy ``'gaussian'`` + window type. ``std`` is required in the aggregation function. + + >>> df.rolling(2, win_type="gaussian").sum(std=3) + B + 0 NaN + 1 0.986207 + 2 2.958621 + 3 NaN + 4 NaN + + **on** + + Rolling sum with a window length of 2 days. + + >>> df = pd.DataFrame( + ... { + ... "A": [ + ... pd.to_datetime("2020-01-01"), + ... pd.to_datetime("2020-01-01"), + ... pd.to_datetime("2020-01-02"), + ... ], + ... "B": [1, 2, 3], + ... }, + ... index=pd.date_range("2020", periods=3), + ... ) + + >>> df + A B + 2020-01-01 2020-01-01 1 + 2020-01-02 2020-01-01 2 + 2020-01-03 2020-01-02 3 + + >>> df.rolling("2D", on="A").sum() + A B + 2020-01-01 2020-01-01 1.0 + 2020-01-02 2020-01-01 3.0 + 2020-01-03 2020-01-02 6.0 + """ if win_type is not None: return Window( self, @@ -11718,16 +12560,81 @@ def rolling( ) @final - @doc(Expanding) def expanding( self, min_periods: int = 1, method: Literal["single", "table"] = "single", ) -> Expanding: + """ + Provide expanding window calculations. + + An expanding window yields the value of an aggregation statistic with all the data + available up to that point in time. + + Parameters + ---------- + min_periods : int, default 1 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + method : str {'single', 'table'}, default 'single' + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + .. versionadded:: 1.3.0 + + Returns + ------- + pandas.api.typing.Expanding + An instance of Expanding for further expanding window calculations, + e.g. using the ``sum`` method. + + See Also + -------- + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. + + Notes + ----- + See :ref:`Windowing Operations ` for further usage details + and examples. + + Examples + -------- + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + **min_periods** + + Expanding sum with 1 vs 3 observations needed to calculate a value. + + >>> df.expanding(1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 7.0 + >>> df.expanding(3).sum() + B + 0 NaN + 1 NaN + 2 3.0 + 3 3.0 + 4 7.0 + """ return Expanding(self, min_periods=min_periods, method=method) @final - @doc(ExponentialMovingWindow) def ewm( self, com: float | None = None, @@ -11740,6 +12647,205 @@ def ewm( times: np.ndarray | DataFrame | Series | None = None, method: Literal["single", "table"] = "single", ) -> ExponentialMovingWindow: + r""" + Provide exponentially weighted (EW) calculations. + + Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided if ``times`` is not provided. If ``times`` is provided and ``adjust=True``, + ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + If ``times`` is provided and ``adjust=False``, ``halflife`` must be the only + provided decay-specification parameter. 
+ + Parameters + ---------- + com : float, optional + Specify decay in terms of center of mass + + :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`. + + span : float, optional + Specify decay in terms of span + + :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`. + + halflife : float, str, timedelta, optional + Specify decay in terms of half-life + + :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for + :math:`halflife > 0`. + + If ``times`` is specified, a timedelta convertible unit over which an + observation decays to half its value. Only applicable to ``mean()``, + and halflife value will not apply to the other functions. + + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly + + :math:`0 < \alpha \leq 1`. + + min_periods : int, default 0 + Minimum number of observations in window required to have a value; + otherwise, result is ``np.nan``. + + adjust : bool, default True + Divide by decaying adjustment factor in beginning periods to account + for imbalance in relative weightings (viewing EWMA as a moving average). + + - When ``adjust=True`` (default), the EW function is calculated using weights + :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series + [:math:`x_0, x_1, ..., x_t`] would be: + + .. math:: + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 - \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t} + + - When ``adjust=False``, the exponentially weighted function is calculated + recursively: + + .. math:: + \begin{split} + y_0 &= x_0\\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + \end{split} + + which is equivalent to using weights: + + .. math:: + w_i = \begin{cases} + \alpha (1 - \alpha)^i & \text{if } i < t \\ + (1 - \alpha)^i & \text{if } i = t. + \end{cases} + + .. note:: + These equations are sometimes written in terms of :math:`\alpha' = 1 - \alpha`, e.g. + + .. math:: + y_t = \alpha' y_{t-1} + (1 - \alpha') x_t. + + ignore_na : bool, default False + Ignore missing values when calculating weights. + + - When ``ignore_na=False`` (default), weights are based on absolute positions. + For example, the weights of :math:`x_0` and :math:`x_2` used in calculating + the final weighted average of [:math:`x_0`, None, :math:`x_2`] are + :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and + :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``. + + - When ``ignore_na=True``, weights are based + on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` + used in calculating the final weighted average of + [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if + ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. + + times : np.ndarray, Series, default None + + Only applicable to ``mean()``. + + Times corresponding to the observations. Must be monotonically increasing and + ``datetime64[ns]`` dtype. + + If 1-D array like, a sequence with the same shape as the observations. + + method : str {'single', 'table'}, default 'single' + .. versionadded:: 1.4.0 + + Execute the rolling operation per single column or row (``'single'``) + or over the entire object (``'table'``). + + This argument is only implemented when specifying ``engine='numba'`` + in the method call. + + Only applicable to ``mean()`` + + Returns + ------- + pandas.api.typing.ExponentialMovingWindow + An instance of ExponentialMovingWindow for further exponentially weighted (EW) + calculations, e.g. using the ``mean`` method. 
+ + See Also + -------- + rolling : Provides rolling window calculations. + expanding : Provides expanding transformations. + + Notes + ----- + See :ref:`Windowing Operations ` + for further usage details and examples. + + Examples + -------- + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.ewm(com=0.5).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(alpha=2 / 3).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + + **adjust** + + >>> df.ewm(com=0.5, adjust=True).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(com=0.5, adjust=False).mean() + B + 0 0.000000 + 1 0.666667 + 2 1.555556 + 3 1.555556 + 4 3.650794 + + **ignore_na** + + >>> df.ewm(com=0.5, ignore_na=True).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + >>> df.ewm(com=0.5, ignore_na=False).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + + **times** + + Exponentially weighted mean with weights calculated with a timedelta ``halflife`` + relative to ``times``. + + >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17'] + >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean() + B + 0 0.000000 + 1 0.585786 + 2 1.523889 + 3 1.523889 + 4 3.233686 + """ return ExponentialMovingWindow( self, com=com, @@ -11847,10 +12953,9 @@ def _find_valid_index(self, *, how: str) -> Hashable: return self.index[idxpos] @final - @doc(position="first", klass=_shared_doc_kwargs["klass"]) def first_valid_index(self) -> Hashable: """ - Return index for {position} non-missing value or None, if no value is found. + Return index for first non-missing value or None, if no value is found. See the :ref:`User Guide ` for more information on which values are considered missing. @@ -11858,7 +12963,7 @@ def first_valid_index(self) -> Hashable: Returns ------- type of index - Index of {position} non-missing value. + Index of first non-missing value. See Also -------- @@ -11935,8 +13040,76 @@ def first_valid_index(self) -> Hashable: return self._find_valid_index(how="first") @final - @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) def last_valid_index(self) -> Hashable: + """ + Return index for last non-missing value or None, if no value is found. + + See the :ref:`User Guide ` for more information + on which values are considered missing. + + Returns + ------- + type of index + Index of last non-missing value. + + See Also + -------- + DataFrame.first_valid_index : Return index for first non-NA value or None, if + no non-NA value is found. + Series.first_valid_index : Return index for first non-NA value or None, if no + non-NA value is found. + DataFrame.isna : Detect missing values. + + Examples + -------- + For Series: + + >>> s = pd.Series([None, 3, 4]) + >>> s.first_valid_index() + 1 + >>> s.last_valid_index() + 2 + + >>> s = pd.Series([None, None]) + >>> print(s.first_valid_index()) + None + >>> print(s.last_valid_index()) + None + + If all elements in Series are NA/null, returns None. + + >>> s = pd.Series() + >>> print(s.first_valid_index()) + None + >>> print(s.last_valid_index()) + None + + If Series is empty, returns None. 
+ + For DataFrame: + + >>> df = pd.DataFrame({"A": [None, None, 2], "B": [None, 3, 4]}) + >>> df + A B + 0 NaN NaN + 1 NaN 3.0 + 2 2.0 4.0 + >>> df.first_valid_index() + 1 + >>> df.last_valid_index() + 2 + + >>> df = pd.DataFrame({"A": [None, None, None], "B": [None, None, None]}) + >>> df + A B + 0 NaN NaN + 1 NaN NaN + 2 NaN NaN + >>> print(df.first_valid_index()) + None + >>> print(df.last_valid_index()) + None + """ return self._find_valid_index(how="last")
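
--
A quick post-apply sanity check (a minimal sketch, not part of the patch itself;
it assumes pandas is importable from a build of this branch, and the method list
is hand-collected from the hunks above, so it may be incomplete): with every
``@doc`` template now inlined, none of the rewritten docstrings should retain an
unsubstituted ``{klass}``-style placeholder or a doubled-brace template escape.

    # verify_static_docstrings.py -- run against a build of this branch
    import pandas as pd

    # Methods whose docstrings this patch converts from @doc templates
    # to static text (collected by hand from the hunks above).
    METHODS = [
        "droplevel", "to_excel", "to_json", "to_pickle", "to_csv",
        "reindex", "pipe", "fillna", "ffill", "bfill", "replace",
        "isna", "isnull", "notna", "notnull", "asfreq", "resample",
        "compare", "align", "where", "mask", "shift", "tz_convert",
        "tz_localize", "rolling", "expanding", "ewm",
        "first_valid_index", "last_valid_index",
    ]

    for name in METHODS:
        doc = getattr(pd.DataFrame, name).__doc__ or ""
        # A leftover marker would mean a decorator was removed without
        # fully expanding its template into the static docstring.
        for marker in ("{klass}", "{axes_single_arg}", "{storage_options}", "{{"):
            assert marker not in doc, f"{name}: unexpanded marker {marker!r}"
    print(f"checked {len(METHODS)} docstrings: all fully static")

Running ``python -m pytest --doctest-modules pandas/core/generic.py`` afterwards
also exercises the inlined examples directly.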