Skip to content

Commit

Permalink
DOC: Enforce Numpy Docstring Validation for pandas.DataFrame.sem (#58513)
Browse files Browse the repository at this point in the history

* DOC: add PR01,RT03,SA01 in pandas.DataFrame.sem

* DOC: remove PR01,RT03,SA01 in pandas.DataFrame.sem
  • Loading branch information
tuhinsharma121 committed May 1, 2024
1 parent 564d0d9 commit 9250bf7
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 2 deletions.
1 change: 0 additions & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.median RT03,SA01" \
-i "pandas.DataFrame.min RT03" \
-i "pandas.DataFrame.plot PR02,SA01" \
-i "pandas.DataFrame.sem PR01,RT03,SA01" \
-i "pandas.DataFrame.std PR01,RT03,SA01" \
-i "pandas.DataFrame.sum RT03" \
-i "pandas.DataFrame.swaplevel SA01" \
Expand Down
71 changes: 70 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11945,7 +11945,6 @@ def sem(
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
@doc(make_doc("sem", ndim=2))
def sem(
self,
axis: Axis | None = 0,
Expand All @@ -11954,6 +11953,76 @@ def sem(
numeric_only: bool = False,
**kwargs,
) -> Series | Any:
"""
Return unbiased standard error of the mean over requested axis.
Normalized by N-1 by default. This can be changed using the ddof argument.
Parameters
----------
axis : {index (0), columns (1)}
For `Series` this parameter is unused and defaults to 0.
.. warning::
The behavior of DataFrame.sem with ``axis=None`` is deprecated;
in a future version this will reduce over both axes and return a scalar.
To retain the old behavior, pass axis=0 (or do not pass axis).
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
ddof : int, default 1
Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.
numeric_only : bool, default False
Include only float, int, boolean columns. Not implemented for Series.
**kwargs :
Additional keywords passed.
Returns
-------
Series or DataFrame (if level specified)
Unbiased standard error of the mean over requested axis.
See Also
--------
DataFrame.var : Return unbiased variance over requested axis.
DataFrame.std : Return sample standard deviation over requested axis.
Examples
--------
>>> s = pd.Series([1, 2, 3])
>>> s.sem().round(6)
0.57735
With a DataFrame
>>> df = pd.DataFrame({"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"])
>>> df
a b
tiger 1 2
zebra 2 3
>>> df.sem()
a 0.5
b 0.5
dtype: float64
Using axis=1
>>> df.sem(axis=1)
tiger 0.5
zebra 0.5
dtype: float64
In this case, `numeric_only` should be set to `True`
to avoid getting an error.
>>> df = pd.DataFrame({"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"])
>>> df.sem(numeric_only=True)
a 0.5
dtype: float64
"""
result = super().sem(
axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs
)
Expand Down

0 comments on commit 9250bf7

Please sign in to comment.