DOC: Enforce Numpy Docstring Validation for pandas.DataFrame.sem (#58513

) * DOC: add PR01,RT03,SA01 in pandas.DataFrame.sem * DOC: remove PR01,RT03,SA01 in pandas.DataFrame.sem
pandas-dev · May 1, 2024 · 9250bf7 · 9250bf7
1 parent 564d0d9
commit 9250bf7
Show file tree

Hide file tree

Showing 2 changed files with 70 additions and 2 deletions.
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -75,7 +75,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.median RT03,SA01" \
         -i "pandas.DataFrame.min RT03" \
         -i "pandas.DataFrame.plot PR02,SA01" \
-        -i "pandas.DataFrame.sem PR01,RT03,SA01" \
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
         -i "pandas.DataFrame.swaplevel SA01" \

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -11945,7 +11945,6 @@ def sem(
     ) -> Series | Any: ...
 
     @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
-    @doc(make_doc("sem", ndim=2))
     def sem(
         self,
         axis: Axis | None = 0,
@@ -11954,6 +11953,76 @@ def sem(
         numeric_only: bool = False,
         **kwargs,
     ) -> Series | Any:
+        """
+        Return unbiased standard error of the mean over requested axis.
+
+        Normalized by N-1 by default. This can be changed using the ddof argument
+
+        Parameters
+        ----------
+        axis : {index (0), columns (1)}
+            For `Series` this parameter is unused and defaults to 0.
+
+            .. warning::
+
+                The behavior of DataFrame.sem with ``axis=None`` is deprecated,
+                in a future version this will reduce over both axes and return a scalar
+                To retain the old behavior, pass axis=0 (or do not pass axis).
+
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+        ddof : int, default 1
+            Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
+            where N represents the number of elements.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns. Not implemented for Series.
+        **kwargs :
+            Additional keywords passed.
+
+        Returns
+        -------
+        Series or DataFrame (if level specified)
+            Unbiased standard error of the mean over requested axis.
+
+        See Also
+        --------
+        DataFrame.var : Return unbiased variance over requested axis.
+        DataFrame.std : Returns sample standard deviation over requested axis.
+
+        Examples
+        --------
+        >>> s = pd.Series([1, 2, 3])
+        >>> s.sem().round(6)
+        0.57735
+
+        With a DataFrame
+
+        >>> df = pd.DataFrame({"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"])
+        >>> df
+               a   b
+        tiger  1   2
+        zebra  2   3
+        >>> df.sem()
+        a   0.5
+        b   0.5
+        dtype: float64
+
+        Using axis=1
+
+        >>> df.sem(axis=1)
+        tiger   0.5
+        zebra   0.5
+        dtype: float64
+
+        In this case, `numeric_only` should be set to `True`
+        to avoid getting an error.
+
+        >>> df = pd.DataFrame({"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"])
+        >>> df.sem(numeric_only=True)
+        a   0.5
+        dtype: float64
+        """
         result = super().sem(
             axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs
         )