Skip to content

Commit

Permalink
DOC: EX01 ({Categorical, Interval, Multi, Datetime, Timedelta}-Index) (#53925)
Browse files Browse the repository at this point in the history

* add examples for CategoricalIndex.equals

* add examples for MultiIndex.dtypes

* add examples for MultiIndex.drop

* add examples for DatetimeIndex.to_pydatetime/std

* add examples for TimedeltaIndex

* minor modifications

* fix docstrings and make up for removed ones

* fix doctests
  • Loading branch information
Charlie-XIAO committed Jun 30, 2023
1 parent 6eb59b3 commit 2574694
Show file tree
Hide file tree
Showing 10 changed files with 309 additions and 75 deletions.
18 changes: 0 additions & 18 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -138,25 +138,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.util.hash_pandas_object \
pandas_object \
pandas.api.interchange.from_dataframe \
pandas.CategoricalIndex.codes \
pandas.CategoricalIndex.categories \
pandas.CategoricalIndex.ordered \
pandas.CategoricalIndex.reorder_categories \
pandas.CategoricalIndex.set_categories \
pandas.CategoricalIndex.as_ordered \
pandas.CategoricalIndex.as_unordered \
pandas.CategoricalIndex.equals \
pandas.IntervalIndex.values \
pandas.IntervalIndex.to_tuples \
pandas.MultiIndex.dtypes \
pandas.MultiIndex.drop \
pandas.DatetimeIndex.snap \
pandas.DatetimeIndex.as_unit \
pandas.DatetimeIndex.to_pydatetime \
pandas.DatetimeIndex.to_series \
pandas.DatetimeIndex.mean \
pandas.DatetimeIndex.std \
pandas.TimedeltaIndex \
pandas.core.window.rolling.Rolling.max \
pandas.core.window.rolling.Rolling.cov \
pandas.core.window.rolling.Rolling.skew \
Expand Down
141 changes: 112 additions & 29 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
Attributes
----------
categories : Index
The categories of this categorical
The categories of this categorical.
codes : ndarray
The codes (integer positions, which point to the categories) of this
categorical, read only.
Expand Down Expand Up @@ -760,23 +760,32 @@ def categories(self) -> Index:
Examples
--------
For :class:`pandas.Series`:
For Series:
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser.cat.categories
Index(['a', 'b', 'c'], dtype='object')
>>> raw_cat = pd.Categorical(["a", "b", "c", "a"], categories=["b", "c", "d"],)
>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], categories=['b', 'c', 'd'])
>>> ser = pd.Series(raw_cat)
>>> ser.cat.categories
Index(['b', 'c', 'd'], dtype='object')
For Categorical:
For :class:`pandas.Categorical`:
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
>>> cat.categories
Index(['a', 'b'], dtype='object')
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'c', 'b', 'a', 'c', 'b'])
>>> ci.categories
Index(['a', 'b', 'c'], dtype='object')
>>> ci = pd.CategoricalIndex(['a', 'c'], categories=['c', 'b', 'a'])
>>> ci.categories
Index(['c', 'b', 'a'], dtype='object')
"""
return self.dtype.categories

Expand All @@ -787,19 +796,18 @@ def ordered(self) -> Ordered:
Examples
--------
For :class:`pandas.Series`:
For Series:
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser.cat.ordered
False
>>> raw_cat = pd.Categorical(["a", "b", "c", "a"], ordered=True)
>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], ordered=True)
>>> ser = pd.Series(raw_cat)
>>> ser.cat.ordered
True
For Categorical:
For :class:`pandas.Categorical`:
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
>>> cat.ordered
Expand All @@ -808,13 +816,23 @@ def ordered(self) -> Ordered:
>>> cat = pd.Categorical(['a', 'b'], ordered=False)
>>> cat.ordered
False
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'b'], ordered=True)
>>> ci.ordered
True
>>> ci = pd.CategoricalIndex(['a', 'b'], ordered=False)
>>> ci.ordered
False
"""
return self.dtype.ordered

@property
def codes(self) -> np.ndarray:
"""
The category codes of this categorical.
The category codes of this categorical index.
Codes are an array of integers which are the positions of the actual
values in the categories array.
Expand All @@ -825,13 +843,25 @@ def codes(self) -> np.ndarray:
Returns
-------
ndarray[int]
A non-writable view of the `codes` array.
A non-writable view of the ``codes`` array.
Examples
--------
For :class:`pandas.Categorical`:
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
>>> cat.codes
array([0, 1], dtype=int8)
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'])
>>> ci.codes
array([0, 1, 2, 0, 1, 2], dtype=int8)
>>> ci = pd.CategoricalIndex(['a', 'c'], categories=['c', 'b', 'a'])
>>> ci.codes
array([2, 0], dtype=int8)
"""
v = self._codes.view()
v.flags.writeable = False
Expand Down Expand Up @@ -915,12 +945,23 @@ def as_ordered(self) -> Self:
Examples
--------
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
For :class:`pandas.Series`:
>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser.cat.ordered
False
>>> ser = ser.cat.as_ordered()
>>> ser.cat.ordered
True
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'])
>>> ci.ordered
False
>>> ci = ci.as_ordered()
>>> ci.ordered
True
"""
return self.set_ordered(True)

Expand All @@ -935,24 +976,36 @@ def as_unordered(self) -> Self:
Examples
--------
>>> raw_cate = pd.Categorical(["a", "b", "c"],
... categories=["a", "b", "c"], ordered=True)
>>> ser = pd.Series(raw_cate)
For :class:`pandas.Series`:
>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], ordered=True)
>>> ser = pd.Series(raw_cat)
>>> ser.cat.ordered
True
>>> ser = ser.cat.as_unordered()
>>> ser.cat.ordered
False
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'], ordered=True)
>>> ci.ordered
True
>>> ci = ci.as_unordered()
>>> ci.ordered
False
"""
return self.set_ordered(False)

def set_categories(self, new_categories, ordered=None, rename: bool = False):
"""
Set the categories to the specified new_categories.
Set the categories to the specified new categories.
`new_categories` can include new categories (which will result in
``new_categories`` can include new categories (which will result in
unused categories) or remove old categories (which results in values
set to NaN). If `rename==True`, the categories will simple be renamed
set to ``NaN``). If ``rename=True``, the categories will simply be renamed
(less or more items than in old categories will result in values set to
NaN or in unused categories respectively).
``NaN`` or in unused categories respectively).
This method can be used to perform more than one action of adding,
removing, and reordering simultaneously and is therefore faster than
Expand Down Expand Up @@ -994,23 +1047,41 @@ def set_categories(self, new_categories, ordered=None, rename: bool = False):
Examples
--------
>>> raw_cate = pd.Categorical(["a", "b", "c", "A"],
... categories=["a", "b", "c"], ordered=True)
>>> ser = pd.Series(raw_cate)
For :class:`pandas.Series`:
>>> raw_cat = pd.Categorical(['a', 'b', 'c', 'A'],
... categories=['a', 'b', 'c'], ordered=True)
>>> ser = pd.Series(raw_cat)
>>> ser
0 a
1 b
2 c
3 NaN
dtype: category
Categories (3, object): ['a' < 'b' < 'c']
>>> ser.cat.set_categories(["A", "B", "C"], rename=True)
>>> ser.cat.set_categories(['A', 'B', 'C'], rename=True)
0 A
1 B
2 C
3 NaN
dtype: category
Categories (3, object): ['A' < 'B' < 'C']
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'A'],
... categories=['a', 'b', 'c'], ordered=True)
>>> ci
CategoricalIndex(['a', 'b', 'c', nan], categories=['a', 'b', 'c'],
ordered=True, dtype='category')
>>> ci.set_categories(['A', 'b', 'c'])
CategoricalIndex([nan, 'b', 'c', nan], categories=['A', 'b', 'c'],
ordered=True, dtype='category')
>>> ci.set_categories(['A', 'b', 'c'], rename=True)
CategoricalIndex(['A', 'b', 'c', nan], categories=['A', 'b', 'c'],
ordered=True, dtype='category')
"""

if ordered is None:
Expand Down Expand Up @@ -1108,7 +1179,7 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:
"""
Reorder categories as specified in new_categories.
`new_categories` need to include all old categories and no new category
``new_categories`` need to include all old categories and no new category
items.
Parameters
Expand Down Expand Up @@ -1140,7 +1211,9 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:
Examples
--------
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
For :class:`pandas.Series`:
>>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
>>> ser = ser.cat.reorder_categories(['c', 'b', 'a'], ordered=True)
>>> ser
0 a
Expand All @@ -1149,14 +1222,24 @@ def reorder_categories(self, new_categories, ordered=None) -> Self:
3 a
dtype: category
Categories (3, object): ['c' < 'b' < 'a']
>>> ser = ser.sort_values()
>>> ser
>>> ser.sort_values()
2 c
1 b
0 a
3 a
dtype: category
Categories (3, object): ['c' < 'b' < 'a']
For :class:`pandas.CategoricalIndex`:
>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'])
>>> ci
CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c'],
ordered=False, dtype='category')
>>> ci.reorder_categories(['c', 'b', 'a'], ordered=True)
CategoricalIndex(['a', 'b', 'c', 'a'], categories=['c', 'b', 'a'],
ordered=True, dtype='category')
"""
if (
len(self.categories) != len(new_categories)
Expand Down
11 changes: 11 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1552,6 +1552,17 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):
Examples
--------
For :class:`pandas.DatetimeIndex`:
>>> idx = pd.date_range('2001-01-01 00:00', periods=3)
>>> idx
DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx.mean()
Timestamp('2001-01-02 00:00:00')
For :class:`pandas.TimedeltaIndex`:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')
>>> tdelta_idx
TimedeltaIndex(['1 days', '2 days', '3 days'],
Expand Down
43 changes: 34 additions & 9 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,11 +1090,19 @@ def tz_localize(

def to_pydatetime(self) -> npt.NDArray[np.object_]:
"""
Return an ndarray of datetime.datetime objects.
Return an ndarray of ``datetime.datetime`` objects.
Returns
-------
numpy.ndarray
Examples
--------
>>> idx = pd.date_range('2018-02-27', periods=3)
>>> idx.to_pydatetime()
array([datetime.datetime(2018, 2, 27, 0, 0),
datetime.datetime(2018, 2, 28, 0, 0),
datetime.datetime(2018, 3, 1, 0, 0)], dtype=object)
"""
return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._creso)

Expand Down Expand Up @@ -2097,23 +2105,40 @@ def std(
"""
Return sample standard deviation over requested axis.
Normalized by N-1 by default. This can be changed using the ddof argument
Normalized by `N-1` by default. This can be changed using ``ddof``.
Parameters
----------
axis : int optional, default None
Axis for the function to be applied on.
For `Series` this parameter is unused and defaults to `None`.
axis : int, optional
Axis for the function to be applied on. For :class:`pandas.Series`
this parameter is unused and defaults to ``None``.
ddof : int, default 1
Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.
Degrees of Freedom. The divisor used in calculations is `N - ddof`,
where `N` represents the number of elements.
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result will be
NA.
Exclude NA/null values. If an entire row/column is ``NA``, the result
will be ``NA``.
Returns
-------
Timedelta
See Also
--------
numpy.ndarray.std : Returns the standard deviation of the array elements
along given axis.
Series.std : Return sample standard deviation over requested axis.
Examples
--------
For :class:`pandas.DatetimeIndex`:
>>> idx = pd.date_range('2001-01-01 00:00', periods=3)
>>> idx
DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx.std()
Timedelta('1 days 00:00:00')
"""
# Because std is translation-invariant, we can get self.std
# by calculating (self - Timestamp(0)).std, and we can do it
Expand Down

0 comments on commit 2574694

Please sign in to comment.