From eace576d68c64960a83caedbf4019870837572ae Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 16 Feb 2019 11:27:49 -0600 Subject: [PATCH] DOC/CLN: Fix various docstring errors (#25295) --- pandas/core/arrays/categorical.py | 72 ++++++++++---------- pandas/core/arrays/datetimes.py | 64 +++++++++--------- pandas/core/base.py | 22 ++++-- pandas/core/frame.py | 4 +- pandas/core/generic.py | 63 ++++++++--------- pandas/core/reshape/concat.py | 38 +++++------ pandas/core/reshape/pivot.py | 56 ++++++++-------- pandas/core/reshape/reshape.py | 14 ++-- pandas/core/reshape/tile.py | 2 +- pandas/core/strings.py | 108 ++++++++++++++++-------------- 10 files changed, 232 insertions(+), 211 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d7d0882bbcc94..c2b024c9ae12e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -214,7 +214,7 @@ def contains(cat, key, container): class Categorical(ExtensionArray, PandasObject): """ - Represent a categorical variable in classic R / S-plus fashion + Represent a categorical variable in classic R / S-plus fashion. `Categoricals` can only take on only a limited, and usually fixed, number of possible values (`categories`). In contrast to statistical categorical @@ -235,7 +235,7 @@ class Categorical(ExtensionArray, PandasObject): The unique categories for this categorical. If not given, the categories are assumed to be the unique values of `values` (sorted, if possible, otherwise in the order in which they appear). - ordered : boolean, (default False) + ordered : bool, default False Whether or not this categorical is treated as a ordered categorical. If True, the resulting categorical will be ordered. An ordered categorical respects, when sorted, the order of its @@ -253,7 +253,7 @@ class Categorical(ExtensionArray, PandasObject): codes : ndarray The codes (integer positions, which point to the categories) of this categorical, read only. - ordered : boolean + ordered : bool Whether or not this Categorical is ordered. dtype : CategoricalDtype The instance of ``CategoricalDtype`` storing the ``categories`` @@ -297,7 +297,7 @@ class Categorical(ExtensionArray, PandasObject): Ordered `Categoricals` can be sorted according to the custom order of the categories and can have a min and max value. - >>> c = pd.Categorical(['a','b','c','a','b','c'], ordered=True, + >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, ... categories=['c', 'b', 'a']) >>> c [a, b, c, a, b, c] @@ -618,7 +618,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): ---------- codes : array-like, integers An integer array, where each integer points to a category in - categories or dtype.categories, or else is -1 for NaN + categories or dtype.categories, or else is -1 for NaN. categories : index-like, optional The categories for the categorical. Items need to be unique. If the categories are not given here, then they must be provided @@ -700,7 +700,7 @@ def _set_categories(self, categories, fastpath=False): Parameters ---------- - fastpath : boolean (default: False) + fastpath : bool, default False Don't perform validation of the categories for uniqueness or nulls Examples @@ -747,15 +747,15 @@ def _set_dtype(self, dtype): def set_ordered(self, value, inplace=False): """ - Set the ordered attribute to the boolean value + Set the ordered attribute to the boolean value. Parameters ---------- - value : boolean to set whether this categorical is ordered (True) or - not (False) - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to the value + value : bool + Set whether this categorical is ordered (True) or not (False). + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to the value. """ inplace = validate_bool_kwarg(inplace, 'inplace') new_dtype = CategoricalDtype(self.categories, ordered=value) @@ -770,9 +770,9 @@ def as_ordered(self, inplace=False): Parameters ---------- - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to True + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to True. """ inplace = validate_bool_kwarg(inplace, 'inplace') return self.set_ordered(True, inplace=inplace) @@ -783,9 +783,9 @@ def as_unordered(self, inplace=False): Parameters ---------- - inplace : boolean (default: False) - Whether or not to set the ordered attribute inplace or return a copy - of this categorical with ordered set to False + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to False. """ inplace = validate_bool_kwarg(inplace, 'inplace') return self.set_ordered(False, inplace=inplace) @@ -815,19 +815,19 @@ def set_categories(self, new_categories, ordered=None, rename=False, ---------- new_categories : Index-like The categories in new order. - ordered : boolean, (default: False) + ordered : bool, default False Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. - rename : boolean (default: False) + rename : bool, default False Whether or not the new_categories should be considered as a rename of the old categories or as reordered categories. - inplace : boolean (default: False) - Whether or not to reorder the categories inplace or return a copy of - this categorical with reordered categories. + inplace : bool, default False + Whether or not to reorder the categories in-place or return a copy + of this categorical with reordered categories. Returns ------- - cat : Categorical with reordered categories or None if inplace. + Categorical with reordered categories or None if inplace. Raises ------ @@ -890,7 +890,7 @@ def rename_categories(self, new_categories, inplace=False): Currently, Series are considered list like. In a future version of pandas they'll be considered dict-like. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to rename the categories inplace or return a copy of this categorical with renamed categories. @@ -967,10 +967,10 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False): ---------- new_categories : Index-like The categories in new order. - ordered : boolean, optional + ordered : bool, optional Whether or not the categorical is treated as a ordered categorical. If not given, do not change the ordered information. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to reorder the categories inplace or return a copy of this categorical with reordered categories. @@ -1010,7 +1010,7 @@ def add_categories(self, new_categories, inplace=False): ---------- new_categories : category or list-like of category The new categories to be included. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to add the categories inplace or return a copy of this categorical with added categories. @@ -1060,7 +1060,7 @@ def remove_categories(self, removals, inplace=False): ---------- removals : category or list of categories The categories which should be removed. - inplace : boolean (default: False) + inplace : bool, default False Whether or not to remove the categories inplace or return a copy of this categorical with removed categories. @@ -1108,7 +1108,7 @@ def remove_unused_categories(self, inplace=False): Parameters ---------- - inplace : boolean (default: False) + inplace : bool, default False Whether or not to drop unused categories inplace or return a copy of this categorical with unused categories dropped. @@ -1460,7 +1460,7 @@ def value_counts(self, dropna=True): Parameters ---------- - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN. Returns @@ -1581,9 +1581,9 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'): Parameters ---------- - inplace : boolean, default False + inplace : bool, default False Do operation in place. - ascending : boolean, default True + ascending : bool, default True Order ascending. Passing False orders descending. The ordering parameter provides the method by which the category values are organized. @@ -2239,7 +2239,7 @@ def mode(self, dropna=True): Parameters ---------- - dropna : boolean, default True + dropna : bool, default True Don't consider counts of NaN/NaT. .. versionadded:: 0.24.0 @@ -2332,7 +2332,7 @@ def equals(self, other): Returns ------- - are_equal : boolean + bool """ if self.is_dtype_equal(other): if self.categories.equals(other.categories): @@ -2356,7 +2356,7 @@ def is_dtype_equal(self, other): Returns ------- - are_equal : boolean + bool """ try: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1b2a4da389dc4..cd8e8ed520ddc 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -799,14 +799,14 @@ def tz_convert(self, tz): Parameters ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None + tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone for time. Corresponding timestamps would be converted to this time zone of the Datetime Array/Index. A `tz` of None will convert to UTC and remove the timezone information. Returns ------- - normalized : same type as self + Array or Index Raises ------ @@ -842,7 +842,7 @@ def tz_convert(self, tz): With the ``tz=None``, we can remove the timezone (after converting to UTC if necessary): - >>> dti = pd.date_range(start='2014-08-01 09:00',freq='H', + >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H', ... periods=3, tz='Europe/Berlin') >>> dti @@ -882,7 +882,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Parameters ---------- - tz : string, pytz.timezone, dateutil.tz.tzfile or None + tz : str, pytz.timezone, dateutil.tz.tzfile or None Time zone to convert timestamps to. Passing ``None`` will remove the time zone information preserving local time. ambiguous : 'infer', 'NaT', bool array, default 'raise' @@ -930,7 +930,7 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Returns ------- - result : same type as self + Same type as self Array/Index converted to the specified time zone. Raises @@ -970,43 +970,39 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', Be careful with DST changes. When there is sequential data, pandas can infer the DST time: - >>> s = pd.to_datetime(pd.Series([ - ... '2018-10-28 01:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 03:00:00', - ... '2018-10-28 03:30:00'])) + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) >>> s.dt.tz_localize('CET', ambiguous='infer') - 2018-10-28 01:30:00+02:00 0 - 2018-10-28 02:00:00+02:00 1 - 2018-10-28 02:30:00+02:00 2 - 2018-10-28 02:00:00+01:00 3 - 2018-10-28 02:30:00+01:00 4 - 2018-10-28 03:00:00+01:00 5 - 2018-10-28 03:30:00+01:00 6 - dtype: int64 + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[ns, CET] In some cases, inferring the DST is impossible. In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly - >>> s = pd.to_datetime(pd.Series([ - ... '2018-10-28 01:20:00', - ... '2018-10-28 02:36:00', - ... '2018-10-28 03:46:00'])) + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) - 0 2018-10-28 01:20:00+02:00 - 1 2018-10-28 02:36:00+02:00 - 2 2018-10-28 03:46:00+01:00 - dtype: datetime64[ns, CET] + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` or `'shift_backwards'`. - >>> s = pd.to_datetime(pd.Series([ - ... '2015-03-29 02:30:00', - ... '2015-03-29 03:30:00'])) + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 0 2015-03-29 03:00:00+02:00 1 2015-03-29 03:30:00+02:00 @@ -1129,7 +1125,7 @@ def to_period(self, freq=None): Parameters ---------- - freq : string or Offset, optional + freq : str or Offset, optional One of pandas' :ref:`offset strings ` or an Offset object. Will be inferred by default. @@ -1150,7 +1146,7 @@ def to_period(self, freq=None): Examples -------- - >>> df = pd.DataFrame({"y": [1,2,3]}, + >>> df = pd.DataFrame({"y": [1, 2, 3]}, ... index=pd.to_datetime(["2000-03-31 00:00:00", ... "2000-05-31 00:00:00", ... "2000-08-31 00:00:00"])) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5a98e83c65884..7fdc64a8d9f85 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -794,7 +794,7 @@ def array(self): Returns ------- - array : ExtensionArray + ExtensionArray An ExtensionArray of the values stored within. For extension types, this is the actual array. For NumPy native types, this is a thin (no copy) wrapper around :class:`numpy.ndarray`. @@ -1022,7 +1022,7 @@ def max(self, axis=None, skipna=True): def argmax(self, axis=None, skipna=True): """ - Return a ndarray of the maximum argument indexer. + Return an ndarray of the maximum argument indexer. Parameters ---------- @@ -1087,6 +1087,10 @@ def argmin(self, axis=None, skipna=True): Dummy argument for consistency with Series skipna : bool, default True + Returns + ------- + numpy.ndarray + See Also -------- numpy.ndarray.argmin @@ -1102,6 +1106,10 @@ def tolist(self): (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period) + Returns + ------- + list + See Also -------- numpy.ndarray.tolist @@ -1162,7 +1170,7 @@ def _map_values(self, mapper, na_action=None): Returns ------- - applied : Union[Index, MultiIndex], inferred + Union[Index, MultiIndex], inferred The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. @@ -1246,7 +1254,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, Returns ------- - counts : Series + Series See Also -------- @@ -1363,7 +1371,7 @@ def is_unique(self): Returns ------- - is_unique : boolean + bool """ return self.nunique(dropna=False) == len(self) @@ -1377,7 +1385,7 @@ def is_monotonic(self): Returns ------- - is_monotonic : boolean + bool """ from pandas import Index return Index(self).is_monotonic @@ -1394,7 +1402,7 @@ def is_monotonic_decreasing(self): Returns ------- - is_monotonic_decreasing : boolean + bool """ from pandas import Index return Index(self).is_monotonic_decreasing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf97c94f6d129..a239ff4b4d5db 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6346,6 +6346,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, Returns ------- Series or DataFrame + Result of applying ``func`` along the given axis of the + DataFrame. See Also -------- @@ -6364,7 +6366,7 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, Examples -------- - >>> df = pd.DataFrame([[4, 9],] * 3, columns=['A', 'B']) + >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) >>> df A B 0 4 9 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b1fcbba7bd7ec..3a73861086bed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5959,17 +5959,18 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, value : scalar, dict, Series, or DataFrame Value to use to fill holes (e.g. 0), alternately a dict/Series/DataFrame of values specifying which value to use for - each index (for a Series) or column (for a DataFrame). (values not - in the dict/Series/DataFrame will not be filled). This value cannot + each index (for a Series) or column (for a DataFrame). Values not + in the dict/Series/DataFrame will not be filled. This value cannot be a list. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap + backfill / bfill: use next valid observation to fill gap. axis : %(axes_single_arg)s - inplace : boolean, default False - If True, fill in place. Note: this will modify any - other views on this object, (e.g. a no-copy slice for a column in a + Axis along which to fill missing values. + inplace : bool, default False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a DataFrame). limit : int, default None If method is specified, this is the maximum number of consecutive @@ -5979,18 +5980,20 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, maximum number of entries along the entire axis where NaNs will be filled. Must be greater than 0 if not None. downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, + A dict of item->dtype of what to downcast if possible, or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible) + equal type (e.g. float64 to int64 if possible). Returns ------- - filled : %(klass)s + %(klass)s + Object with missing values filled. See Also -------- interpolate : Fill NaN values using interpolation. - reindex, asfreq + reindex : Conform object to new index. + asfreq : Convert TimeSeries to specified frequency. Examples -------- @@ -5998,7 +6001,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, ... [3, 4, np.nan, 1], ... [np.nan, np.nan, np.nan, 5], ... [np.nan, 3, np.nan, 4]], - ... columns=list('ABCD')) + ... columns=list('ABCD')) >>> df A B C D 0 NaN 2.0 NaN 0 @@ -6752,7 +6755,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, Note how the first entry in column 'b' remains ``NaN``, because there is no entry befofe it to use for interpolation. - >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), + >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), ... (np.nan, 2.0, np.nan, np.nan), ... (2.0, 3.0, np.nan, 9.0), ... (np.nan, 4.0, -4.0, 16.0)], @@ -7221,9 +7224,9 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, upper : float or array_like, default None Maximum threshold value. All values above this threshold will be set to it. - axis : int or string axis name, optional + axis : int or str axis name, optional Align object with lower and upper along the given axis. - inplace : boolean, default False + inplace : bool, default False Whether to perform the operation in place on the data. .. versionadded:: 0.21.0 @@ -7345,7 +7348,7 @@ def clip_upper(self, threshold, axis=None, inplace=False): axis : {0 or 'index', 1 or 'columns'}, default 0 Align object with `threshold` along the given axis. - inplace : boolean, default False + inplace : bool, default False Whether to perform the operation in place on the data. .. versionadded:: 0.21.0 @@ -7426,7 +7429,7 @@ def clip_lower(self, threshold, axis=None, inplace=False): axis : {0 or 'index', 1 or 'columns'}, default 0 Align `self` with `threshold` along the given axis. - inplace : boolean, default False + inplace : bool, default False Whether to perform the operation in place on the data. .. versionadded:: 0.21.0 @@ -7583,9 +7586,9 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Examples -------- - >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon', - ... 'Parrot', 'Parrot'], - ... 'Max Speed' : [380., 370., 24., 26.]}) + >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', + ... 'Parrot', 'Parrot'], + ... 'Max Speed': [380., 370., 24., 26.]}) >>> df Animal Max Speed 0 Falcon 380.0 @@ -7606,8 +7609,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], ... ['Captive', 'Wild', 'Captive', 'Wild']] >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) - >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]}, - ... index=index) + >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]}, + ... index=index) >>> df Max Speed Animal Type @@ -7740,14 +7743,14 @@ def at_time(self, time, asof=False, axis=None): Parameters ---------- - time : datetime.time or string + time : datetime.time or str axis : {0 or 'index', 1 or 'columns'}, default 0 .. versionadded:: 0.24.0 Returns ------- - values_at_time : same type as caller + Series or DataFrame Raises ------ @@ -7765,7 +7768,7 @@ def at_time(self, time, asof=False, axis=None): Examples -------- >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') - >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 @@ -7800,17 +7803,17 @@ def between_time(self, start_time, end_time, include_start=True, Parameters ---------- - start_time : datetime.time or string - end_time : datetime.time or string - include_start : boolean, default True - include_end : boolean, default True + start_time : datetime.time or str + end_time : datetime.time or str + include_start : bool, default True + include_end : bool, default True axis : {0 or 'index', 1 or 'columns'}, default 0 .. versionadded:: 0.24.0 Returns ------- - values_between_time : same type as caller + Series or DataFrame Raises ------ @@ -7828,7 +7831,7 @@ def between_time(self, start_time, end_time, include_start=True, Examples -------- >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') - >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 53671e00e88b4..a6c945ac2e464 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -38,15 +38,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, If a dict is passed, the sorted keys will be used as the `keys` argument, unless it is passed, in which case the values will be selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised + they are all None in which case a ValueError will be raised. axis : {0/'index', 1/'columns'}, default 0 - The axis to concatenate along + The axis to concatenate along. join : {'inner', 'outer'}, default 'outer' - How to handle indexes on other axis(es) + How to handle indexes on other axis (or axes). join_axes : list of Index objects Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic - ignore_index : boolean, default False + inner/outer set logic. + ignore_index : bool, default False If True, do not use the index values along the concatenation axis. The resulting axis will be labeled 0, ..., n - 1. This is useful if you are concatenating objects where the concatenation axis does not have @@ -54,16 +54,16 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, axes are still respected in the join. keys : sequence, default None If multiple levels passed, should contain tuples. Construct - hierarchical index using the passed keys as the outermost level + hierarchical index using the passed keys as the outermost level. levels : list of sequences, default None Specific levels (unique values) to use for constructing a - MultiIndex. Otherwise they will be inferred from the keys + MultiIndex. Otherwise they will be inferred from the keys. names : list, default None - Names for the levels in the resulting hierarchical index - verify_integrity : boolean, default False + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False Check whether the new concatenated axis contains duplicates. This can - be very expensive relative to the actual data concatenation - sort : boolean, default None + be very expensive relative to the actual data concatenation. + sort : bool, default None Sort non-concatenation axis if it is not already aligned when `join` is 'outer'. The current default of sorting is deprecated and will change to not-sorting in a future version of pandas. @@ -76,12 +76,12 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, .. versionadded:: 0.23.0 - copy : boolean, default True - If False, do not copy data unnecessarily + copy : bool, default True + If False, do not copy data unnecessarily. Returns ------- - concatenated : object, type of objs + object, type of objs When concatenating all ``Series`` along the index (axis=0), a ``Series`` is returned. When ``objs`` contains at least one ``DataFrame``, a ``DataFrame`` is returned. When concatenating along @@ -89,10 +89,10 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, See Also -------- - Series.append - DataFrame.append - DataFrame.join - DataFrame.merge + Series.append : Concatenate Series. + DataFrame.append : Concatenate DataFrames. + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. Notes ----- @@ -128,7 +128,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, Add a hierarchical index at the outermost level of the data with the ``keys`` option. - >>> pd.concat([s1, s2], keys=['s1', 's2',]) + >>> pd.concat([s1, s2], keys=['s1', 's2']) s1 0 a 1 b s2 0 c diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 54f11646fc753..8d7616c4b6b61 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -392,36 +392,36 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, aggfunc=None, margins=False, margins_name='All', dropna=True, normalize=False): """ - Compute a simple cross-tabulation of two (or more) factors. By default + Compute a simple cross tabulation of two (or more) factors. By default computes a frequency table of the factors unless an array of values and an - aggregation function are passed + aggregation function are passed. Parameters ---------- index : array-like, Series, or list of arrays/Series - Values to group by in the rows + Values to group by in the rows. columns : array-like, Series, or list of arrays/Series - Values to group by in the columns + Values to group by in the columns. values : array-like, optional Array of values to aggregate according to the factors. Requires `aggfunc` be specified. rownames : sequence, default None - If passed, must match number of row arrays passed + If passed, must match number of row arrays passed. colnames : sequence, default None - If passed, must match number of column arrays passed + If passed, must match number of column arrays passed. aggfunc : function, optional - If specified, requires `values` be specified as well - margins : boolean, default False - Add row/column margins (subtotals) - margins_name : string, default 'All' - Name of the row / column that will contain the totals + If specified, requires `values` be specified as well. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals when margins is True. .. versionadded:: 0.21.0 - dropna : boolean, default True - Do not include columns whose entries are all NaN - normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False + dropna : bool, default True + Do not include columns whose entries are all NaN. + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False Normalize by dividing all values by the sum of values. - If passed 'all' or `True`, will normalize over all values. @@ -433,7 +433,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, Returns ------- - crosstab : DataFrame + DataFrame + Cross tabulation of the data. + + See Also + -------- + DataFrame.pivot : Reshape data based on column values. + pivot_table : Create a pivot table as a DataFrame. Notes ----- @@ -455,32 +461,26 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, ... "one", "two", "two", "two", "one"], dtype=object) >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", ... "shiny", "dull", "shiny", "shiny", "shiny"], - ... dtype=object) - + ... dtype=object) >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) - ... # doctest: +NORMALIZE_WHITESPACE b one two c dull shiny dull shiny a bar 1 2 1 0 foo 2 2 1 2 + Here 'c' and 'f' are not represented in the data and will not be + shown in the output because dropna is True by default. Set + dropna=False to preserve categories with no data. + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) - >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, - # and will not be shown in the output because - # dropna is True by default. Set 'dropna=False' - # to preserve categories with no data - ... # doctest: +SKIP + >>> pd.crosstab(foo, bar) col_0 d e row_0 a 1 0 b 0 1 - - >>> crosstab(foo, bar, dropna=False) # 'c' and 'f' are not represented - # in the data, but they still will be counted - # and shown in the output - ... # doctest: +SKIP + >>> pd.crosstab(foo, bar, dropna=False) col_0 d e f row_0 a 1 0 0 diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f436b3b92a359..6ba33301753d6 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -701,19 +701,20 @@ def _convert_level_number(level_num, columns): def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None): """ - Convert categorical variable into dummy/indicator variables + Convert categorical variable into dummy/indicator variables. Parameters ---------- data : array-like, Series, or DataFrame - prefix : string, list of strings, or dict of strings, default None + Data of which to get dummy indicators. + prefix : str, list of str, or dict of str, default None String to append DataFrame column names. Pass a list with length equal to the number of columns when calling get_dummies on a DataFrame. Alternatively, `prefix` can be a dictionary mapping column names to prefixes. - prefix_sep : string, default '_' + prefix_sep : str, default '_' If appending prefix, separator/delimiter to use. Or pass a - list or dictionary as with `prefix.` + list or dictionary as with `prefix`. dummy_na : bool, default False Add a column to indicate NaNs, if False NaNs are ignored. columns : list-like, default None @@ -736,11 +737,12 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, Returns ------- - dummies : DataFrame + DataFrame + Dummy-coded data. See Also -------- - Series.str.get_dummies + Series.str.get_dummies : Convert Series to dummy codes. Examples -------- diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 2a654fec36a9f..f99fd9004bb31 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -163,7 +163,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Use `drop` optional when bins is not unique >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, - ... right=False, duplicates='drop') + ... right=False, duplicates='drop') ... # doctest: +ELLIPSIS (a 0.0 b 1.0 diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 183a91c952140..cc7a4db515c42 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -120,7 +120,7 @@ def str_count(arr, pat, flags=0): Returns ------- - counts : Series or Index + Series or Index Same type as the calling object containing the integer counts. See Also @@ -283,7 +283,7 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): return `True`. However, '.0' as a regex matches any character followed by a 0. - >>> s2 = pd.Series(['40','40.0','41','41.0','35']) + >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) >>> s2.str.contains('.0', regex=True) 0 True 1 True @@ -433,13 +433,13 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): Parameters ---------- - pat : string or compiled regex + pat : str or compiled regex String can be a character sequence or regular expression. .. versionadded:: 0.20.0 `pat` also accepts a compiled regex. - repl : string or callable + repl : str or callable Replacement string or a callable. The callable is passed the regex match object and must return a replacement string to be used. See :func:`re.sub`. @@ -448,15 +448,15 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): `repl` also accepts a callable. n : int, default -1 (all) - Number of replacements to make from start - case : boolean, default None + Number of replacements to make from start. + case : bool, default None - If True, case sensitive (the default if `pat` is a string) - Set to False for case insensitive - Cannot be set if `pat` is a compiled regex flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE - Cannot be set if `pat` is a compiled regex - regex : boolean, default True + regex : bool, default True - If True, assumes the passed-in pattern is a regular expression. - If False, treats the pattern as a literal string - Cannot be set to False if `pat` is a compiled regex or `repl` is @@ -537,6 +537,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): Using a compiled regex with flags + >>> import re >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar') 0 foo @@ -604,6 +605,7 @@ def str_repeat(arr, repeats): 0 a 1 b 2 c + dtype: object Single int repeats string in Series @@ -611,6 +613,7 @@ def str_repeat(arr, repeats): 0 aa 1 bb 2 cc + dtype: object Sequence of int repeats corresponding string in Series @@ -618,6 +621,7 @@ def str_repeat(arr, repeats): 0 a 1 bb 2 ccc + dtype: object """ if is_scalar(repeats): def rep(x): @@ -646,13 +650,14 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan): Parameters ---------- - pat : string - Character sequence or regular expression - case : boolean, default True - If True, case sensitive + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. flags : int, default 0 (no flags) - re module flags, e.g. re.IGNORECASE - na : default NaN, fill value for missing values + re module flags, e.g. re.IGNORECASE. + na : default NaN + Fill value for missing values. Returns ------- @@ -768,7 +773,7 @@ def str_extract(arr, pat, flags=0, expand=True): Parameters ---------- - pat : string + pat : str Regular expression pattern with capturing groups. flags : int, default 0 (no flags) Flags from the ``re`` module, e.g. ``re.IGNORECASE``, that @@ -966,21 +971,23 @@ def str_extractall(arr, pat, flags=0): def str_get_dummies(arr, sep='|'): """ - Split each string in the Series by sep and return a frame of - dummy/indicator variables. + Split each string in the Series by sep and return a DataFrame + of dummy/indicator variables. Parameters ---------- - sep : string, default "|" + sep : str, default "|" String to split on. Returns ------- - dummies : DataFrame + DataFrame + Dummy variables corresponding to values of the Series. See Also -------- - get_dummies + get_dummies : Convert categorical variable into dummy/indicator + variables. Examples -------- @@ -1089,11 +1096,11 @@ def str_findall(arr, pat, flags=0): Parameters ---------- - pat : string + pat : str Pattern or regular expression. flags : int, default 0 - ``re`` module flags, e.g. `re.IGNORECASE` (default is 0, which means - no flags). + Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which + means no flags). Returns ------- @@ -1182,17 +1189,18 @@ def str_find(arr, sub, start=0, end=None, side='left'): Parameters ---------- sub : str - Substring being searched + Substring being searched. start : int - Left edge index + Left edge index. end : int - Right edge index + Right edge index. side : {'left', 'right'}, default 'left' - Specifies a starting side, equivalent to ``find`` or ``rfind`` + Specifies a starting side, equivalent to ``find`` or ``rfind``. Returns ------- - found : Series/Index of integer values + Series or Index + Indexes where substring is found. """ if not isinstance(sub, compat.string_types): @@ -1430,7 +1438,7 @@ def str_slice_replace(arr, start=None, stop=None, repl=None): Returns ------- - replaced : Series or Index + Series or Index Same type as the original object. See Also @@ -1513,7 +1521,7 @@ def str_strip(arr, to_strip=None, side='both'): Returns ------- - stripped : Series/Index of objects + Series or Index """ if side == 'both': f = lambda x: x.strip(to_strip) @@ -1537,30 +1545,30 @@ def str_wrap(arr, width, **kwargs): Parameters ---------- width : int - Maximum line-width + Maximum line width. expand_tabs : bool, optional - If true, tab characters will be expanded to spaces (default: True) + If True, tab characters will be expanded to spaces (default: True). replace_whitespace : bool, optional - If true, each whitespace character (as defined by string.whitespace) + If True, each whitespace character (as defined by string.whitespace) remaining after tab expansion will be replaced by a single space - (default: True) + (default: True). drop_whitespace : bool, optional - If true, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True) + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). break_long_words : bool, optional - If true, then words longer than width will be broken in order to ensure + If True, then words longer than width will be broken in order to ensure that no lines are longer than width. If it is false, long words will - not be broken, and some lines may be longer than width. (default: True) + not be broken, and some lines may be longer than width (default: True). break_on_hyphens : bool, optional - If true, wrapping will occur preferably on whitespace and right after + If True, wrapping will occur preferably on whitespace and right after hyphens in compound words, as it is customary in English. If false, only whitespaces will be considered as potentially good places for line breaks, but you need to set break_long_words to false if you want truly - insecable words. (default: True) + insecable words (default: True). Returns ------- - wrapped : Series/Index of objects + Series or Index Notes ----- @@ -1581,6 +1589,7 @@ def str_wrap(arr, width, **kwargs): >>> s.str.wrap(12) 0 line to be\nwrapped 1 another line\nto be\nwrapped + dtype: object """ kwargs['width'] = width @@ -1613,7 +1622,7 @@ def str_translate(arr, table, deletechars=None): Returns ------- - translated : Series/Index of objects + Series or Index """ if deletechars is None: f = lambda x: x.translate(table) @@ -1641,15 +1650,16 @@ def str_get(arr, i): Returns ------- - items : Series/Index of objects + Series or Index Examples -------- >>> s = pd.Series(["String", - (1, 2, 3), - ["a", "b", "c"], - 123, -456, - {1:"Hello", "2":"World"}]) + ... (1, 2, 3), + ... ["a", "b", "c"], + ... 123, + ... -456, + ... {1: "Hello", "2": "World"}]) >>> s 0 String 1 (1, 2, 3) @@ -1674,7 +1684,7 @@ def str_get(arr, i): 2 c 3 NaN 4 NaN - 5 NaN + 5 None dtype: object """ def f(x): @@ -1699,7 +1709,7 @@ def str_decode(arr, encoding, errors="strict"): Returns ------- - decoded : Series/Index of objects + Series or Index """ if encoding in _cpython_optimized_decoders: # CPython optimized implementation @@ -2091,7 +2101,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): Returns ------- - concat : str or Series/Index of objects + str, Series or Index If `others` is None, `str` is returned, otherwise a `Series/Index` (same type as caller) of objects is returned.