diff --git a/pandas/core/series.py b/pandas/core/series.py
index 427da96c5e1c4..db8a15932106a 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -129,7 +129,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
         sequence are used, the index will override the keys found in the
         dict.
     dtype : str, numpy.dtype, or ExtensionDtype, optional
-        dtype for the output Series. If not specified, this will be
+        Data type for the output Series. If not specified, this will be
         inferred from `data`.
         See the :ref:`user guide <basics.dtypes>` for more usages.
     copy : bool, default False
@@ -444,7 +444,7 @@ def values(self):
 
         Returns
         -------
-        arr : numpy.ndarray or ndarray-like
+        numpy.ndarray or ndarray-like
 
         See Also
         --------
@@ -513,6 +513,11 @@ def ravel(self, order='C'):
         """
         Return the flattened underlying data as an ndarray.
 
+        Returns
+        -------
+        numpy.ndarray or ndarray-like
+            Flattened data of the Series.
+
         See Also
         --------
         numpy.ndarray.ravel
@@ -830,7 +835,7 @@ def _ixs(self, i, axis=0):
 
         Returns
         -------
-        value : scalar (int) or Series (slice, sequence)
+        scalar (int) or Series (slice, sequence)
         """
 
         try:
@@ -1173,7 +1178,7 @@ def get_value(self, label, takeable=False):
 
         Returns
         -------
-        value : scalar value
+        scalar value
         """
         warnings.warn("get_value is deprecated and will be removed "
                       "in a future release. Please use "
@@ -1207,7 +1212,7 @@ def set_value(self, label, value, takeable=False):
 
         Returns
        -------
-        series : Series
+        Series
             If label is contained, will be reference to calling Series,
             otherwise a new object
         """
@@ -1394,29 +1399,30 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
         Parameters
         ----------
         buf : StringIO-like, optional
-            buffer to write to
-        na_rep : string, optional
-            string representation of NAN to use, default 'NaN'
+            Buffer to write to.
+        na_rep : str, optional
+            String representation of NaN to use, default 'NaN'.
         float_format : one-parameter function, optional
-            formatter function to apply to columns' elements if they are floats
-            default None
-        header : boolean, default True
-            Add the Series header (index name)
+            Formatter function to apply to columns' elements if they are
+            floats, default None.
+        header : bool, default True
+            Add the Series header (index name).
         index : bool, optional
-            Add index (row) labels, default True
-        length : boolean, default False
-            Add the Series length
-        dtype : boolean, default False
-            Add the Series dtype
-        name : boolean, default False
-            Add the Series name if not None
+            Add index (row) labels, default True.
+        length : bool, default False
+            Add the Series length.
+        dtype : bool, default False
+            Add the Series dtype.
+        name : bool, default False
+            Add the Series name if not None.
         max_rows : int, optional
             Maximum number of rows to show before truncating. If None, show
             all.
 
         Returns
         -------
-        formatted : string (if not buffer passed)
+        str or None
+            String representation of Series if ``buf=None``, otherwise None.
         """
 
         formatter = fmt.SeriesFormatter(self, name=name, length=length,
@@ -1476,7 +1482,8 @@ def to_dict(self, into=dict):
 
         Returns
         -------
-        value_dict : collections.Mapping
+        collections.Mapping
+            Key-value representation of Series.
 
         Examples
         --------
@@ -1488,7 +1495,7 @@ def to_dict(self, into=dict):
         OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
         >>> dd = defaultdict(list)
         >>> s.to_dict(dd)
-        defaultdict(<type 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
+        defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
         """
         # GH16122
         into_c = com.standardize_mapping(into)
@@ -1506,7 +1513,18 @@ def to_frame(self, name=None):
 
         Returns
         -------
-        data_frame : DataFrame
+        DataFrame
+            DataFrame representation of Series.
+
+        Examples
+        --------
+        >>> s = pd.Series(["a", "b", "c"],
+        ...               name="vals")
+        >>> s.to_frame()
+          vals
+        0    a
+        1    b
+        2    c
         """
         if name is None:
             df = self._constructor_expanddim(self)
@@ -1521,12 +1539,14 @@ def to_sparse(self, kind='block', fill_value=None):
 
         Parameters
         ----------
-        kind : {'block', 'integer'}
+        kind : {'block', 'integer'}, default 'block'
         fill_value : float, defaults to NaN (missing)
+            Value to use for filling NaN values.
 
         Returns
         -------
-        sp : SparseSeries
+        SparseSeries
+            Sparse representation of the Series.
         """
         # TODO: deprecate
         from pandas.core.sparse.series import SparseSeries
@@ -1564,11 +1584,18 @@ def count(self, level=None):
         ----------
         level : int or level name, default None
             If the axis is a MultiIndex (hierarchical), count along a
-            particular level, collapsing into a smaller Series
+            particular level, collapsing into a smaller Series.
 
         Returns
         -------
-        nobs : int or Series (if level specified)
+        int or Series (if level specified)
+            Number of non-null values in the Series.
+
+        Examples
+        --------
+        >>> s = pd.Series([0.0, 1.0, np.nan])
+        >>> s.count()
+        2
         """
         if level is None:
             return notna(com.values_from_object(self)).sum()
@@ -1597,14 +1624,15 @@ def mode(self, dropna=True):
 
         Parameters
         ----------
-        dropna : boolean, default True
+        dropna : bool, default True
            Don't consider counts of NaN/NaT.
 
            .. versionadded:: 0.24.0
 
        Returns
        -------
-        modes : Series (sorted)
+        Series
+            Modes of the Series in sorted order.
        """
        # TODO: Add option for bins like value_counts()
        return algorithms.mode(self, dropna=dropna)
@@ -1677,12 +1705,13 @@ def drop_duplicates(self, keep='first', inplace=False):
             - 'first' : Drop duplicates except for the first occurrence.
             - 'last' : Drop duplicates except for the last occurrence.
             - ``False`` : Drop all duplicates.
-        inplace : boolean, default ``False``
+        inplace : bool, default ``False``
             If ``True``, performs operation inplace and returns None.
 
         Returns
         -------
-        deduplicated : Series
+        Series
+            Series with duplicates dropped.
 
         See Also
         --------
@@ -1759,7 +1788,9 @@ def duplicated(self, keep='first'):
 
         Returns
         -------
-        pandas.core.series.Series
+        Series
+            Series indicating whether each value has occurred in the
+            preceding values.
 
         See Also
         --------
@@ -1823,7 +1854,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
 
         Parameters
         ----------
-        skipna : boolean, default True
+        skipna : bool, default True
             Exclude NA/null values. If the entire Series is NA, the result
             will be NA.
         axis : int, default 0
@@ -1835,7 +1866,8 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
 
         Returns
         -------
-        idxmin : Index of minimum of values.
+        Index
+            Label of the minimum value.
 
         Raises
         ------
@@ -1860,7 +1892,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
         Examples
         --------
         >>> s = pd.Series(data=[1, None, 4, 1],
-        ...               index=['A' ,'B' ,'C' ,'D'])
+        ...               index=['A', 'B', 'C', 'D'])
         >>> s
         A    1.0
         B    NaN
@@ -1892,7 +1924,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs):
 
         Parameters
         ----------
-        skipna : boolean, default True
+        skipna : bool, default True
             Exclude NA/null values. If the entire Series is NA, the result
             will be NA.
         axis : int, default 0
@@ -1904,7 +1936,8 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs):
 
         Returns
         -------
-        idxmax : Index of maximum of values.
+        Index
+            Label of the maximum value.
 
         Raises
         ------
@@ -1988,12 +2021,22 @@ def round(self, decimals=0, *args, **kwargs):
 
         Returns
         -------
-        Series object
+        Series
+            Rounded values of the Series.
 
         See Also
         --------
-        numpy.around
-        DataFrame.round
+        numpy.around : Round values of an np.array.
+        DataFrame.round : Round values of a DataFrame.
+
+        Examples
+        --------
+        >>> s = pd.Series([0.1, 1.3, 2.7])
+        >>> s.round()
+        0    0.0
+        1    1.0
+        2    3.0
+        dtype: float64
         """
         nv.validate_round(args, kwargs)
         result = com.values_from_object(self).round(decimals)
@@ -2008,7 +2051,7 @@ def quantile(self, q=0.5, interpolation='linear'):
         Parameters
         ----------
         q : float or array-like, default 0.5 (50% quantile)
-            0 <= q <= 1, the quantile(s) to compute
+            0 <= q <= 1, the quantile(s) to compute.
         interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
             .. versionadded:: 0.18.0
 
@@ -2024,9 +2067,10 @@ def quantile(self, q=0.5, interpolation='linear'):
 
         Returns
         -------
-        quantile : float or Series
+        float or Series
             If ``q`` is an array, a Series will be returned where the
-            index is ``q`` and the values are the quantiles.
+            index is ``q`` and the values are the quantiles, otherwise
+            a float will be returned.
 
         See Also
         --------
@@ -2072,6 +2116,7 @@ def corr(self, other, method='pearson', min_periods=None):
         Parameters
         ----------
         other : Series
+            Series with which to compute the correlation.
         method : {'pearson', 'kendall', 'spearman'} or callable
             * pearson : standard correlation coefficient
             * kendall : Kendall Tau correlation coefficient
@@ -2081,16 +2126,18 @@ def corr(self, other, method='pearson', min_periods=None):
             .. versionadded:: 0.24.0
 
         min_periods : int, optional
-            Minimum number of observations needed to have a valid result
+            Minimum number of observations needed to have a valid result.
 
         Returns
         -------
-        correlation : float
+        float
+            Correlation with other.
 
         Examples
        --------
-        >>> histogram_intersection = lambda a, b: np.minimum(a, b
-        ... ).sum().round(decimals=1)
+        >>> def histogram_intersection(a, b):
+        ...     v = np.minimum(a, b).sum().round(decimals=1)
+        ...     return v
         >>> s1 = pd.Series([.2, .0, .6, .2])
         >>> s2 = pd.Series([.3, .6, .0, .1])
         >>> s1.corr(s2, method=histogram_intersection)
@@ -2115,14 +2162,22 @@ def cov(self, other, min_periods=None):
         Parameters
         ----------
         other : Series
+            Series with which to compute the covariance.
         min_periods : int, optional
-            Minimum number of observations needed to have a valid result
+            Minimum number of observations needed to have a valid result.
 
         Returns
         -------
-        covariance : float
+        float
+            Covariance between Series and other normalized by N-1
+            (unbiased estimator).
 
-        Normalized by N-1 (unbiased estimator).
+        Examples
+        --------
+        >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
+        >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
+        >>> s1.cov(s2)
+        -0.01685762652715874
         """
         this, other = self.align(other, join='inner', copy=False)
         if len(this) == 0:
@@ -2145,7 +2200,8 @@ def diff(self, periods=1):
 
         Returns
         -------
-        diffed : Series
+        Series
+            First differences of the Series.
 
         See Also
         --------
@@ -2279,7 +2335,7 @@ def dot(self, other):
         8
         >>> s @ other
         8
-        >>> df = pd.DataFrame([[0 ,1], [-2, 3], [4, -5], [6, 7]])
+        >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
         >>> s.dot(df)
         0    24
         1    14
@@ -2348,17 +2404,19 @@ def append(self, to_append, ignore_index=False, verify_integrity=False):
         Parameters
         ----------
         to_append : Series or list/tuple of Series
-        ignore_index : boolean, default False
+            Series to append with self.
+        ignore_index : bool, default False
             If True, do not use the index labels.
 
             .. versionadded:: 0.19.0
 
-        verify_integrity : boolean, default False
-            If True, raise Exception on creating index with duplicates
+        verify_integrity : bool, default False
+            If True, raise Exception on creating index with duplicates.
 
         Returns
         -------
-        appended : Series
+        Series
+            Concatenated Series.
 
         See Also
         --------
@@ -2376,7 +2434,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False):
         --------
         >>> s1 = pd.Series([1, 2, 3])
         >>> s2 = pd.Series([4, 5, 6])
-        >>> s3 = pd.Series([4, 5, 6], index=[3,4,5])
+        >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
         >>> s1.append(s2)
         0    1
         1    2
@@ -2439,7 +2497,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
         Returns
         -------
-        combined : Series
+        Series
         """
         if not isinstance(other, Series):
             raise AssertionError('Other operand must be Series')
@@ -2862,7 +2920,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
         Returns
         -------
-        pandas.Series
+        Series
             The original Series sorted by the labels.
 
         See Also
         --------
@@ -3002,7 +3060,9 @@ def argsort(self, axis=0, kind='quicksort', order=None):
 
         Returns
         -------
-        argsorted : Series, with -1 indicated where nan values are present
+        Series
+            Positions of values within the sort order with -1 indicating
+            nan values.
 
         See Also
         --------
@@ -3220,12 +3280,13 @@ def swaplevel(self, i=-2, j=-1, copy=True):
 
         Parameters
         ----------
-        i, j : int, string (can be mixed)
+        i, j : int, str (can be mixed)
             Level of index to be swapped. Can pass level name as string.
 
         Returns
         -------
-        swapped : Series
+        Series
+            Series with levels swapped in MultiIndex.
 
             .. versionchanged:: 0.18.1
 
@@ -3265,21 +3326,23 @@ def unstack(self, level=-1, fill_value=None):
 
         Parameters
         ----------
-        level : int, string, or list of these, default last level
-            Level(s) to unstack, can pass level name
-        fill_value : replace NaN with this value if the unstack produces
-            missing values
+        level : int, str, or list of these, default last level
+            Level(s) to unstack, can pass level name.
+        fill_value : scalar value, default None
+            Value to use when replacing NaN values.
 
             .. versionadded:: 0.18.0
 
         Returns
         -------
-        unstacked : DataFrame
+        DataFrame
+            Unstacked Series.
 
         Examples
         --------
         >>> s = pd.Series([1, 2, 3, 4],
-        ...     index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']]))
+        ...               index=pd.MultiIndex.from_product([['one', 'two'],
+        ...                                                 ['a', 'b']]))
         >>> s
         one  a    1
              b    2
@@ -3679,7 +3742,7 @@ def rename(self, index=None, **kwargs):
             Scalar or hashable sequence-like will alter the ``Series.name``
             attribute.
         copy : bool, default True
-            Also copy underlying data
+            Whether to copy underlying data.
         inplace : bool, default False
             Whether to return a new Series. If True then value of copy is
             ignored.
@@ -3689,11 +3752,12 @@ def rename(self, index=None, **kwargs):
 
         Returns
         -------
-        renamed : Series (new object)
+        Series
+            Series with index labels or name altered.
 
         See Also
         --------
-        Series.rename_axis
+        Series.rename_axis : Set the name of the axis.
 
         Examples
         --------
@@ -3703,7 +3767,7 @@ def rename(self, index=None, **kwargs):
         1    2
         2    3
         dtype: int64
-        >>> s.rename("my_name") # scalar, changes Series.name
+        >>> s.rename("my_name")  # scalar, changes Series.name
         0    1
         1    2
         2    3
@@ -3762,7 +3826,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None,
 
         Returns
         -------
-        dropped : pandas.Series
+        Series
+            Series with specified index labels removed.
 
         Raises
         ------
@@ -3778,7 +3843,7 @@ def drop(self, labels=None, axis=0, index=None, columns=None,
         Examples
         --------
-        >>> s = pd.Series(data=np.arange(3), index=['A','B','C'])
+        >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
         >>> s
         A  0
         B  1
         C  2
         dtype: int64
 
         Drop labels B en C
@@ -3787,7 +3852,7 @@ def drop(self, labels=None, axis=0, index=None, columns=None,
         Drop labels B en C
 
-        >>> s.drop(labels=['B','C'])
+        >>> s.drop(labels=['B', 'C'])
         A  0
         dtype: int64
 
@@ -3960,7 +4025,8 @@ def isin(self, values):
 
         Returns
         -------
-        isin : Series (bool dtype)
+        Series
+            Series of booleans indicating if each element is in values.
 
         Raises
         ------
@@ -4019,7 +4085,8 @@ def between(self, left, right, inclusive=True):
         Returns
         -------
         Series
-            Each element will be a boolean.
+            Series representing whether each element is between left and
+            right (inclusive).
 
         See Also
         --------
@@ -4101,27 +4168,27 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
 
         Parameters
         ----------
-        path : string file path or file handle / StringIO
-        sep : string, default ','
-            Field delimiter
-        parse_dates : boolean, default True
-            Parse dates. Different default from read_table
+        path : str, file path, or file handle / StringIO
+        sep : str, default ','
+            Field delimiter.
+        parse_dates : bool, default True
+            Parse dates. Different default from read_table.
         header : int, default None
-            Row to use as header (skip prior rows)
+            Row to use as header (skip prior rows).
         index_col : int or sequence, default 0
             Column to use for index. If a sequence is given, a MultiIndex
-            is used. Different default from read_table
-        encoding : string, optional
-            a string representing the encoding to use if the contents are
-            non-ascii, for python versions prior to 3
-        infer_datetime_format : boolean, default False
+            is used. Different default from read_table.
+        encoding : str, optional
+            A string representing the encoding to use if the contents are
+            non-ascii, for python versions prior to 3.
+        infer_datetime_format : bool, default False
             If True and `parse_dates` is True for a column, try to infer the
             datetime format based on the first datetime string. If the format
             can be inferred, there often will be a large parsing speed-up.
 
         Returns
         -------
-        y : Series
+        Series
 
         See Also
         --------
@@ -4322,19 +4389,21 @@ def valid(self, inplace=False, **kwargs):
 
     def to_timestamp(self, freq=None, how='start', copy=True):
         """
-        Cast to datetimeindex of timestamps, at *beginning* of period.
+        Cast to DatetimeIndex of Timestamps, at *beginning* of period.
 
         Parameters
         ----------
-        freq : string, default frequency of PeriodIndex
-            Desired frequency
+        freq : str, default frequency of PeriodIndex
+            Desired frequency.
         how : {'s', 'e', 'start', 'end'}
             Convention for converting period to timestamp; start of period
-            vs. end
+            vs. end.
+        copy : bool, default True
+            Whether or not to return a copy.
 
         Returns
         -------
-        ts : Series with DatetimeIndex
+        Series with DatetimeIndex
         """
         new_values = self._values
         if copy:
@@ -4351,11 +4420,15 @@ def to_period(self, freq=None, copy=True):
 
         Parameters
         ----------
-        freq : string, default
+        freq : str, default None
+            Frequency associated with the PeriodIndex.
+        copy : bool, default True
+            Whether or not to return a copy.
 
         Returns
         -------
-        ts : Series with PeriodIndex
+        Series
+            Series with index converted to PeriodIndex.
         """
         new_values = self._values
         if copy: