From 3e0ee89e7ebbf98659ea18dbed5b1bb345c6880b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 25 Jan 2019 06:22:03 -0600 Subject: [PATCH 1/4] API: Remove IntervalArray from top-level --- doc/source/api/arrays.rst | 4 ++-- doc/source/whatsnew/v0.24.0.rst | 4 ++-- pandas/core/api.py | 1 - pandas/core/arrays/array_.py | 2 +- pandas/tests/api/test_api.py | 1 - 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/source/api/arrays.rst b/doc/source/api/arrays.rst index 5ecc5181af22c..ff1b9f3cecaac 100644 --- a/doc/source/api/arrays.rst +++ b/doc/source/api/arrays.rst @@ -288,12 +288,12 @@ Properties Interval.overlaps Interval.right -A collection of intervals may be stored in an :class:`IntervalArray`. +A collection of intervals may be stored in an :class:`arrays.IntervalArray`. .. autosummary:: :toctree: generated/ - IntervalArray + arrays.IntervalArray IntervalDtype .. _api.arrays.integer_na: diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f0f99d2def136..d3d8863df9fd5 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -225,7 +225,7 @@ from the ``Series``: ser.array pser.array -These return an instance of :class:`IntervalArray` or :class:`arrays.PeriodArray`, +These return an instance of :class:`arrays.IntervalArray` or :class:`arrays.PeriodArray`, the new extension arrays that back interval and period data. .. warning:: @@ -411,7 +411,7 @@ Other Enhancements - :meth:`Categorical.from_codes` now can take a ``dtype`` parameter as an alternative to passing ``categories`` and ``ordered`` (:issue:`24398`). - New attribute ``__git_version__`` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). -- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) +- Added :meth:`Interval.overlaps`, :meth:`arrays.IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`read_fwf` now accepts keyword ``infer_nrows`` (:issue:`15138`). - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`, :issue:`24466`) diff --git a/pandas/core/api.py b/pandas/core/api.py index afc929c39086c..8c92287e212a6 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -4,7 +4,6 @@ import numpy as np -from pandas.core.arrays import IntervalArray from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index c7be8e3f745c4..41d623c7efd9c 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -50,7 +50,7 @@ def array(data, # type: Sequence[object] ============================== ===================================== Scalar Type Array Type ============================== ===================================== - :class:`pandas.Interval` :class:`pandas.IntervalArray` + :class:`pandas.Interval` :class:`pandas.arrays.IntervalArray` :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 07cf358c765b3..599ab9a3c5f7c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -46,7 +46,6 @@ class TestPDApi(Base): 'Series', 'SparseArray', 'SparseDataFrame', 'SparseDtype', 'SparseSeries', 'Timedelta', 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex', - 'IntervalArray', 'CategoricalDtype', 'PeriodDtype', 'IntervalDtype', 'DatetimeTZDtype', 'Int8Dtype', 'Int16Dtype', 'Int32Dtype', 'Int64Dtype', From ffaecf1b3c029bd4b21be49f591720bce58b306a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 25 Jan 2019 07:03:01 -0600 Subject: [PATCH 2/4] fixups --- pandas/core/arrays/interval.py | 2 +- pandas/tests/arrays/test_array.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 45470e03c041a..82f985698bd1b 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -115,7 +115,7 @@ A new ``IntervalArray`` can be constructed directly from an array-like of ``Interval`` objects: - >>> pd.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) + >>> pandas.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) IntervalArray([(0, 1], (1, 5]], closed='right', dtype='interval[int64]') diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 4a51fd63d963b..9fea1989e46df 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -74,7 +74,7 @@ # Interval ([pd.Interval(1, 2), pd.Interval(3, 4)], 'interval', - pd.IntervalArray.from_tuples([(1, 2), (3, 4)])), + pd.arrays.IntervalArray.from_tuples([(1, 2), (3, 4)])), # Sparse ([0, 1], 'Sparse[int64]', pd.SparseArray([0, 1], dtype='int64')), @@ -129,7 +129,7 @@ def test_array_copy(): # interval ([pd.Interval(0, 1), pd.Interval(1, 2)], - pd.IntervalArray.from_breaks([0, 1, 2])), + pd.arrays.IntervalArray.from_breaks([0, 1, 2])), # datetime ([pd.Timestamp('2000',), pd.Timestamp('2001')], From e79cdcf8fa44370ae5bcf7c02bba60136c63d08d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 25 Jan 2019 07:15:36 -0600 Subject: [PATCH 3/4] Update pandas/core/arrays/interval.py Co-Authored-By: TomAugspurger --- pandas/core/arrays/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 82f985698bd1b..67d7281a91b4c 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -115,7 +115,7 @@ A new ``IntervalArray`` can be constructed directly from an array-like of ``Interval`` objects: - >>> pandas.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) + >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) IntervalArray([(0, 1], (1, 5]], closed='right', dtype='interval[int64]') From 86ec7b9951d5097a460e42931bce6ed9ee8f51ae Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 25 Jan 2019 08:18:32 -0600 Subject: [PATCH 4/4] doctests --- pandas/core/arrays/interval.py | 27 ++++++++++++++------------- pandas/core/indexes/interval.py | 23 ++++++++++++----------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 67d7281a91b4c..1e671c7bd956a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -32,6 +32,7 @@ _shared_docs_kwargs = dict( klass='IntervalArray', + qualname='arrays.IntervalArray', name='' ) @@ -248,8 +249,8 @@ def _from_factorized(cls, values, original): Examples -------- - >>> pd.%(klass)s.from_breaks([0, 1, 2, 3]) - %(klass)s([(0, 1], (1, 2], (2, 3]] + >>> pd.%(qualname)s.from_breaks([0, 1, 2, 3]) + %(klass)s([(0, 1], (1, 2], (2, 3]], closed='right', dtype='interval[int64]') """ @@ -311,7 +312,7 @@ def from_breaks(cls, breaks, closed='right', copy=False, dtype=None): Examples -------- >>> %(klass)s.from_arrays([0, 1, 2], [1, 2, 3]) - %(klass)s([(0, 1], (1, 2], (2, 3]] + %(klass)s([(0, 1], (1, 2], (2, 3]], closed='right', dtype='interval[int64]') """ @@ -354,16 +355,16 @@ def from_arrays(cls, left, right, closed='right', copy=False, dtype=None): Examples -------- - >>> pd.%(klass)s.from_intervals([pd.Interval(0, 1), + >>> pd.%(qualname)s.from_intervals([pd.Interval(0, 1), ... pd.Interval(1, 2)]) - %(klass)s([(0, 1], (1, 2]] + %(klass)s([(0, 1], (1, 2]], closed='right', dtype='interval[int64]') The generic Index constructor work identically when it infers an array of all intervals: >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)]) - %(klass)s([(0, 1], (1, 2]] + %(klass)s([(0, 1], (1, 2]], closed='right', dtype='interval[int64]') """ @@ -394,7 +395,7 @@ def from_arrays(cls, left, right, closed='right', copy=False, dtype=None): Examples -------- - >>> pd.%(klass)s.from_tuples([(0, 1), (1, 2)]) + >>> pd.%(qualname)s.from_tuples([(0, 1), (1, 2)]) %(klass)s([(0, 1], (1, 2]], closed='right', dtype='interval[int64]') """ @@ -891,13 +892,13 @@ def closed(self): Examples -------- - >>> index = pd.interval_range(0, 3) - >>> index - %(klass)s([(0, 1], (1, 2], (2, 3]] + >>> index = pd.interval_range(0, 3) + >>> index + IntervalIndex([(0, 1], (1, 2], (2, 3]], closed='right', dtype='interval[int64]') - >>> index.set_closed('both') - %(klass)s([[0, 1], [1, 2], [2, 3]] + >>> index.set_closed('both') + IntervalIndex([[0, 1], [1, 2], [2, 3]], closed='both', dtype='interval[int64]') """ @@ -1039,7 +1040,7 @@ def repeat(self, repeats, axis=None): Examples -------- - >>> intervals = pd.%(klass)s.from_tuples([(0, 1), (1, 3), (2, 4)]) + >>> intervals = pd.%(qualname)s.from_tuples([(0, 1), (1, 3), (2, 4)]) >>> intervals %(klass)s([(0, 1], (1, 3], (2, 4]], closed='right', diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2a6044fb0a08b..0210560aaa21f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -38,6 +38,7 @@ _index_doc_kwargs.update( dict(klass='IntervalIndex', + qualname="IntervalIndex", target_klass='IntervalIndex or list of Intervals', name=textwrap.dedent("""\ name : object, optional @@ -282,10 +283,10 @@ def contains(self, key): examples=""" Examples -------- - >>> idx = pd.IntervalIndex.from_arrays([0, np.nan, 2], [1, np.nan, 3]) - >>> idx.to_tuples() + >>> idx = pd.IntervalIndex.from_arrays([0, np.nan, 2], [1, np.nan, 3]) + >>> idx.to_tuples() Index([(0.0, 1.0), (nan, nan), (2.0, 3.0)], dtype='object') - >>> idx.to_tuples(na_tuple=False) + >>> idx.to_tuples(na_tuple=False) Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object')""", )) def to_tuples(self, na_tuple=True): @@ -1201,15 +1202,15 @@ def interval_range(start=None, end=None, periods=None, freq=None, Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) - IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], closed='right', dtype='interval[int64]') Additionally, datetime-like input is also supported. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), - end=pd.Timestamp('2017-01-04')) + ... end=pd.Timestamp('2017-01-04')) IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], - (2017-01-03, 2017-01-04]] + (2017-01-03, 2017-01-04]], closed='right', dtype='interval[datetime64[ns]]') The ``freq`` parameter specifies the frequency between the left and right. @@ -1217,23 +1218,23 @@ def interval_range(start=None, end=None, periods=None, freq=None, numeric ``start`` and ``end``, the frequency must also be numeric. >>> pd.interval_range(start=0, periods=4, freq=1.5) - IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), - periods=3, freq='MS') + ... periods=3, freq='MS') IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], - (2017-03-01, 2017-04-01]] + (2017-03-01, 2017-04-01]], closed='right', dtype='interval[datetime64[ns]]') Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). >>> pd.interval_range(start=0, end=6, periods=4) - IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], closed='right', dtype='interval[float64]') @@ -1241,7 +1242,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, intervals within the ``IntervalIndex`` are closed. >>> pd.interval_range(end=5, periods=4, closed='both') - IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] + IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ start = com.maybe_box_datetimelike(start)