Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Add uint64 support to IntervalTree #20651

Merged
merged 2 commits into from
Apr 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,7 @@ Indexing
- Bug in ``Index`` subclass constructors that ignore unexpected keyword arguments (:issue:`19348`)
- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`)
- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`).
- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`)

MultiIndex
^^^^^^^^^^
Expand Down
5 changes: 3 additions & 2 deletions pandas/_libs/intervaltree.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

from numpy cimport (
int64_t, int32_t, float64_t, float32_t,
int64_t, int32_t, float64_t, float32_t, uint64_t,
ndarray,
PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take)
import numpy as np
Expand All @@ -24,6 +24,7 @@ ctypedef fused scalar_t:
float32_t
int64_t
int32_t
uint64_t
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not entirely sure if this is necessary

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine. Even if your tests pass without it, I would still keep it for consistency.



#----------------------------------------------------------------------
Expand Down Expand Up @@ -205,7 +206,7 @@ cdef sort_values_and_indices(all_values, all_indices, subset):
{{py:

nodes = []
for dtype in ['float32', 'float64', 'int32', 'int64']:
for dtype in ['float32', 'float64', 'int32', 'int64', 'uint64']:
for closed, cmp_left, cmp_right in [
('left', '<=', '<'),
('right', '<', '<='),
Expand Down
64 changes: 33 additions & 31 deletions pandas/tests/indexes/interval/test_interval_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,37 @@ def closed(request):
return request.param


@pytest.fixture(scope='class',
                params=['int32', 'int64', 'float32', 'float64', 'uint64'])
def dtype(request):
    """Class-scoped, parametrized fixture yielding one dtype name per run.

    The value is one of the strings listed in ``params`` and is handed to
    numpy as a ``dtype=`` argument by the tests and fixtures that use it.
    """
    selected = request.param
    return selected


@pytest.fixture(scope='class')
def tree(dtype):
    """Build an ``IntervalTree`` of five intervals in the requested dtype.

    Left bounds are ``0..4`` (``np.arange(5)``) and each right bound is its
    left bound plus 2, so neighbouring intervals overlap.
    """
    starts = np.arange(5, dtype=dtype)
    ends = starts + 2
    return IntervalTree(starts, ends)


class TestIntervalTree(object):
def setup_method(self, method):
def gentree(dtype):
left = np.arange(5, dtype=dtype)
right = left + 2
return IntervalTree(left, right)

self.tree = gentree('int64')
self.trees = {dtype: gentree(dtype)
for dtype in ['int32', 'int64', 'float32', 'float64']}

def test_get_loc(self):
for dtype, tree in self.trees.items():
tm.assert_numpy_array_equal(tree.get_loc(1),
np.array([0], dtype='int64'))
tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
np.array([0, 1], dtype='int64'))
with pytest.raises(KeyError):
tree.get_loc(-1)

def test_get_indexer(self):
for dtype, tree in self.trees.items():
tm.assert_numpy_array_equal(
tree.get_indexer(np.array([1.0, 5.5, 6.5])),
np.array([0, 4, -1], dtype='int64'))
with pytest.raises(KeyError):
tree.get_indexer(np.array([3.0]))

def test_get_indexer_non_unique(self):
indexer, missing = self.tree.get_indexer_non_unique(

# Exercise ``get_loc`` on the ``tree`` fixture (left = 0..4, right = left + 2),
# once per parametrized dtype.
def test_get_loc(self, tree):
# Key 1 is expected to resolve to position 0 only.
tm.assert_numpy_array_equal(tree.get_loc(1),
np.array([0], dtype='int64'))
# Key 2 is expected to resolve to positions 0 and 1; the result is sorted
# before comparing, so the order returned by the tree is not asserted.
tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)),
np.array([0, 1], dtype='int64'))
# A key below every left bound must raise KeyError.
with pytest.raises(KeyError):
tree.get_loc(-1)

# Exercise ``get_indexer`` on the ``tree`` fixture, once per parametrized dtype.
def test_get_indexer(self, tree):
# Targets 1.0 and 5.5 are expected at positions 0 and 4; 6.5 is expected
# to be reported as -1.
tm.assert_numpy_array_equal(
tree.get_indexer(np.array([1.0, 5.5, 6.5])),
np.array([0, 4, -1], dtype='int64'))
# 3.0 must raise KeyError.
# NOTE(review): presumably because 3.0 falls inside more than one of the
# overlapping intervals -- confirm against IntervalTree.get_indexer.
with pytest.raises(KeyError):
tree.get_indexer(np.array([3.0]))

def test_get_indexer_non_unique(self, tree):
indexer, missing = tree.get_indexer_non_unique(
np.array([1.0, 2.0, 6.5]))
tm.assert_numpy_array_equal(indexer[:1],
np.array([0], dtype='int64'))
Expand All @@ -51,8 +52,9 @@ def test_get_indexer_non_unique(self):
np.array([-1], dtype='int64'))
tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64'))

def test_duplicates(self):
tree = IntervalTree([0, 0, 0], [1, 1, 1])
# Check ``get_loc`` when the tree holds three identical intervals, built in
# the parametrized dtype (left = 0, right = left + 1).
def test_duplicates(self, dtype):
left = np.array([0, 0, 0], dtype=dtype)
tree = IntervalTree(left, left + 1)
# 0.5 is expected to match all three duplicates; the result is sorted
# before comparing, so the order returned by the tree is not asserted.
tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)),
np.array([0, 1, 2], dtype='int64'))

Expand Down