diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 979dad6db0838..22f7104debf10 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -41,11 +41,13 @@ cdef inline bint is_definitely_invalid_key(object val): cpdef get_value_at(ndarray arr, object loc, object tz=None): + obj = util.get_value_at(arr, loc) + if arr.descr.type_num == NPY_DATETIME: - return Timestamp(util.get_value_at(arr, loc), tz=tz) + return Timestamp(obj, tz=tz) elif arr.descr.type_num == NPY_TIMEDELTA: - return Timedelta(util.get_value_at(arr, loc)) - return util.get_value_at(arr, loc) + return Timedelta(obj) + return obj # Don't populate hash tables in monotonic indexes larger than this @@ -102,6 +104,9 @@ cdef class IndexEngine: arr[loc] = value cpdef get_loc(self, object val): + cdef: + Py_ssize_t loc + if is_definitely_invalid_key(val): raise TypeError("'{val}' is an invalid key".format(val=val)) @@ -114,7 +119,7 @@ cdef class IndexEngine: loc = _bin_search(values, val) # .searchsorted(val, side='left') if loc >= len(values): raise KeyError(val) - if util.get_value_at(values, loc) != val: + if values[loc] != val: raise KeyError(val) return loc @@ -352,22 +357,22 @@ cdef Py_ssize_t _bin_search(ndarray values, object val) except -1: Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1 object pval - if hi == 0 or (hi > 0 and val > util.get_value_at(values, hi)): + if hi == 0 or (hi > 0 and val > values[hi]): return len(values) while lo < hi: mid = (lo + hi) // 2 - pval = util.get_value_at(values, mid) + pval = values[mid] if val < pval: hi = mid elif val > pval: lo = mid + 1 else: - while mid > 0 and val == util.get_value_at(values, mid - 1): + while mid > 0 and val == values[mid - 1]: mid -= 1 return mid - if val <= util.get_value_at(values, mid): + if val <= values[mid]: return mid else: return mid + 1 @@ -387,13 +392,16 @@ cdef class DatetimeEngine(Int64Engine): return 'M8[ns]' def __contains__(self, object val): + cdef: + int64_t loc + if self.over_size_threshold and self.is_monotonic_increasing: if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() conv = maybe_datetimelike_to_i8(val) loc = values.searchsorted(conv, side='left') - return util.get_value_at(values, loc) == conv + return values[loc] == conv self._ensure_mapping_populated() return maybe_datetimelike_to_i8(val) in self.mapping @@ -405,6 +413,8 @@ cdef class DatetimeEngine(Int64Engine): return algos.is_monotonic(values, timelike=True) cpdef get_loc(self, object val): + cdef: + int64_t loc if is_definitely_invalid_key(val): raise TypeError @@ -422,7 +432,7 @@ cdef class DatetimeEngine(Int64Engine): self._date_check_type(val) raise KeyError(val) - if loc == len(values) or util.get_value_at(values, loc) != conv: + if loc == len(values) or values[loc] != conv: raise KeyError(val) return loc diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1c2f80b832201..a3a50644e58f3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -782,8 +782,16 @@ def generate_slices(const int64_t[:] labels, Py_ssize_t ngroups): return starts, ends -def indices_fast(object index, const int64_t[:] labels, list keys, +def indices_fast(ndarray index, const int64_t[:] labels, list keys, list sorted_labels): + """ + Parameters + ---------- + index : ndarray + labels : ndarray[int64] + keys : list + sorted_labels : list[ndarray[int64]] + """ cdef: Py_ssize_t i, j, k, lab, cur, start, n = len(labels) dict result = {} @@ -803,8 +811,7 @@ def indices_fast(object index, const int64_t[:] labels, list keys, if lab != -1: tup = PyTuple_New(k) for j in range(k): - val = util.get_value_at(keys[j], - sorted_labels[j][i - 1]) + val = keys[j][sorted_labels[j][i - 1]] PyTuple_SET_ITEM(tup, j, val) Py_INCREF(val) @@ -814,8 +821,7 @@ def indices_fast(object index, const int64_t[:] labels, list keys, tup = PyTuple_New(k) for j in range(k): - val = util.get_value_at(keys[j], - sorted_labels[j][n - 1]) + val = keys[j][sorted_labels[j][n - 1]] PyTuple_SET_ITEM(tup, j, val) Py_INCREF(val) result[tup] = index[start:] diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 34eb9412451c5..0eac0e94f0beb 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -121,7 +121,7 @@ cdef class Reducer: for i in range(self.nresults): if has_ndarray_labels: - name = util.get_value_at(labels, i) + name = labels[i] elif has_labels: # labels is an ExtensionArray name = labels[i] diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e6edad656d430..94810369785d3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -303,8 +303,8 @@ def get_flattened_iterator(comp_ids, ngroups, levels, labels): def get_indexer_dict(label_list, keys): - """ return a diction of {labels} -> {indexers} """ - shape = list(map(len, keys)) + """ return a dict of {labels} -> {indexers} """ + shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) ngroups = (