CLN: address TODOs, FIXMEs (#44258)
jbrockmendel committed Nov 1, 2021
1 parent cb83977 commit e8d3136
Showing 8 changed files with 47 additions and 13 deletions.
30 changes: 28 additions & 2 deletions pandas/_libs/join.pyx
@@ -264,6 +264,9 @@ def left_join_indexer_unique(
ndarray[numeric_object_t] left,
ndarray[numeric_object_t] right
):
"""
Both left and right are strictly monotonic increasing.
"""
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[intp_t] indexer
@@ -311,6 +314,9 @@ def left_join_indexer_unique(
def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
Both left and right are monotonic increasing, but at least one of them
is non-unique (if both were unique we'd use left_join_indexer_unique).
"""
cdef:
Py_ssize_t i, j, k, nright, nleft, count
@@ -321,6 +327,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
i = 0
j = 0
count = 0
@@ -334,6 +341,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
rval = right[j]

if lval == rval:
# This block is identical across
# left_join_indexer, inner_join_indexer, outer_join_indexer
count += 1
if i < nleft - 1:
if j < nright - 1 and right[j + 1] == rval:
@@ -398,12 +407,14 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we keep for left_join_indexer
lindexer[count] = i
rindexer[count] = -1
result[count] = left[i]
result[count] = lval
count += 1
i += 1
else:
# i.e. rval not in left; we discard for left_join_indexer
j += 1

return result, lindexer, rindexer
@@ -414,6 +425,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
Both left and right are monotonic increasing but not necessarily unique.
"""
cdef:
Py_ssize_t i, j, k, nright, nleft, count
@@ -424,6 +437,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
i = 0
j = 0
count = 0
@@ -453,8 +467,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we discard for inner_indexer
i += 1
else:
# i.e. rval not in left; we discard for inner_indexer
j += 1

# do it again now that result size is known
@@ -478,7 +494,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
if lval == rval:
lindexer[count] = i
rindexer[count] = j
result[count] = rval
result[count] = lval
count += 1
if i < nleft - 1:
if j < nright - 1 and right[j + 1] == rval:
@@ -495,8 +511,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we discard for inner_indexer
i += 1
else:
# i.e. rval not in left; we discard for inner_indexer
j += 1

return result, lindexer, rindexer
@@ -505,6 +523,9 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
@cython.wraparound(False)
@cython.boundscheck(False)
def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Both left and right are monotonic increasing but not necessarily unique.
"""
cdef:
Py_ssize_t i, j, nright, nleft, count
numeric_object_t lval, rval
@@ -514,6 +535,9 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
# count will be length of left plus the number of elements of right not in
# left (counting duplicates)
i = 0
j = 0
count = 0
@@ -616,12 +640,14 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we keep for outer_join_indexer
lindexer[count] = i
rindexer[count] = -1
result[count] = lval
count += 1
i += 1
else:
# i.e. rval not in left; we keep for outer_join_indexer
lindexer[count] = -1
rindexer[count] = j
result[count] = rval
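
For orientation, the join-indexer routines above all walk the two sorted inputs with a merge-style scan: a first pass counts how many output rows are needed, and a second pass fills the preallocated result and indexer arrays. Below is a minimal pure-Python sketch of the simplest case, the strictly increasing, unique inputs handled by left_join_indexer_unique; the helper name is ours for illustration, and the real routine is typed Cython over numeric_object_t.

import numpy as np

def left_join_indexer_unique_sketch(left, right):
    # Both inputs are assumed strictly monotonic increasing (the precondition
    # stated in the docstring above). Returns, for each element of left, its
    # position in right, or -1 if it has no counterpart there.
    nleft, nright = len(left), len(right)
    indexer = np.empty(nleft, dtype=np.intp)
    j = 0
    for i in range(nleft):
        # advance j until right[j] >= left[i]
        while j < nright and right[j] < left[i]:
            j += 1
        if j < nright and right[j] == left[i]:
            indexer[i] = j
        else:
            indexer[i] = -1
    return indexer

# left_join_indexer_unique_sketch(np.array([1, 3, 5]), np.array([3, 4, 5]))
# -> array([-1,  0,  2])
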
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/fields.pyx
@@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:
@cython.wraparound(False)
@cython.boundscheck(False)
def get_start_end_field(const int64_t[:] dtindex, str field,
object freqstr=None, int month_kw=12):
str freqstr=None, int month_kw=12):
"""
Given an int64-based datetime index return array of indicators
of whether timestamps are at the start/end of the month/quarter/year
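
For context, the start/end indicators computed by get_start_end_field back the user-facing datetime accessors such as is_month_start and is_quarter_end; a quick pandas-level illustration, independent of this internal signature:

import pandas as pd

idx = pd.date_range("2021-01-30", periods=4, freq="D")
print(idx.is_month_start)  # [False False  True False]
print(idx.is_month_end)    # [False  True False False]
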
9 changes: 7 additions & 2 deletions pandas/core/array_algos/putmask.py
@@ -9,7 +9,10 @@
import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike
from pandas._typing import (
ArrayLike,
npt,
)

from pandas.core.dtypes.cast import (
convert_scalar_for_putitemlike,
@@ -26,13 +29,14 @@
from pandas.core.arrays import ExtensionArray


def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
"""
ExtensionArray-compatible implementation of np.putmask. The main
difference is we do not handle repeating or truncating like numpy.
Parameters
----------
values: np.ndarray or ExtensionArray
mask : np.ndarray[bool]
We assume extract_bool_array has already been called.
value : Any
@@ -51,6 +55,7 @@ def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
)
):
# GH#19266 using np.putmask gives unexpected results with listlike value
# along with object dtype
if is_list_like(value) and len(value) == len(values):
values[mask] = value[mask]
else:
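
The list-like special case above exists because np.putmask cycles a replacement that is shorter than the target by absolute position rather than pairing values with the masked slots, which is the repeating/truncating behaviour the docstring refers to. A small NumPy-only illustration (not pandas code):

import numpy as np

arr = np.array([10, 20, 30, 40], dtype=object)
mask = np.array([True, False, True, False])

np.putmask(arr, mask, [-1, -2])   # cycles by position: arr[0] = -1, arr[2] = -1 (2 % 2 == 0)
print(arr)                        # [-1 20 -1 40]

arr2 = np.array([10, 20, 30, 40], dtype=object)
arr2[mask] = [-1, -2]             # plain masked assignment pairs values with the True slots
print(arr2)                       # [-1 20 -2 40]
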
1 change: 0 additions & 1 deletion pandas/core/dtypes/dtypes.py
@@ -1259,7 +1259,6 @@ def __from_arrow__(
return IntervalArray._concat_same_type(results)

def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# NB: this doesn't handle checking for closed match
if not all(isinstance(x, IntervalDtype) for x in dtypes):
return None

4 changes: 2 additions & 2 deletions pandas/core/indexers/utils.py
@@ -104,14 +104,14 @@ def is_scalar_indexer(indexer, ndim: int) -> bool:
return False


def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
def is_empty_indexer(indexer, arr_value: ArrayLike) -> bool:
"""
Check if we have an empty indexer.
Parameters
----------
indexer : object
arr_value : np.ndarray
arr_value : np.ndarray or ExtensionArray
Returns
-------
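
An "empty indexer" here is one that selects no elements at all, so a setitem through it can be treated as a no-op (this is what the blocks.py change below relies on). A sketch of the idea in plain NumPy:

import numpy as np

arr = np.array([1, 2, 3])

arr[np.array([], dtype=np.intp)] = 99   # empty positional indexer: selects nothing
arr[np.zeros(3, dtype=bool)] = 99       # all-False boolean mask: also selects nothing
print(arr)                              # [1 2 3] -- unchanged in both cases
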
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
@@ -3123,7 +3123,9 @@ def _union(self, other: Index, sort):
and not (self.has_duplicates and other.has_duplicates)
and self._can_use_libjoin
):
# Both are unique and monotonic, so can use outer join
# Both are monotonic and at least one is unique, so can use outer join
# (actually don't need either unique, but without this restriction
# test_union_same_value_duplicated_in_both fails)
try:
return self._outer_indexer(other)[0]
except (TypeError, IncompatibleFrequency):
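
At the user level this fast path is just Index.union on sorted inputs: when both indexes are monotonic increasing and at least one is unique, the outer join indexer yields the sorted union directly. For example, with two unique monotonic integer indexes (eligible for the fast path shown above):

import pandas as pd

left = pd.Index([1, 2, 3, 5])
right = pd.Index([2, 3, 4])
print(left.union(right))   # sorted union: [1, 2, 3, 4, 5]
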
8 changes: 5 additions & 3 deletions pandas/core/internals/blocks.py
@@ -918,7 +918,7 @@ def setitem(self, indexer, value):
check_setitem_lengths(indexer, value, values)

if is_empty_indexer(indexer, arr_value):
# GH#8669 empty indexers
# GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse
pass

elif is_scalar_indexer(indexer, self.ndim):
@@ -1698,7 +1698,7 @@ def putmask(self, mask, new) -> list[Block]:
mask = extract_bool_array(mask)

if not self._can_hold_element(new):
return self.astype(_dtype_obj).putmask(mask, new)
return self.coerce_to_target_dtype(new).putmask(mask, new)

arr = self.values
arr.T.putmask(mask, new)
@@ -1755,7 +1755,9 @@ def fillna(
# We support filling a DatetimeTZ with a `value` whose timezone
# is different by coercing to object.
# TODO: don't special-case td64
return self.astype(_dtype_obj).fillna(value, limit, inplace, downcast)
return self.coerce_to_target_dtype(value).fillna(
value, limit, inplace, downcast
)

values = self.values
values = values if inplace else values.copy()
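
The putmask and fillna fallbacks above now route through coerce_to_target_dtype rather than unconditionally casting to object. For the timezone-mismatch case described in the comment, the observable result should still be an object-dtype outcome; a rough illustration of that documented case (behaviour as stated by the comment, not independently re-verified here):

import pandas as pd

ser = pd.Series(pd.to_datetime(["2021-01-01", None]).tz_localize("UTC"))
filled = ser.fillna(pd.Timestamp("2021-01-02", tz="US/Eastern"))
print(filled.dtype)   # object: a fill value in a different timezone cannot stay datetime64[ns, UTC]
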
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
@@ -2075,7 +2075,7 @@ def test_td64arr_div_numeric_array(
with pytest.raises(TypeError, match=pattern):
vector.astype(object) / tdser

def test_td64arr_mul_int_series(self, box_with_array, names, request):
def test_td64arr_mul_int_series(self, box_with_array, names):
# GH#19042 test for correct name attachment
box = box_with_array
exname = get_expected_name(box, names)
