CLN: address TODOs, FIXMEs (#44258)
jbrockmendel committed Nov 1, 2021
1 parent cb83977 commit e8d3136
Showing 8 changed files with 47 additions and 13 deletions.
30 changes: 28 additions & 2 deletions pandas/_libs/join.pyx
@@ -264,6 +264,9 @@ def left_join_indexer_unique(
ndarray[numeric_object_t] left,
ndarray[numeric_object_t] right
):
"""
Both left and right are strictly monotonic increasing.
"""
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[intp_t] indexer
@@ -311,6 +314,9 @@ def left_join_indexer_unique(
def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
Both left and right are monotonic increasing, but at least one of them
is non-unique (if both were unique we'd use left_join_indexer_unique).
"""
cdef:
Py_ssize_t i, j, k, nright, nleft, count
@@ -321,6 +327,7 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
i = 0
j = 0
count = 0
@@ -334,6 +341,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
rval = right[j]

if lval == rval:
# This block is identical across
# left_join_indexer, inner_join_indexer, outer_join_indexer
count += 1
if i < nleft - 1:
if j < nright - 1 and right[j + 1] == rval:
@@ -398,12 +407,14 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we keep for left_join_indexer
lindexer[count] = i
rindexer[count] = -1
result[count] = left[i]
result[count] = lval
count += 1
i += 1
else:
# i.e. rval not in left; we discard for left_join_indexer
j += 1

return result, lindexer, rindexer
@@ -414,6 +425,8 @@ def left_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Two-pass algorithm for monotonic indexes. Handles many-to-one merges.
Both left and right are monotonic increasing but not necessarily unique.
"""
cdef:
Py_ssize_t i, j, k, nright, nleft, count
@@ -424,6 +437,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
i = 0
j = 0
count = 0
@@ -453,8 +467,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we discard for inner_indexer
i += 1
else:
# i.e. rval not in left; we discard for inner_indexer
j += 1

# do it again now that result size is known
@@ -478,7 +494,7 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
if lval == rval:
lindexer[count] = i
rindexer[count] = j
result[count] = rval
result[count] = lval
count += 1
if i < nleft - 1:
if j < nright - 1 and right[j + 1] == rval:
@@ -495,8 +511,10 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we discard for inner_indexer
i += 1
else:
# i.e. rval not in left; we discard for inner_indexer
j += 1

return result, lindexer, rindexer
@@ -505,6 +523,9 @@ def inner_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
@cython.wraparound(False)
@cython.boundscheck(False)
def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t] right):
"""
Both left and right are monotonic increasing but not necessarily unique.
"""
cdef:
Py_ssize_t i, j, nright, nleft, count
numeric_object_t lval, rval
@@ -514,6 +535,9 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
nleft = len(left)
nright = len(right)

# First pass is to find the size 'count' of our output indexers.
# count will be length of left plus the number of elements of right not in
# left (counting duplicates)
i = 0
j = 0
count = 0
@@ -616,12 +640,14 @@ def outer_join_indexer(ndarray[numeric_object_t] left, ndarray[numeric_object_t]
# end of the road
break
elif lval < rval:
# i.e. lval not in right; we keep for outer_join_indexer
lindexer[count] = i
rindexer[count] = -1
result[count] = lval
count += 1
i += 1
else:
# i.e. rval not in left; we keep for outer_join_indexer
lindexer[count] = -1
rindexer[count] = j
result[count] = rval
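
For orientation, the join-indexer routines above all walk the two sorted inputs with a merge-style scan: a first pass counts how many output rows are needed, and a second pass fills the preallocated result and indexer arrays. Below is a minimal pure-Python sketch of the simplest case, the strictly increasing, unique inputs handled by left_join_indexer_unique; the helper name is ours for illustration, and the real routine is typed Cython over numeric_object_t.

import numpy as np

def left_join_indexer_unique_sketch(left, right):
    # Both inputs are assumed strictly monotonic increasing (the precondition
    # stated in the docstring above). Returns, for each element of left, its
    # position in right, or -1 if it has no counterpart there.
    nleft, nright = len(left), len(right)
    indexer = np.empty(nleft, dtype=np.intp)
    j = 0
    for i in range(nleft):
        # advance j until right[j] >= left[i]
        while j < nright and right[j] < left[i]:
            j += 1
        if j < nright and right[j] == left[i]:
            indexer[i] = j
        else:
            indexer[i] = -1
    return indexer

# left_join_indexer_unique_sketch(np.array([1, 3, 5]), np.array([3, 4, 5]))
# -> array([-1,  0,  2])
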
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/fields.pyx
@@ -198,7 +198,7 @@ cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil:
@cython.wraparound(False)
@cython.boundscheck(False)
def get_start_end_field(const int64_t[:] dtindex, str field,
object freqstr=None, int month_kw=12):
str freqstr=None, int month_kw=12):
"""
Given an int64-based datetime index return array of indicators
of whether timestamps are at the start/end of the month/quarter/year
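
For context, the start/end indicators computed by get_start_end_field back the user-facing datetime accessors such as is_month_start and is_quarter_end; a quick pandas-level illustration, independent of this internal signature:

import pandas as pd

idx = pd.date_range("2021-01-30", periods=4, freq="D")
print(idx.is_month_start)  # [False False  True False]
print(idx.is_month_end)    # [False  True False False]
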
9 changes: 7 additions & 2 deletions pandas/core/array_algos/putmask.py
@@ -9,7 +9,10 @@
import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike
from pandas._typing import (
ArrayLike,
npt,
)

from pandas.core.dtypes.cast import (
convert_scalar_for_putitemlike,
@@ -26,13 +29,14 @@
from pandas.core.arrays import ExtensionArray


def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
"""
ExtensionArray-compatible implementation of np.putmask. The main
difference is we do not handle repeating or truncating like numpy.
Parameters
----------
values: np.ndarray or ExtensionArray
mask : np.ndarray[bool]
We assume extract_bool_array has already been called.
value : Any
@@ -51,6 +55,7 @@ def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
)
):
# GH#19266 using np.putmask gives unexpected results with listlike value
# along with object dtype
if is_list_like(value) and len(value) == len(values):
values[mask] = value[mask]
else:
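
The list-like special case above exists because np.putmask cycles a replacement that is shorter than the target by absolute position rather than pairing values with the masked slots, which is the repeating/truncating behaviour the docstring refers to. A small NumPy-only illustration (not pandas code):

import numpy as np

arr = np.array([10, 20, 30, 40], dtype=object)
mask = np.array([True, False, True, False])

np.putmask(arr, mask, [-1, -2])   # cycles by position: arr[0] = -1, arr[2] = -1 (2 % 2 == 0)
print(arr)                        # [-1 20 -1 40]

arr2 = np.array([10, 20, 30, 40], dtype=object)
arr2[mask] = [-1, -2]             # plain masked assignment pairs values with the True slots
print(arr2)                       # [-1 20 -2 40]
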
1 change: 0 additions & 1 deletion pandas/core/dtypes/dtypes.py
@@ -1259,7 +1259,6 @@ def __from_arrow__(
return IntervalArray._concat_same_type(results)

def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# NB: this doesn't handle checking for closed match
if not all(isinstance(x, IntervalDtype) for x in dtypes):
return None

4 changes: 2 additions & 2 deletions pandas/core/indexers/utils.py
@@ -104,14 +104,14 @@ def is_scalar_indexer(indexer, ndim: int) -> bool:
return False


def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
def is_empty_indexer(indexer, arr_value: ArrayLike) -> bool:
"""
Check if we have an empty indexer.
Parameters
----------
indexer : object
arr_value : np.ndarray
arr_value : np.ndarray or ExtensionArray
Returns
-------
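
An "empty indexer" here is one that selects no elements at all, so a setitem through it can be treated as a no-op (this is what the blocks.py change below relies on). A sketch of the idea in plain NumPy:

import numpy as np

arr = np.array([1, 2, 3])

arr[np.array([], dtype=np.intp)] = 99   # empty positional indexer: selects nothing
arr[np.zeros(3, dtype=bool)] = 99       # all-False boolean mask: also selects nothing
print(arr)                              # [1 2 3] -- unchanged in both cases
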
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
@@ -3123,7 +3123,9 @@ def _union(self, other: Index, sort):
and not (self.has_duplicates and other.has_duplicates)
and self._can_use_libjoin
):
# Both are unique and monotonic, so can use outer join
# Both are monotonic and at least one is unique, so can use outer join
# (actually don't need either unique, but without this restriction
# test_union_same_value_duplicated_in_both fails)
try:
return self._outer_indexer(other)[0]
except (TypeError, IncompatibleFrequency):
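
At the user level this fast path is just Index.union on sorted inputs: when both indexes are monotonic increasing and at least one is unique, the outer join indexer yields the sorted union directly. For example, with two unique monotonic integer indexes (eligible for the fast path shown above):

import pandas as pd

left = pd.Index([1, 2, 3, 5])
right = pd.Index([2, 3, 4])
print(left.union(right))   # sorted union: [1, 2, 3, 4, 5]
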
8 changes: 5 additions & 3 deletions pandas/core/internals/blocks.py
@@ -918,7 +918,7 @@ def setitem(self, indexer, value):
check_setitem_lengths(indexer, value, values)

if is_empty_indexer(indexer, arr_value):
# GH#8669 empty indexers
# GH#8669 empty indexers, test_loc_setitem_boolean_mask_allfalse
pass

elif is_scalar_indexer(indexer, self.ndim):
@@ -1698,7 +1698,7 @@ def putmask(self, mask, new) -> list[Block]:
mask = extract_bool_array(mask)

if not self._can_hold_element(new):
return self.astype(_dtype_obj).putmask(mask, new)
return self.coerce_to_target_dtype(new).putmask(mask, new)

arr = self.values
arr.T.putmask(mask, new)
@@ -1755,7 +1755,9 @@ def fillna(
# We support filling a DatetimeTZ with a `value` whose timezone
# is different by coercing to object.
# TODO: don't special-case td64
return self.astype(_dtype_obj).fillna(value, limit, inplace, downcast)
return self.coerce_to_target_dtype(value).fillna(
value, limit, inplace, downcast
)

values = self.values
values = values if inplace else values.copy()
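
The putmask and fillna fallbacks above now route through coerce_to_target_dtype rather than unconditionally casting to object. For the timezone-mismatch case described in the comment, the observable result should still be an object-dtype outcome; a rough illustration of that documented case (behaviour as stated by the comment, not independently re-verified here):

import pandas as pd

ser = pd.Series(pd.to_datetime(["2021-01-01", None]).tz_localize("UTC"))
filled = ser.fillna(pd.Timestamp("2021-01-02", tz="US/Eastern"))
print(filled.dtype)   # object: a fill value in a different timezone cannot stay datetime64[ns, UTC]
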
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_timedelta64.py
@@ -2075,7 +2075,7 @@ def test_td64arr_div_numeric_array(
with pytest.raises(TypeError, match=pattern):
vector.astype(object) / tdser

def test_td64arr_mul_int_series(self, box_with_array, names, request):
def test_td64arr_mul_int_series(self, box_with_array, names):
# GH#19042 test for correct name attachment
box = box_with_array
exname = get_expected_name(box, names)
