Use fused types for _take_2d (pandas-dev#22917)

jreback · Oct 5, 2018 · d523d9f
1 parent ee27fab
commit d523d9f
Show file tree

Hide file tree

Showing 5 changed files with 81 additions and 70 deletions.
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
@@ -2,7 +2,6 @@
 Template for each `dtype` helper function using 1-d template
 
 # 1-d template
-- map_indices
 - pad
 - pad_1d
 - pad_2d

diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
@@ -24,17 +24,8 @@ dtypes = [('object', 'object', 'Infinity()', 'NegInfinity()'),
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-{{if dtype == 'object'}}
-
-
 def rank_1d_{{dtype}}(object in_arr, ties_method='average',
                       ascending=True, na_option='keep', pct=False):
-{{else}}
-
-
-def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
-                      na_option='keep', pct=False):
-{{endif}}
     """
     Fast NaN-friendly version of scipy.stats.rankdata
     """

diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
@@ -260,33 +260,39 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 
 {{endfor}}
 
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 # take_2d internal function
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 
-{{py:
-
-# dtype, ctype, init_result
-dtypes = [('float64', 'float64_t', 'np.empty_like(values)'),
-          ('uint64', 'uint64_t', 'np.empty_like(values)'),
-          ('object', 'object', 'values.copy()'),
-          ('int64', 'int64_t', 'np.empty_like(values)')]
-}}
+ctypedef fused take_t:
+    float64_t
+    uint64_t
+    int64_t
+    object
 
-{{for dtype, ctype, init_result in dtypes}}
 
-cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, object idx):
+cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
     cdef:
         Py_ssize_t i, j, N, K
         ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
-        ndarray[{{ctype}}, ndim=2] result
+        ndarray[take_t, ndim=2] result
         object val
 
     N, K = (<object> values).shape
-    result = {{init_result}}
+
+    if take_t is object:
+        # fused-type check is resolved at compile-time per specialization
+        result = values.copy()
+    else:
+        result = np.empty_like(values)
+
     for i in range(N):
         for j in range(K):
             result[i, j] = values[i, indexer[i, j]]
     return result
 
-{{endfor}}
+
+_take_2d_object = _take_2d[object]
+_take_2d_float64 = _take_2d[float64_t]
+_take_2d_int64 = _take_2d[int64_t]
+_take_2d_uint64 = _take_2d[uint64_t]
diff --git a/pandas/_libs/join_func_helper.pxi.in b/pandas/_libs/join_func_helper.pxi.in
@@ -68,21 +68,21 @@ def asof_join_backward_{{on_dtype}}_by_{{by_dtype}}(
 
         # find last position in right whose value is less than left's
         if allow_exact_matches:
-            while right_pos < right_size and\
-                right_values[right_pos] <= left_values[left_pos]:
+            while (right_pos < right_size and
+                   right_values[right_pos] <= left_values[left_pos]):
                 hash_table.set_item(right_by_values[right_pos], right_pos)
                 right_pos += 1
         else:
-            while right_pos < right_size and\
-                right_values[right_pos] < left_values[left_pos]:
+            while (right_pos < right_size and
+                   right_values[right_pos] < left_values[left_pos]):
                 hash_table.set_item(right_by_values[right_pos], right_pos)
                 right_pos += 1
         right_pos -= 1
 
         # save positions as the desired index
         by_value = left_by_values[left_pos]
-        found_right_pos = hash_table.get_item(by_value)\
-                          if by_value in hash_table else -1
+        found_right_pos = (hash_table.get_item(by_value)
+                           if by_value in hash_table else -1)
         left_indexer[left_pos] = left_pos
         right_indexer[left_pos] = found_right_pos
 
@@ -133,21 +133,21 @@ def asof_join_forward_{{on_dtype}}_by_{{by_dtype}}(
 
         # find first position in right whose value is greater than left's
         if allow_exact_matches:
-            while right_pos >= 0 and\
-                right_values[right_pos] >= left_values[left_pos]:
+            while (right_pos >= 0 and
+                   right_values[right_pos] >= left_values[left_pos]):
                 hash_table.set_item(right_by_values[right_pos], right_pos)
                 right_pos -= 1
         else:
-            while right_pos >= 0 and\
-                right_values[right_pos] > left_values[left_pos]:
+            while (right_pos >= 0 and
+                   right_values[right_pos] > left_values[left_pos]):
                 hash_table.set_item(right_by_values[right_pos], right_pos)
                 right_pos -= 1
         right_pos += 1
 
         # save positions as the desired index
         by_value = left_by_values[left_pos]
-        found_right_pos = hash_table.get_item(by_value)\
-                          if by_value in hash_table else -1
+        found_right_pos = (hash_table.get_item(by_value)
+                           if by_value in hash_table else -1)
         left_indexer[left_pos] = left_pos
         right_indexer[left_pos] = found_right_pos
 
@@ -259,12 +259,12 @@ def asof_join_backward_{{on_dtype}}(
 
         # find last position in right whose value is less than left's
         if allow_exact_matches:
-            while right_pos < right_size and\
-                right_values[right_pos] <= left_values[left_pos]:
+            while (right_pos < right_size and
+                   right_values[right_pos] <= left_values[left_pos]):
                 right_pos += 1
         else:
-            while right_pos < right_size and\
-                right_values[right_pos] < left_values[left_pos]:
+            while (right_pos < right_size and
+                   right_values[right_pos] < left_values[left_pos]):
                 right_pos += 1
         right_pos -= 1
 
@@ -313,19 +313,19 @@ def asof_join_forward_{{on_dtype}}(
 
         # find first position in right whose value is greater than left's
         if allow_exact_matches:
-            while right_pos >= 0 and\
-                right_values[right_pos] >= left_values[left_pos]:
+            while (right_pos >= 0 and
+                   right_values[right_pos] >= left_values[left_pos]):
                 right_pos -= 1
         else:
-            while right_pos >= 0 and\
-                right_values[right_pos] > left_values[left_pos]:
+            while (right_pos >= 0 and
+                   right_values[right_pos] > left_values[left_pos]):
                 right_pos -= 1
         right_pos += 1
 
         # save positions as the desired index
         left_indexer[left_pos] = left_pos
-        right_indexer[left_pos] = right_pos\
-                                  if right_pos != right_size else -1
+        right_indexer[left_pos] = (right_pos
+                                   if right_pos != right_size else -1)
 
         # if needed, verify that tolerance is met
         if has_tolerance and right_pos != right_size:

diff --git a/pandas/_libs/join_helper.pxi.in b/pandas/_libs/join_helper.pxi.in
@@ -4,42 +4,30 @@ Template for each `dtype` helper function for join
 WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 """
 
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 # left_join_indexer, inner_join_indexer, outer_join_indexer
-#----------------------------------------------------------------------
+# ----------------------------------------------------------------------
 
-{{py:
-
-# name, c_type, dtype
-dtypes = [('float64', 'float64_t', 'np.float64'),
-          ('float32', 'float32_t', 'np.float32'),
-          ('object', 'object', 'object'),
-          ('int32', 'int32_t', 'np.int32'),
-          ('int64', 'int64_t', 'np.int64'),
-          ('uint64', 'uint64_t', 'np.uint64')]
-
-def get_dispatch(dtypes):
-
-    for name, c_type, dtype in dtypes:
-        yield name, c_type, dtype
-
-}}
+ctypedef fused join_t:
+    float64_t
+    float32_t
+    object
+    int32_t
+    int64_t
+    uint64_t
 
-{{for name, c_type, dtype in get_dispatch(dtypes)}}
 
 # Joins on ordered, unique indices
 
 # right might contain non-unique values
 
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
-                                      ndarray[{{c_type}}] right):
+def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[int64_t] indexer
-        {{c_type}} lval, rval
+        join_t lval, rval
 
     i = 0
     j = 0
@@ -78,6 +66,33 @@ def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
     return indexer
 
 
+left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"]
+left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"]
+left_join_indexer_unique_object = left_join_indexer_unique["object"]
+left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"]
+left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"]
+left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"]
+
+
+{{py:
+
+# name, c_type, dtype
+dtypes = [('float64', 'float64_t', 'np.float64'),
+          ('float32', 'float32_t', 'np.float32'),
+          ('object', 'object', 'object'),
+          ('int32', 'int32_t', 'np.int32'),
+          ('int64', 'int64_t', 'np.int64'),
+          ('uint64', 'uint64_t', 'np.uint64')]
+
+def get_dispatch(dtypes):
+
+    for name, c_type, dtype in dtypes:
+        yield name, c_type, dtype
+
+}}
+
+{{for name, c_type, dtype in get_dispatch(dtypes)}}
+
 # @cython.wraparound(False)
 # @cython.boundscheck(False)
 def left_join_indexer_{{name}}(ndarray[{{c_type}}] left,