Skip to content

Commit

Permalink
Use fused types for _take_2d (pandas-dev#22917)
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Oct 5, 2018
1 parent ee27fab commit d523d9f
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 70 deletions.
1 change: 0 additions & 1 deletion pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Template for each `dtype` helper function using 1-d template

# 1-d template
- map_indices
- pad
- pad_1d
- pad_2d
Expand Down
9 changes: 0 additions & 9 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,8 @@ dtypes = [('object', 'object', 'Infinity()', 'NegInfinity()'),

@cython.wraparound(False)
@cython.boundscheck(False)
{{if dtype == 'object'}}


def rank_1d_{{dtype}}(object in_arr, ties_method='average',
ascending=True, na_option='keep', pct=False):
{{else}}


def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
na_option='keep', pct=False):
{{endif}}
"""
Fast NaN-friendly version of scipy.stats.rankdata
"""
Expand Down
36 changes: 21 additions & 15 deletions pandas/_libs/algos_take_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -260,33 +260,39 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# take_2d internal function
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

# dtype, ctype, init_result
dtypes = [('float64', 'float64_t', 'np.empty_like(values)'),
('uint64', 'uint64_t', 'np.empty_like(values)'),
('object', 'object', 'values.copy()'),
('int64', 'int64_t', 'np.empty_like(values)')]
}}
ctypedef fused take_t:
float64_t
uint64_t
int64_t
object

{{for dtype, ctype, init_result in dtypes}}

cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, object idx):
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
cdef:
Py_ssize_t i, j, N, K
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
ndarray[{{ctype}}, ndim=2] result
ndarray[take_t, ndim=2] result
object val

N, K = (<object> values).shape
result = {{init_result}}

if take_t is object:
# evaluated at compile-time
result = values.copy()
else:
result = np.empty_like(values)

for i in range(N):
for j in range(K):
result[i, j] = values[i, indexer[i, j]]
return result

{{endfor}}

_take_2d_object = _take_2d[object]
_take_2d_float64 = _take_2d[float64_t]
_take_2d_int64 = _take_2d[int64_t]
_take_2d_uint64 = _take_2d[uint64_t]
44 changes: 22 additions & 22 deletions pandas/_libs/join_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,21 +68,21 @@ def asof_join_backward_{{on_dtype}}_by_{{by_dtype}}(

# find last position in right whose value is less than left's
if allow_exact_matches:
while right_pos < right_size and\
right_values[right_pos] <= left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] <= left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos += 1
else:
while right_pos < right_size and\
right_values[right_pos] < left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] < left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos += 1
right_pos -= 1

# save positions as the desired index
by_value = left_by_values[left_pos]
found_right_pos = hash_table.get_item(by_value)\
if by_value in hash_table else -1
found_right_pos = (hash_table.get_item(by_value)
if by_value in hash_table else -1)
left_indexer[left_pos] = left_pos
right_indexer[left_pos] = found_right_pos

Expand Down Expand Up @@ -133,21 +133,21 @@ def asof_join_forward_{{on_dtype}}_by_{{by_dtype}}(

# find first position in right whose value is greater than left's
if allow_exact_matches:
while right_pos >= 0 and\
right_values[right_pos] >= left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] >= left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos -= 1
else:
while right_pos >= 0 and\
right_values[right_pos] > left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] > left_values[left_pos]):
hash_table.set_item(right_by_values[right_pos], right_pos)
right_pos -= 1
right_pos += 1

# save positions as the desired index
by_value = left_by_values[left_pos]
found_right_pos = hash_table.get_item(by_value)\
if by_value in hash_table else -1
found_right_pos = (hash_table.get_item(by_value)
if by_value in hash_table else -1)
left_indexer[left_pos] = left_pos
right_indexer[left_pos] = found_right_pos

Expand Down Expand Up @@ -259,12 +259,12 @@ def asof_join_backward_{{on_dtype}}(

# find last position in right whose value is less than left's
if allow_exact_matches:
while right_pos < right_size and\
right_values[right_pos] <= left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] <= left_values[left_pos]):
right_pos += 1
else:
while right_pos < right_size and\
right_values[right_pos] < left_values[left_pos]:
while (right_pos < right_size and
right_values[right_pos] < left_values[left_pos]):
right_pos += 1
right_pos -= 1

Expand Down Expand Up @@ -313,19 +313,19 @@ def asof_join_forward_{{on_dtype}}(

# find first position in right whose value is greater than left's
if allow_exact_matches:
while right_pos >= 0 and\
right_values[right_pos] >= left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] >= left_values[left_pos]):
right_pos -= 1
else:
while right_pos >= 0 and\
right_values[right_pos] > left_values[left_pos]:
while (right_pos >= 0 and
right_values[right_pos] > left_values[left_pos]):
right_pos -= 1
right_pos += 1

# save positions as the desired index
left_indexer[left_pos] = left_pos
right_indexer[left_pos] = right_pos\
if right_pos != right_size else -1
right_indexer[left_pos] = (right_pos
if right_pos != right_size else -1)

# if needed, verify that tolerance is met
if has_tolerance and right_pos != right_size:
Expand Down
61 changes: 38 additions & 23 deletions pandas/_libs/join_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,30 @@ Template for each `dtype` helper function for join
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# left_join_indexer, inner_join_indexer, outer_join_indexer
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

# name, c_type, dtype
dtypes = [('float64', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'np.float32'),
('object', 'object', 'object'),
('int32', 'int32_t', 'np.int32'),
('int64', 'int64_t', 'np.int64'),
('uint64', 'uint64_t', 'np.uint64')]

def get_dispatch(dtypes):

for name, c_type, dtype in dtypes:
yield name, c_type, dtype

}}
ctypedef fused join_t:
float64_t
float32_t
object
int32_t
int64_t
uint64_t

{{for name, c_type, dtype in get_dispatch(dtypes)}}

# Joins on ordered, unique indices

# right might contain non-unique values


@cython.wraparound(False)
@cython.boundscheck(False)
def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
ndarray[{{c_type}}] right):
def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t] indexer
{{c_type}} lval, rval
join_t lval, rval

i = 0
j = 0
Expand Down Expand Up @@ -78,6 +66,33 @@ def left_join_indexer_unique_{{name}}(ndarray[{{c_type}}] left,
return indexer


left_join_indexer_unique_float64 = left_join_indexer_unique["float64_t"]
left_join_indexer_unique_float32 = left_join_indexer_unique["float32_t"]
left_join_indexer_unique_object = left_join_indexer_unique["object"]
left_join_indexer_unique_int32 = left_join_indexer_unique["int32_t"]
left_join_indexer_unique_int64 = left_join_indexer_unique["int64_t"]
left_join_indexer_unique_uint64 = left_join_indexer_unique["uint64_t"]


{{py:

# name, c_type, dtype
dtypes = [('float64', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'np.float32'),
('object', 'object', 'object'),
('int32', 'int32_t', 'np.int32'),
('int64', 'int64_t', 'np.int64'),
('uint64', 'uint64_t', 'np.uint64')]

def get_dispatch(dtypes):

for name, c_type, dtype in dtypes:
yield name, c_type, dtype

}}

{{for name, c_type, dtype in get_dispatch(dtypes)}}

# @cython.wraparound(False)
# @cython.boundscheck(False)
def left_join_indexer_{{name}}(ndarray[{{c_type}}] left,
Expand Down

0 comments on commit d523d9f

Please sign in to comment.