Skip to content

Commit

Permalink
FIX/#285
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Jul 16, 2022
1 parent bbf97f0 commit bffda9d
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions opteryx/third_party/pyarrow_ops/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import numpy


Expand All @@ -8,7 +9,13 @@ def groupify_array(arr):
# - 2. Count per unique
# - 3. Sort index
# - 4. Begin index per unique
dic, counts = numpy.unique(arr, return_counts=True, equal_nan=True)

# ADDED FOR OPTERYX
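# numpy.unique's equal_nan keyword may be missing from the NumPy releases
# installable on Python 3.7, so it is only passed on newer interpreters.
# NOTE(review): sys.version_info is a 5-tuple such as (3, 7, 13, 'final', 0),
# which compares greater than (3, 7); "<= (3, 7)" is therefore False on every
# 3.7.x release - "< (3, 8)" is probably what was intended.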
if sys.version_info <= (3, 7):
dic, counts = numpy.unique(arr, return_counts=True)
else:
dic, counts = numpy.unique(arr, return_counts=True, equal_nan=True)
sort_idx = numpy.argsort(arr)
return dic, counts, sort_idx, [0] + numpy.cumsum(counts)[:-1].tolist()

Expand Down Expand Up @@ -38,8 +45,11 @@ def columns_to_array(table, columns):
)
# this must be a list, not a generator: numpy.array() does not iterate a
# generator, it wraps the generator object itself in a 0-d object array
return numpy.array(
[numpy.nan if (el != el) or (el is None) else el for el in column_values]
) # nosemgrep
[
numpy.nan if (el != el) or (el is None) else el # nosemgrep
for el in column_values
]
)

values = (c.to_numpy() for c in table.select(columns).itercolumns())
return numpy.array(list(map(_hash, zip(*values))))

0 comments on commit bffda9d

Please sign in to comment.