Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support implicit array conversion with query-planning enabled #15378

Merged
merged 12 commits into from
Apr 5, 2024
31 changes: 31 additions & 0 deletions python/dask_cudf/dask_cudf/expr/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,34 @@ class Index(DXIndex):
get_collection_type.register(cudf.DataFrame, lambda _: DataFrame)
get_collection_type.register(cudf.Series, lambda _: Series)
get_collection_type.register(cudf.BaseIndex, lambda _: Index)


##
## Support conversion to GPU-backed Array collections
##


try:
from dask_expr._backends import create_array_collection

@get_collection_type.register_lazy("cupy")
def _register_cupy():
import cupy

@get_collection_type.register(cupy.ndarray)
def get_collection_type_cupy_array(_):
return create_array_collection

@get_collection_type.register_lazy("cupyx")
def _register_cupyx():
# Needed for cuml
from cupyx.scipy.sparse import spmatrix

@get_collection_type.register(spmatrix)
def get_collection_type_csr_matrix(_):
return create_array_collection

except ImportError:
# Older version of dask-expr.
# Implicit conversion to array wont work.
pass
34 changes: 34 additions & 0 deletions python/dask_cudf/dask_cudf/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,3 +913,37 @@ def test_categorical_dtype_round_trip():
actual = ds.compute()
expected = pds.compute()
assert actual.dtype.ordered == expected.dtype.ordered


def test_implicit_array_conversion_cupy():
s = cudf.Series(range(10))
ds = dask_cudf.from_cudf(s, npartitions=2)

def func(x):
return x.values

# Need to compute the dask collection for now.
# See: https://github.com/dask/dask/issues/11017
result = ds.map_partitions(func, meta=s.values).compute()
expect = func(s)

dask.array.assert_eq(result, expect)


def test_implicit_array_conversion_cupy_sparse():
cupyx = pytest.importorskip("cupyx")

s = cudf.Series(range(10), dtype="float32")
ds = dask_cudf.from_cudf(s, npartitions=2)

def func(x):
return cupyx.scipy.sparse.csr_matrix(x.values)

# Need to compute the dask collection for now.
# See: https://github.com/dask/dask/issues/11017
result = ds.map_partitions(func, meta=s.values).compute()
expect = func(s)

# NOTE: The calculation here doesn't need to make sense.
# We just need to make sure we get the right type back.
assert type(result) == type(expect)
Loading