Skip to content
This repository was archived by the owner on Aug 29, 2024. It is now read-only.
23 changes: 22 additions & 1 deletion dask_sql/physical/utils/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
import dask.dataframe as dd
import pandas as pd

from dask_sql.utils import make_pickable_without_dask_sql, new_temporary_column
from dask_sql.utils import make_pickable_without_dask_sql

try:
import dask_cudf
except ImportError:
dask_cudf = None


def apply_sort(
Expand All @@ -12,6 +17,22 @@ def apply_sort(
sort_ascending: List[bool],
sort_null_first: List[bool],
) -> dd.DataFrame:
# Try fast path for multi-column sorting before falling back to
# sort_partition_func. Tools like dask-cudf have a limited but fast
# multi-column sort implementation. We check if any sorting/null sorting
# is required. If so, we fall back to default sorting implementation
if (
dask_cudf is not None
and isinstance(df, dask_cudf.DataFrame)
and all(sort_ascending)
and not any(sort_null_first)
):
try:
df = df.sort_values(sort_columns, ignore_index=True)
return df.persist()
Copy link
Member

@VibhuJawa VibhuJawa Sep 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. We should not call .persist() on single-partition frames.

  2. Just curious: does .persist() ensure we don't trigger duplicate computations? As IIRC, .sort_values() is not lazy.

I wonder if this is a better pattern

df = df.persist()
df = df.sort_values(sort_columns, ignore_index=True).persist()

Copy link
Member Author

@charlesbluca charlesbluca Sep 30, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Agreed, I can add in a call to map_partitions in the single partition case.
  2. @quasiben might know better than me the implications of calling persist here; I would assume this is here mostly to match up with the persist call happening in the workaround:

return df.persist()

EDIT:

Just saw your edit - knowing that, it looks like the current pattern should be good (once we account for the single partition case) - should we still opt to persist before running sort_values?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just saw your edit - knowing that, it looks like the current pattern should be good (once we account for the single partition case) - should we still opt to persist before running sort_values?

Testing it again now; will update here. Sorry for the edit and confusion.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So i tested an example workflow with and without persisting first, and persisting before sorting indeed prevents duplicate computation.

Without Persisting (DASK PROFILE):

st = time.time()
with performance_report(filename="sort-without-persist.html"):
    df =  dask_cudf.read_parquet(get_fp("web_sales"),columns= columns).shuffle(['ws_sold_date_sk','ws_ship_date_sk'])
    df = df.sort_values(by=['ws_bill_cdemo_sk'],ignore_index=True).persist()
    df = wait(df);
    del df
et = time.time()
print(f"et -st = {et-st}")
et -st = 23.0989 

With Persisting (DASK PROFILE):

st = time.time()
with performance_report(filename="sort-with-persist.html"):
    df =  dask_cudf.read_parquet(get_fp("web_sales"),columns= columns).shuffle(['ws_sold_date_sk','ws_ship_date_sk'])
    df = df.persist().sort_values(by=['ws_bill_cdemo_sk'],ignore_index=True).persist()
    df = wait(df);
    del df
    
et = time.time()
print(f"et -st = {et-st}")
et -st = 16.24

The trade-off here is memory vs. duplicate computation. I think we might want to think more about this.

I wonder if a version of in-place sorting might prevent some memory overheads.

Anyways, we should think deeply about this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect the persist calls here are due to handling the multi-col sort on CPU. Once pandas-dev/pandas#43881 is resolved and Dask has a native multi-col sort we can probably remove them entirely. @charlesbluca is correct that I was originally intending to match the case when native multi-col sorting is not supported.

I think it's ok to safely remove persist in the initial try state and return the dataframe directly

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed these changes to the original PR:

dask-contrib#229

except ValueError:
pass

# Split the first column. We need to handle this one with set_index
first_sort_column = sort_columns[0]
first_sort_ascending = sort_ascending[0]
Expand Down
8 changes: 8 additions & 0 deletions tests/integration/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
from dask.distributed import Client
from pandas.testing import assert_frame_equal

try:
import dask_cudf
except ImportError:
dask_cudf = None


@pytest.fixture()
def timeseries_df(c):
Expand Down Expand Up @@ -117,6 +122,9 @@ def c(
for df_name, df in dfs.items():
dask_df = dd.from_pandas(df, npartitions=3)
c.create_table(df_name, dask_df)
if dask_cudf is not None:
cudf_df = dask_cudf.from_dask_dataframe(dask_df)
c.create_table("cudf_" + df_name, cudf_df)

yield c

Expand Down
30 changes: 30 additions & 0 deletions tests/integration/test_dask_cudf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pytest

pytest.importorskip("dask_cudf")

from cudf.testing._utils import assert_eq


def test_cudf_order_by(c):
    """ORDER BY on a dask-cudf backed table should match a manual sort_values."""
    # Result produced by the SQL-level ordering.
    ordered = c.sql(
        """
    SELECT
        *
    FROM cudf_user_table_1
    ORDER BY user_id
    """
    ).compute()

    # Same table fetched unordered, then sorted client-side for comparison.
    unordered = c.sql(
        """
    SELECT
        *
    FROM cudf_user_table_1
    """
    )
    expected = unordered.sort_values(by="user_id", ignore_index=True).compute()

    assert_eq(ordered, expected)
26 changes: 26 additions & 0 deletions tests/integration/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
import pytest
from pandas.testing import assert_frame_equal

try:
import dask_cudf
except ImportError:
dask_cudf = None


def test_schemas(c):
df = c.sql("SHOW SCHEMAS")
Expand Down Expand Up @@ -36,6 +41,27 @@ def test_tables(c):
"string_table",
"datetime_table",
]
if dask_cudf is None
else [
"df_simple",
"cudf_df_simple",
"df",
"cudf_df",
"user_table_1",
"cudf_user_table_1",
"user_table_2",
"cudf_user_table_2",
"long_table",
"cudf_long_table",
"user_table_inf",
"cudf_user_table_inf",
"user_table_nan",
"cudf_user_table_nan",
"string_table",
"cudf_string_table",
"datetime_table",
"cudf_datetime_table",
]
}
)

Expand Down