Update for Thrust 1.17 and fixes to accommodate the cuDF Buffer refactor (#4871)

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - William Hicks (https://github.com/wphicks)
  - Corey J. Nolet (https://github.com/cjnolet)
  - Victor Lafargue (https://github.com/viclafargue)

URL: #4871
dantegd committed Aug 25, 2022
1 parent 9418e65 commit a825caa
Showing 3 changed files with 73 additions and 27 deletions.
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_gputreeshap.cmake
@@ -68,4 +68,4 @@ function(find_and_configure_gputreeshap)

endfunction()

-find_and_configure_gputreeshap(PINNED_TAG c78fe621e429117cbca45e7b23eb5c3b6280fa3a)
+find_and_configure_gputreeshap(PINNED_TAG 78e3548cda5e7c263d125e8c10e733ebf2c4ebbd)

@@ -221,7 +221,7 @@ __global__ void excess_sample_with_replacement_kernel(
  // compute the adjacent differences according to the functor
  // TODO: Replace deprecated 'FlagHeads' with 'SubtractLeft' when it is available
  BlockAdjacentDifferenceT(temp_storage.diff)
-    .FlagHeads(mask, items, mask, CustomDifference<IdxT>());
+    .SubtractLeft(items, mask, CustomDifference<IdxT>(), mask[0]);

  __syncthreads();
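
The functional change here: CUB 1.17 deprecates FlagHeads in favor of SubtractLeft, which takes the tile-predecessor item explicitly (the mask[0] argument above) instead of implicitly treating the first item as a head. Assuming CustomDifference<IdxT> simply reports whether adjacent items differ, as the surrounding sampling code suggests, the two calls compute the same head-flag mask. A host-side NumPy sketch of that semantics, illustrative only and not the CUB API:

import numpy as np

def subtract_left_flags(items, tile_predecessor):
    # Compare each item against its left neighbor; item 0 is compared
    # against the supplied predecessor, mirroring the mask[0] argument
    # passed to SubtractLeft in the kernel above.
    left = np.concatenate(([tile_predecessor], items[:-1]))
    return (items != left).astype(np.int32)

print(subtract_left_flags(np.array([3, 3, 5, 5, 7]), tile_predecessor=-1))
# -> [1 0 1 0 1]: a 1 marks the first occurrence in each run of duplicates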

96 changes: 71 additions & 25 deletions python/cuml/common/array.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,10 +18,11 @@
import numpy as np
import operator
import nvtx
+import rmm

# Temporarily disabled due to CUDA 11.0 issue
# https://github.com/rapidsai/cuml/issues/4332
-# from rmm import DeviceBuffer
+from rmm import DeviceBuffer
from cudf import DataFrame
from cudf import Series
from cudf.core.buffer import Buffer
@@ -31,6 +32,7 @@
from cuml.common.memory_utils import _strides_to_order
from cuml.common.memory_utils import class_with_cupy_rmm
from numba import cuda
+from typing import Tuple


@class_with_cupy_rmm(ignore_pattern=["serialize"])
@@ -137,26 +139,45 @@ def __init__(self,

        ary_interface = False

-        # Base class (Buffer) constructor call
        size, shape = _get_size_from_shape(shape, dtype)

        if not memview_construction and not detailed_construction:
            # Convert to cupy array and manually specify the ptr, size and
            # owner. This is to avoid the restriction on Buffer that requires
            # all data be u8
            cupy_data = cp.asarray(data)
            flattened_data = cupy_data.data.ptr

            # Size for Buffer is not the same as for cupy. Use nbytes
            size = cupy_data.nbytes
            owner = cupy_data if cupy_data.flags.owndata else data
-        else:
-            flattened_data = data
+            data = cupy_data

        self._index = index
-        super().__init__(data=flattened_data,
-                         owner=owner,
-                         size=size)
+
+        if hasattr(data, "__array_interface__"):
+            ary_interface = data.__array_interface__
+        elif hasattr(data, "__cuda_array_interface__"):
+            ary_interface = data.__cuda_array_interface__
+
+        # We no longer call the super init due to the refactor
+        if isinstance(data, int):
+            if size is None:
+                raise ValueError(
+                    "size must be specified when `data` is an integer"
+                )
+            if size < 0:
+                raise ValueError("size cannot be negative")
+            self._ptr = data
+            self._size = size
+            self._owner = owner
+
+        elif isinstance(data, DeviceBuffer) or isinstance(data, Buffer):
+            self._ptr = data.ptr
+            self._size = data.size
+            self._owner = data
+
+        elif ary_interface:
+            ptr = ary_interface["data"][0] or 0
+            self._ptr = ptr
+            self._size = size
+            self._owner = data

        # Post processing of meta data
        if detailed_construction:
@@ -165,16 +186,7 @@
            self.order = order
            self.strides = _order_to_strides(order, shape, dtype)

-        elif hasattr(data, "__array_interface__"):
-            ary_interface = data.__array_interface__
-
-        elif hasattr(data, "__cuda_array_interface__"):
-            ary_interface = data.__cuda_array_interface__
-
-        else:
-            raise TypeError("Unrecognized data type: %s" % str(type(data)))
-
-        if ary_interface:
+        elif ary_interface:
            self.shape = ary_interface['shape']
            self.dtype = np.dtype(ary_interface['typestr'])
            if ary_interface.get('strides', None) is None:
@@ -185,6 +197,9 @@
                self.strides = ary_interface['strides']
                self.order = _strides_to_order(self.strides, self.dtype)

+        else:
+            raise TypeError("Unrecognized data type: %s" % str(type(data)))

    # We use the index as a property to allow for validation/processing
    # in the future if needed
    @property
@@ -311,18 +326,49 @@ def to_output(self, output_type='cupy', output_dtype=None):

        return self

+    @nvtx.annotate(message="common.CumlArray.host_serialize", category="utils",
+                   domain="cuml_python")
+    def host_serialize(self):
+        header, frames = self.device_serialize()
+        header["writeable"] = len(frames) * (None,)
+        frames = [
+            f.to_host_array().data if c else memoryview(f)
+            for c, f in zip(header["is-cuda"], frames)
+        ]
+        return header, frames
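
host_serialize builds on device_serialize (inherited from cuDF's Serializable protocol) and downgrades every device frame to host memory. A hedged sketch of the intent, with the surrounding details assumed from the protocol rather than shown in this diff:

a = CumlArray(cp.arange(4, dtype=cp.float32))
header, frames = a.host_serialize()
# header carries the constructor kwargs plus bookkeeping such as "is-cuda";
# every frame is now host memory (a memoryview), so (header, frames) can be
# pickled or shipped across a cluster without the receiver touching the GPU.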

@nvtx.annotate(message="common.CumlArray.serialize", category="utils",
domain="cuml_python")
def serialize(self):
def serialize(self) -> Tuple[dict, list]:
header, frames = super().serialize()
header["constructor-kwargs"] = {
"dtype": self.dtype.str,
"shape": self.shape,
"order": self.order,
}
frames = [Buffer(f) for f in frames]
frames = [CumlArray(f) for f in frames]
return header, frames

@nvtx.annotate(message="common.CumlArray.copy", category="utils",
domain="cuml_python")
def copy(self) -> Buffer:
"""
Create a new Buffer containing a copy of the data contained
in this Buffer.
"""
from rmm._lib.device_buffer import copy_device_to_ptr

out = Buffer.empty(size=self.size)
copy_device_to_ptr(self.ptr, out.ptr, self.size)
return out

@nvtx.annotate(message="common.CumlArray.to_host_array", category="utils",
domain="cuml_python")
def to_host_array(self):
data = np.empty((self.size,), "u1")
rmm._lib.device_buffer.copy_ptr_to_host(self.ptr, data)
return data
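
And a small sketch of the two new helpers, assuming a CumlArray a as above: to_host_array flattens the payload to raw bytes on the host, while copy duplicates it into a fresh device Buffer.

host_bytes = a.to_host_array()   # 1-D uint8 numpy array holding the raw payload
dev_copy = a.copy()              # independent device Buffer with identical bytes
assert host_bytes.nbytes == dev_copy.size == a.size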

    @classmethod
    @nvtx.annotate(message="common.CumlArray.empty", category="utils",
                   domain="cuml_python")
@@ -420,7 +466,7 @@ def ones(cls,
def _check_low_level_type(data):
    if isinstance(data, CumlArray):
        return False
-    elif not (
+    if not (
        hasattr(data, "__array_interface__")
        or hasattr(data, "__cuda_array_interface__")
        # Temporarily disabled due to CUDA 11.0 issue
