diff --git a/test/arrayimpl.py b/test/arrayimpl.py index 5df8cd9f9..84ebf7b05 100644 --- a/test/arrayimpl.py +++ b/test/arrayimpl.py @@ -240,6 +240,47 @@ def size(self): return self.array.size +try: + import dlpackimpl as dlpack +except ImportError: + dlpack = None + +class BaseDLPackCPU(object): + + def __dlpack_device__(self): + return (dlpack.DLDeviceType.kDLCPU, 0) + + def __dlpack__(self, stream=None): + assert stream is None + capsule = dlpack.make_py_capsule(self.array) + return capsule + + def as_raw(self): + return self + + +if dlpack is not None and array is not None: + + @add_backend + class DLPackArray(BaseDLPackCPU, ArrayArray): + + backend = 'dlpack-array' + + def __init__(self, arg, typecode, shape=None): + super(DLPackArray, self).__init__(arg, typecode, shape) + + +if dlpack is not None and numpy is not None: + + @add_backend + class DLPackNumPy(BaseDLPackCPU, ArrayNumPy): + + backend = 'dlpack-numpy' + + def __init__(self, arg, typecode, shape=None): + super(DLPackNumPy, self).__init__(arg, typecode, shape) + + def typestr(typecode, itemsize): typestr = '' if sys.byteorder == 'little': diff --git a/test/dlpackimpl.py b/test/dlpackimpl.py new file mode 100644 index 000000000..9279e1261 --- /dev/null +++ b/test/dlpackimpl.py @@ -0,0 +1,230 @@ +import sys +import ctypes +try: + from enum import IntEnum +except ImportError: + IntEnum = object +if hasattr(sys, 'pypy_version_info'): + raise ImportError("unsupported on PyPy") + +class DLDeviceType(IntEnum): + kDLCPU = 1 + kDLCUDA = 2 + kDLCUDAHost = 3 + kDLOpenCL = 4 + kDLVulkan = 7 + kDLMetal = 8 + kDLVPI = 9 + kDLROCM = 10 + kDLROCMHost = 11 + kDLExtDev = 12 + kDLCUDAManaged = 13 + +class DLDevice(ctypes.Structure): + _fields_ = [ + ("device_type", ctypes.c_uint), + ("device_id", ctypes.c_int), + ] + +class DLDataTypeCode(IntEnum): + kDLInt = 0 + kDLUInt = 1 + kDLFloat = 2 + kDLOpaqueHandle = 3 + kDLBfloat = 4 + kDLComplex = 5 + +class DLDataType(ctypes.Structure): + _fields_ = [ + ("code", 
ctypes.c_uint8), + ("bits", ctypes.c_uint8), + ("lanes", ctypes.c_uint16), + ] + +class DLTensor(ctypes.Structure): + _fields_ = [ + ("data", ctypes.c_void_p), + ("device", DLDevice), + ("ndim", ctypes.c_int), + ("dtype", DLDataType), + ("shape", ctypes.POINTER(ctypes.c_int64)), + ("strides", ctypes.POINTER(ctypes.c_int64)), + ("byte_offset", ctypes.c_uint64), + ] + +DLManagedTensorDeleter = ctypes.CFUNCTYPE(None, ctypes.c_void_p) + +class DLManagedTensor(ctypes.Structure): + _fields_ = [ + ("dl_tensor", DLTensor), + ("manager_ctx", ctypes.c_void_p), + ("deleter", DLManagedTensorDeleter), +] + +pyapi = ctypes.pythonapi + +DLManagedTensor_p = ctypes.POINTER(DLManagedTensor) + +Py_IncRef = pyapi.Py_IncRef +Py_IncRef.restype = None +Py_IncRef.argtypes = [ctypes.py_object] + +Py_DecRef = pyapi.Py_DecRef +Py_DecRef.restype = None +Py_DecRef.argtypes = [ctypes.py_object] + +PyCapsule_Destructor = ctypes.CFUNCTYPE(None, ctypes.c_void_p) + +PyCapsule_New = pyapi.PyCapsule_New +PyCapsule_New.restype = ctypes.py_object +PyCapsule_New.argtypes = [ctypes.c_void_p, ctypes.c_char_p, PyCapsule_Destructor] + +PyCapsule_IsValid = pyapi.PyCapsule_IsValid +PyCapsule_IsValid.restype = ctypes.c_int +PyCapsule_IsValid.argtypes = [ctypes.py_object, ctypes.c_char_p] + +PyCapsule_GetPointer = pyapi.PyCapsule_GetPointer +PyCapsule_GetPointer.restype = ctypes.c_void_p +PyCapsule_GetPointer.argtypes = [ctypes.py_object, ctypes.c_char_p] + +PyCapsule_SetContext = pyapi.PyCapsule_SetContext +PyCapsule_SetContext.restype = ctypes.c_int +PyCapsule_SetContext.argtypes = [ctypes.py_object, ctypes.c_void_p] + +PyCapsule_GetContext = pyapi.PyCapsule_GetContext +PyCapsule_GetContext.restype = ctypes.c_void_p +PyCapsule_GetContext.argtypes = [ctypes.py_object] + + +def make_dl_datatype(typecode, itemsize): + code = None + bits = itemsize * 8 + lanes = 1 + if typecode in "bhilqnp": + code = DLDataTypeCode.kDLInt + if typecode in "BHILQNP": + code = DLDataTypeCode.kDLUInt + if typecode in "efdg": + code = 
DLDataTypeCode.kDLFloat + if typecode in "FDG": + code = DLDataTypeCode.kDLComplex + if typecode == "G" and itemsize == 32: + code = DLDataTypeCode.kDLFloat + bits //= 2 + lanes *= 2 + datatype = DLDataType() + datatype.code = code + datatype.bits = bits + datatype.lanes = lanes + return datatype + + +def make_dl_shape(shape, order=None, strides=None): + null = ctypes.cast(0, ctypes.POINTER(ctypes.c_int64)) + if isinstance(shape, int): + shape = [shape] + ndim = len(shape) + if ndim == 0: + shape = null + strides = null + else: + shape = (ctypes.c_int64*ndim)(*shape) + if order == 'C': + size = 1 + strides = [] + for i in range(ndim-1, -1, -1): + strides.append(size) + size *= shape[i] + strides = (ctypes.c_int64*ndim)(*strides) + elif order == 'F': + size = 1 + strides = [] + for i in range(ndim): + strides.append(size) + size *= shape[i] + strides = (ctypes.c_int64*ndim)(*strides) + elif strides is not None: + strides = (ctypes.c_int64*ndim)(*strides) + else: + strides = null + return ndim, shape, strides + + +def make_dl_tensor(obj): + try: + data, size = obj.buffer_info() + typecode = obj.typecode + itemsize = obj.itemsize + except AttributeError: + data = obj.ctypes.data + size = obj.size + typecode = obj.dtype.char + itemsize = obj.itemsize + + device = DLDevice(DLDeviceType.kDLCPU, 0) + datatype = make_dl_datatype(typecode, itemsize) + ndim, shape, strides = make_dl_shape(size) + + dltensor = DLTensor() + dltensor.data = data if size > 0 else 0 + dltensor.device = device + dltensor.ndim = ndim + dltensor.dtype = datatype + dltensor.shape = shape + dltensor.strides = strides + dltensor.byte_offset = 0 + return dltensor + + +def make_dl_manager_ctx(obj): + py_obj = ctypes.py_object(obj) + if False: Py_IncRef(py_obj) + void_p = ctypes.c_void_p.from_buffer(py_obj) + return void_p + + +@DLManagedTensorDeleter +def dl_managed_tensor_deleter(void_p): + managed = ctypes.cast(void_p, DLManagedTensor_p) + manager_ctx = managed.contents.manager_ctx + py_obj = 
ctypes.cast(manager_ctx, ctypes.py_object) + if False: Py_DecRef(py_obj) + + +def make_dl_managed_tensor(obj): + managed = DLManagedTensor() + managed.dl_tensor = make_dl_tensor(obj) + managed.manager_ctx = make_dl_manager_ctx(obj) + managed.deleter = dl_managed_tensor_deleter + return managed + + +def make_py_context(context): + py_obj = ctypes.py_object(context) + Py_IncRef(py_obj) + context = ctypes.c_void_p.from_buffer(py_obj) + return ctypes.c_void_p(context.value) + + +@PyCapsule_Destructor +def py_capsule_destructor(void_p): + capsule = ctypes.cast(void_p, ctypes.py_object) + if PyCapsule_IsValid(capsule, b"dltensor"): + pointer = PyCapsule_GetPointer(capsule, b"dltensor") + managed = ctypes.cast(pointer, DLManagedTensor_p) + deleter = managed.contents.deleter + if deleter: + deleter(managed) + context = PyCapsule_GetContext(capsule) + managed = ctypes.cast(context, ctypes.py_object) + Py_DecRef(managed) + + +def make_py_capsule(managed): + if not isinstance(managed, DLManagedTensor): + managed = make_dl_managed_tensor(managed) + pointer = ctypes.pointer(managed) + capsule = PyCapsule_New(pointer, b"dltensor", py_capsule_destructor) + context = make_py_context(managed) + PyCapsule_SetContext(capsule, context) + return capsule diff --git a/test/test_msgspec.py b/test/test_msgspec.py index 8bd1f8207..dafeef8ab 100644 --- a/test/test_msgspec.py +++ b/test/test_msgspec.py @@ -41,21 +41,10 @@ # --- -class GPUBuf(object): +class BaseBuf(object): - def __init__(self, typecode, initializer, readonly=False): + def __init__(self, typecode, initializer): self._buf = array.array(typecode, initializer) - address = self._buf.buffer_info()[0] - typecode = self._buf.typecode - itemsize = self._buf.itemsize - self.__cuda_array_interface__ = dict( - version = 0, - data = (address, readonly), - typestr = typestr(typecode, itemsize), - shape = (len(self._buf), 1, 1), - strides = (itemsize,) * 3, - descr = [('', typestr(typecode, itemsize))], - ) def __eq__(self, other): return 
self._buf == other._buf @@ -72,6 +61,54 @@ def __getitem__(self, item): def __setitem__(self, item, value): self._buf[item] = value._buf +# --- + +try: + import dlpackimpl as dlpack +except ImportError: + dlpack = None + +class DLPackBuf(BaseBuf): + + def __init__(self, typecode, initializer): + super(DLPackBuf, self).__init__(typecode, initializer) + self.managed = dlpack.make_dl_managed_tensor(self._buf) + + def __del__(self): + self.managed = None + if not pypy and sys.getrefcount(self._buf) > 2: + raise RuntimeError('dlpack: possible reference leak') + + def __dlpack_device__(self): + device = self.managed.dl_tensor.device + return (device.device_type, device.device_id) + + def __dlpack__(self, stream=None): + managed = self.managed + if managed.dl_tensor.device.device_type == \ + dlpack.DLDeviceType.kDLCPU: + assert stream is None + capsule = dlpack.make_py_capsule(managed) + return capsule + +# --- + +class GPUBuf(BaseBuf): + + def __init__(self, typecode, initializer, readonly=False): + super(GPUBuf, self).__init__(typecode, initializer) + address = self._buf.buffer_info()[0] + typecode = self._buf.typecode + itemsize = self._buf.itemsize + self.__cuda_array_interface__ = dict( + version = 0, + data = (address, readonly), + typestr = typestr(typecode, itemsize), + shape = (len(self._buf), 1, 1), + strides = (itemsize,) * 3, + descr = [('', typestr(typecode, itemsize))], + ) + cupy_issue_2259 = False if cupy is not None: @@ -386,6 +423,15 @@ def testNotContiguous(self): +@unittest.skipIf(array is None, 'array') +@unittest.skipIf(dlpack is None, 'dlpack') +class TestMessageSimpleDLPackBuf(unittest.TestCase, + BaseTestMessageSimpleArray): + + def array(self, typecode, initializer): + return DLPackBuf(typecode, initializer) + + @unittest.skipIf(array is None, 'array') class TestMessageSimpleGPUBuf(unittest.TestCase, BaseTestMessageSimpleArray): @@ -470,6 +516,125 @@ def testNotContiguous(self): # --- +@unittest.skipIf(array is None, 
'array') +@unittest.skipIf(dlpack is None, 'dlpack') +class TestMessageDLPackBuf(unittest.TestCase): + + def testDevice(self): + buf = DLPackBuf('i', [0,1,2,3]) + buf.__dlpack_device__ = None + MPI.Get_address(buf) + buf.__dlpack_device__ = lambda: None + self.assertRaises(TypeError, MPI.Get_address, buf) + buf.__dlpack_device__ = lambda: (None, 0) + self.assertRaises(TypeError, MPI.Get_address, buf) + buf.__dlpack_device__ = lambda: (1, None) + self.assertRaises(TypeError, MPI.Get_address, buf) + buf.__dlpack_device__ = lambda: (1,) + self.assertRaises(ValueError, MPI.Get_address, buf) + buf.__dlpack_device__ = lambda: (1, 0, 1) + self.assertRaises(ValueError, MPI.Get_address, buf) + del buf.__dlpack_device__ + MPI.Get_address(buf) + + def testCapsule(self): + buf = DLPackBuf('i', [0,1,2,3]) + # + capsule = buf.__dlpack__() + MPI.Get_address(buf) + MPI.Get_address(buf) + del capsule + # + capsule = buf.__dlpack__() + retvals = [capsule] * 2 + buf.__dlpack__ = lambda *args, **kwargs: retvals.pop() + MPI.Get_address(buf) + self.assertRaises(BufferError, MPI.Get_address, buf) + del buf.__dlpack__ + del capsule + # + buf.__dlpack__ = lambda *args, **kwargs: None + self.assertRaises(BufferError, MPI.Get_address, buf) + del buf.__dlpack__ + + def testNdim(self): + buf = DLPackBuf('i', [0,1,2,3]) + dltensor = buf.managed.dl_tensor + # + for ndim in (2, 1, 0): + dltensor.ndim = ndim + MPI.Get_address(buf) + # + dltensor.ndim = -1 + self.assertRaises(BufferError, MPI.Get_address, buf) + # + del dltensor + + def testShape(self): + buf = DLPackBuf('i', [0,1,2,3]) + dltensor = buf.managed.dl_tensor + # + dltensor.ndim = 1 + dltensor.shape[0] = -1 + self.assertRaises(BufferError, MPI.Get_address, buf) + # + dltensor.ndim = 0 + dltensor.shape = None + MPI.Get_address(buf) + # + dltensor.ndim = 1 + dltensor.shape = None + self.assertRaises(BufferError, MPI.Get_address, buf) + # + del dltensor + + def testStrides(self): + buf = DLPackBuf('i', range(8)) + dltensor = 
buf.managed.dl_tensor + # + for order in ('C', 'F'): + dltensor.ndim, dltensor.shape, dltensor.strides = \ + dlpack.make_dl_shape([2, 2, 2], order=order) + MPI.Get_address(buf) + dltensor.strides[0] = -1 + self.assertRaises(BufferError, MPI.Get_address, buf) + # + del dltensor + + def testContiguous(self): + buf = DLPackBuf('i', range(8)) + dltensor = buf.managed.dl_tensor + # + dltensor.ndim, dltensor.shape, dltensor.strides = \ + dlpack.make_dl_shape([2, 2, 2], order='C') + s = dltensor.strides + strides = [s[i] for i in range(dltensor.ndim)] + s[0], s[1], s[2] = [strides[i] for i in [0, 1, 2]] + MPI.Get_address(buf) + s[0], s[1], s[2] = [strides[i] for i in [2, 1, 0]] + MPI.Get_address(buf) + s[0], s[1], s[2] = [strides[i] for i in [0, 2, 1]] + self.assertRaises(BufferError, MPI.Get_address, buf) + s[0], s[1], s[2] = [strides[i] for i in [1, 0, 2]] + self.assertRaises(BufferError, MPI.Get_address, buf) + del s + # + del dltensor + + def testByteOffset(self): + buf = DLPackBuf('B', [0,1,2,3]) + dltensor = buf.managed.dl_tensor + # + dltensor.ndim = 1 + for i in range(len(buf)): + dltensor.byte_offset = i + mem = MPI.memory(buf) + self.assertEqual(mem[0], buf[i]) + # + del dltensor + +# --- + @unittest.skipIf(array is None, 'array') class TestMessageGPUBufInterface(unittest.TestCase):