New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add GPU tests for DLPack support #59
Closed
Closed
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
9893755
dlpack: Add support for DLPack
dalcinl 7338c11
dlpack: Add exhaustive tests for DLPack interface
dalcinl 4acb6bd
simplify a bit DLPack support and add comment
leofang 237359e
add DLPackCuPy arrayimpl
leofang 47669e0
test if __dlpack_device__ is defined
leofang 442fcb9
rename GPUBuf to CAIBuf for clarity
leofang 79f64d5
rename DLPackBuf to DLPackCPUBuf for clarity
leofang 0780e63
add simple message tests via DLPack GPU support
leofang 0d955ae
fix DLPackCuPy tests
leofang File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
#------------------------------------------------------------------------------
# Below is dlpack.h (as of v0.6)
|
||
# Minimal inline declarations mirroring dlpack.h (v0.6); only the subset
# of the DLPack C ABI used by this file is declared here.
cdef extern from * nogil:

    # Fixed-width integer aliases matching <stdint.h>.
    ctypedef unsigned char uint8_t
    ctypedef unsigned short uint16_t
    ctypedef signed long long int64_t
    ctypedef unsigned long long uint64_t

    # Kinds of devices a DLPack tensor can reside on.
    ctypedef enum DLDeviceType:
        kDLCPU = 1
        kDLCUDA = 2
        kDLCUDAHost = 3
        kDLOpenCL = 4
        kDLVulkan = 7
        kDLMetal = 8
        kDLVPI = 9
        kDLROCM = 10
        kDLROCMHost = 11
        kDLExtDev = 12
        kDLCUDAManaged = 13

    # Device on which the tensor data lives.
    ctypedef struct DLDevice:
        DLDeviceType device_type
        int device_id

    # Scalar type categories for tensor elements.
    ctypedef enum DLDataTypeCode:
        kDLInt = 0
        kDLUInt = 1
        kDLFloat = 2
        kDLOpaqueHandle = 3
        kDLBfloat = 4
        kDLComplex = 5

    # Element type descriptor: category, bit width, and vector lane count.
    ctypedef struct DLDataType:
        uint8_t code
        uint8_t bits
        uint16_t lanes

    # Plain tensor view: data pointer plus layout metadata.
    # strides may be NULL (meaning a compact C-order layout, see below).
    ctypedef struct DLTensor:
        void *data
        DLDevice device
        int ndim
        DLDataType dtype
        int64_t *shape
        int64_t *strides
        uint64_t byte_offset

    # Tensor plus ownership info; the producer's deleter (may be NULL)
    # releases manager_ctx and the managed tensor itself.
    ctypedef struct DLManagedTensor:
        DLTensor dl_tensor
        void *manager_ctx
        void (*deleter)(DLManagedTensor *)
|
||
#------------------------------------------------------------------------------
|
||
# PyCapsule helpers used below to unwrap and retire "dltensor" capsules.
cdef extern from "Python.h":
    void* PyCapsule_GetPointer(object, const char[]) except? NULL
    int PyCapsule_SetName(object, const char[]) except -1
    int PyCapsule_IsValid(object, const char[])
|
||
#------------------------------------------------------------------------------ | ||
|
||
cdef inline int dlpack_is_contig(const DLTensor *dltensor, char order) nogil:
    # Return 1 if the tensor is contiguous in the requested order
    # (c'C' or c'F'), 0 otherwise.
    #
    # A NULL strides pointer means a compact C-order layout by DLPack
    # convention; such a layout is also F-contiguous only for ndim <= 1.
    # Otherwise, walk the axes innermost-first for the given order and
    # check each stride equals the running product of extents (strides
    # here are in element units, as produced by dlpack.h).
    cdef int i, ndim = dltensor.ndim
    cdef int64_t *shape = dltensor.shape
    cdef int64_t *strides = dltensor.strides
    cdef int64_t start, step, index, dim, size = 1
    if strides == NULL:
        if ndim > 1 and order == c'F':
            return 0
        return 1
    if order == c'F':
        start = 0
        step = 1
    else:
        start = ndim - 1
        step = -1
    for i in range(ndim):
        index = start + step * i
        dim = shape[index]
        # Axes of extent 1 impose no layout constraint: their stride is
        # never used to address memory, so any value is acceptable
        # (same rule NumPy applies when computing contiguity flags).
        if dim != 1 and size != strides[index]:
            return 0
        size *= dim
    return 1
|
||
cdef inline int dlpack_check_shape(const DLTensor *dltensor) except -1:
    # Validate the tensor's dimensional metadata. Raises BufferError for a
    # negative ndim, a missing shape on a non-scalar tensor, or any
    # negative shape/strides entry; returns 0 when everything is sane.
    cdef int axis
    cdef int ndim = dltensor.ndim
    if ndim < 0:
        raise BufferError("dlpack: number of dimensions is negative")
    if ndim > 0 and dltensor.shape == NULL:
        raise BufferError("dlpack: shape is NULL")
    for axis in range(ndim):
        if dltensor.shape[axis] < 0:
            raise BufferError("dlpack: shape item is negative")
    if dltensor.strides != NULL:
        for axis in range(ndim):
            if dltensor.strides[axis] < 0:
                raise BufferError("dlpack: strides item is negative")
    return 0
|
||
cdef inline int dlpack_check_contig(const DLTensor *dltensor) except -1:
    # Accept NULL strides (implicitly C-contiguous) or any layout that is
    # contiguous in either C or Fortran order; refuse everything else.
    if dltensor.strides == NULL:
        return 0
    if dlpack_is_contig(dltensor, c'C') or dlpack_is_contig(dltensor, c'F'):
        return 0
    raise BufferError("dlpack: buffer is not contiguous")
|
||
cdef inline void *dlpack_get_data(const DLTensor *dltensor) nogil:
    # Start of the tensor payload: base pointer advanced by byte_offset.
    cdef char *base = <char*> dltensor.data
    return base + dltensor.byte_offset
|
||
cdef inline Py_ssize_t dlpack_get_size(const DLTensor *dltensor) nogil:
    # Total buffer size in bytes: the product of all extents multiplied by
    # the element size, with bits*lanes rounded up to whole bytes.
    cdef Py_ssize_t bits = dltensor.dtype.bits
    cdef Py_ssize_t lanes = dltensor.dtype.lanes
    cdef Py_ssize_t nbytes = 1
    cdef int axis
    for axis in range(dltensor.ndim):
        nbytes *= <Py_ssize_t> dltensor.shape[axis]
    nbytes *= (bits * lanes + 7) // 8
    return nbytes
|
||
cdef inline char *dlpack_get_format(const DLTensor *dltensor) nogil:
    # Map a DLDataType to a two/three-character typecode ("i4", "f8", ...).
    # Vector types (lanes != 1) and any unrecognized code/width combination
    # fall back to the plain byte format.
    cdef unsigned int code = dltensor.dtype.code
    cdef unsigned int bits = dltensor.dtype.bits
    if dltensor.dtype.lanes != 1:
        return BYTE_FMT
    if code == kDLInt:
        if bits == 8:  return b"i1"
        if bits == 16: return b"i2"
        if bits == 32: return b"i4"
        if bits == 64: return b"i8"
    elif code == kDLUInt:
        if bits == 8:  return b"u1"
        if bits == 16: return b"u2"
        if bits == 32: return b"u4"
        if bits == 64: return b"u8"
    elif code == kDLFloat:
        if bits == 16:  return b"f2"
        if bits == 32:  return b"f4"
        if bits == 64:  return b"f8"
        if bits == 96:  return b"f12"
        if bits == 128: return b"f16"
    elif code == kDLComplex:
        if bits == 32:  return b"c4"
        if bits == 64:  return b"c8"
        if bits == 128: return b"c16"
        if bits == 192: return b"c24"
        if bits == 256: return b"c32"
    return BYTE_FMT
|
||
cdef inline Py_ssize_t dlpack_get_itemsize(const DLTensor *dltensor) nogil:
    # Element size in bytes (bits rounded up); vector types are exposed as
    # raw bytes, so their itemsize is 1.
    if dltensor.dtype.lanes == 1:
        return (dltensor.dtype.bits + 7) // 8
    return 1
|
||
#------------------------------------------------------------------------------
|
||
cdef int Py_CheckDLPackBuffer(object obj):
    # Probe whether obj advertises the DLPack producer protocol.
    # Any failure while probing counts as "not supported".
    try:
        return 1 if hasattr(obj, '__dlpack__') else 0
    except:
        return 0
|
||
cdef int Py_GetDLPackBuffer(object obj, Py_buffer *view, int flags) except -1:
    # Fill *view* from an object exporting the DLPack protocol.
    # Raises NotImplementedError if obj lacks __dlpack__/__dlpack_device__,
    # and BufferError for invalid capsules, bad shapes/strides, or
    # non-contiguous tensors. Returns 0 on success.
    cdef object dlpack
    cdef object dlpack_device
    cdef int device_type
    cdef int device_id
    cdef object capsule
    cdef DLManagedTensor *managed
    cdef const DLTensor *dltensor
    cdef void *buf
    cdef Py_ssize_t size
    cdef bint readonly
    cdef bint fixnull

    # Both protocol methods must be present; anything else is unsupported.
    try:
        dlpack = obj.__dlpack__
        dlpack_device = obj.__dlpack_device__
    except AttributeError:
        raise NotImplementedError("dlpack: missing support")

    device_type, device_id = dlpack_device()
    if device_type == kDLCPU:
        capsule = dlpack()
    else:
        # NOTE(review): stream=-1 is the consumer-side "do not synchronize"
        # value of the __dlpack__ stream argument -- confirm against the
        # DLPack protocol spec for each producer library.
        capsule = dlpack(stream=-1)
    if not PyCapsule_IsValid(capsule, b"dltensor"):
        raise BufferError("dlpack: invalid capsule object")

    managed = <DLManagedTensor*> PyCapsule_GetPointer(capsule, b"dltensor")
    dltensor = &managed.dl_tensor

    try:
        # Reject malformed metadata and anything that is neither C- nor
        # Fortran-contiguous before exposing the memory.
        dlpack_check_shape(dltensor)
        dlpack_check_contig(dltensor)

        buf = dlpack_get_data(dltensor)
        size = dlpack_get_size(dltensor)
        readonly = 0

        # Zero-size buffers may legitimately carry a NULL data pointer;
        # substitute a dummy non-NULL address while calling
        # PyBuffer_FillInfo (presumably to avoid its NULL-pointer
        # handling), then restore NULL in the filled view.
        fixnull = (buf == NULL and size == 0)
        if fixnull: buf = &fixnull
        PyBuffer_FillInfo(view, obj, buf, size, readonly, flags)
        if fixnull: view.buf = NULL

        # Only publish format/itemsize when the consumer asked for them.
        if (flags & PyBUF_FORMAT) == PyBUF_FORMAT:
            view.format = dlpack_get_format(dltensor)
            if view.format != BYTE_FMT:
                view.itemsize = dlpack_get_itemsize(dltensor)
    finally:
        # Consume the capsule even on error: run the producer's deleter
        # (if any) and rename the capsule so its own destructor will not
        # free the managed tensor a second time.
        if managed.deleter != NULL:
            managed.deleter(managed)
        PyCapsule_SetName(capsule, b"used_dltensor")
        del capsule
    return 0
|
||
#------------------------------------------------------------------------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why
stream=-1
and not stream=stream
? Is it because of the previous synchronize()
call?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
-1
comes from the Consumer, which is mpi4py. The previous synchronize()
is just mimicking users doing the right job (recall the change we did with as_raw()
in #60).