CUDA: Add mapped_array_like and pinned_array_like #6092

Merged 1 commit on Sep 8, 2020
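This PR adds two allocation helpers that mirror the existing device_array_like(): pinned_array_like(ary) allocates a pinned (page-locked) host array with the same shape, dtype, and layout as ary, and mapped_array_like(ary) does the same for a host array mapped into the device address space. A minimal usage sketch (assuming a CUDA-capable environment; behavior is defined by the diff below):

    import numpy as np
    from numba import cuda

    src = np.arange(12, dtype=np.float32).reshape(4, 3)

    # Pinned (page-locked) host array with the same shape, dtype and layout.
    pinned = cuda.pinned_array_like(src)

    # Mapped (host-resident but device-accessible) array with the same layout.
    mapped = cuda.mapped_array_like(src)

    # Like np.empty(), the new arrays are uninitialised; copy data in as needed.
    pinned[:] = src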
2 changes: 2 additions & 0 deletions docs/source/cuda-reference/memory.rst
@@ -5,7 +5,9 @@ Memory Management
.. autofunction:: numba.cuda.device_array
.. autofunction:: numba.cuda.device_array_like
.. autofunction:: numba.cuda.pinned_array
.. autofunction:: numba.cuda.pinned_array_like
.. autofunction:: numba.cuda.mapped_array
.. autofunction:: numba.cuda.mapped_array_like
.. autofunction:: numba.cuda.pinned
.. autofunction:: numba.cuda.mapped

15 changes: 15 additions & 0 deletions docs/source/cuda/memory.rst
@@ -52,6 +52,21 @@ Pinned memory
   :noindex:
.. autofunction:: numba.cuda.pinned_array
   :noindex:
.. autofunction:: numba.cuda.pinned_array_like
   :noindex:


Mapped memory
=============

.. autofunction:: numba.cuda.mapped
   :noindex:
.. autofunction:: numba.cuda.mapped_array
   :noindex:
.. autofunction:: numba.cuda.mapped_array_like
   :noindex:



Streams
=======
64 changes: 50 additions & 14 deletions numba/cuda/api.py
@@ -130,8 +130,8 @@ def device_array(shape, dtype=np.float, strides=None, order='C', stream=0):
def pinned_array(shape, dtype=np.float, strides=None, order='C'):
"""pinned_array(shape, dtype=np.float, strides=None, order='C')

Allocate a np.ndarray with a buffer that is pinned (pagelocked).
Similar to np.empty().
Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
(pagelocked). Similar to :func:`np.empty() <numpy.empty>`.
"""
shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype,
order)
@@ -227,23 +227,22 @@ def _fill_stride_by_order(shape, dtype, order):
    return tuple(strides)


def device_array_like(ary, stream=0):
"""Call cuda.devicearray() with information from the array.
def _contiguous_strides_like_array(ary):
"""
Given an array, compute strides for a new contiguous array of the same
shape.
"""
# Avoid attempting to recompute strides if the default strides will be
# sufficient to create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.ndim <= 1:
return device_array(shape=ary.shape, dtype=ary.dtype, stream=stream)
elif ary.flags['F_CONTIGUOUS']:
return device_array(shape=ary.shape, dtype=ary.dtype, order='F',
stream=stream)
# Don't recompute strides if the default strides will be sufficient to
# create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
return None

# Otherwise, we need to compute new strides using an algorithm adapted from
# NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
# core/src/multiarray/ctors.c. We permute the strides in ascending order
# then compute the stride for the dimensions with the same permutation.

# Stride permuation. E.g. a stride array (4, -2, 12) becomes
# Stride permutation. E.g. a stride array (4, -2, 12) becomes
# [(1, -2), (0, 4), (2, 12)]
strideperm = [ x for x in enumerate(ary.strides) ]
strideperm.sort(key = lambda x: x[1])
@@ -254,10 +253,47 @@ def device_array_like(ary, stream=0):
    for i_perm, _ in strideperm:
        strides[i_perm] = stride
        stride *= ary.shape[i_perm]
    strides = tuple(strides)
    return tuple(strides)
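# A worked example of the permutation above (illustrative only, not part of
# this patch). Take a float32 view that is neither C- nor F-contiguous:
#
#     base = np.arange(24, dtype=np.float32).reshape(4, 6)
#     ary = base[:, ::2]          # shape (4, 3), strides (24, 8)
#
# strideperm = sorted(enumerate((24, 8)), key=lambda x: x[1]) = [(1, 8), (0, 24)]
# Walking it with stride = itemsize = 4:
#     axis 1: strides[1] = 4;    stride = 4 * shape[1] = 12
#     axis 0: strides[0] = 12;   stride = 12 * shape[0] = 48
# The result (12, 4) is the C-contiguous layout for the (4, 3) output, so the
# axes keep the same relative ordering as in the source array.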


def _order_like_array(ary):
    if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
        return 'F'
    else:
        return 'C'


def device_array_like(ary, stream=0):
    """
    Call :func:`device_array() <numba.cuda.device_array>` with information from
    the array.
    """
    strides = _contiguous_strides_like_array(ary)
    order = _order_like_array(ary)
    return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
                        stream=stream)
                        order=order, stream=stream)


def mapped_array_like(ary, stream=0, portable=False, wc=False):
    """
    Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
    from the array.
    """
    strides = _contiguous_strides_like_array(ary)
    order = _order_like_array(ary)
    return mapped_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
                        order=order, stream=stream, portable=portable, wc=wc)


def pinned_array_like(ary):
    """
    Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
    from the array.
    """
    strides = _contiguous_strides_like_array(ary)
    order = _order_like_array(ary)
    return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
                        order=order)
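# Taken together, _contiguous_strides_like_array and _order_like_array let the
# *_array_like helpers preserve the layout of their argument instead of always
# producing a C-ordered result. An illustrative check (not part of this patch;
# assumes a CUDA-capable environment):
#
#     import numpy as np
#     from numba import cuda
#
#     f_src = np.empty((2, 3, 4), dtype=np.float64, order='F')
#     assert cuda.device_array_like(f_src).strides == f_src.strides
#
#     sliced = np.empty((4, 6), dtype=np.float64)[:, ::2]   # non-contiguous view
#     assert cuda.pinned_array_like(sliced).flags['C_CONTIGUOUS']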


# Stream helper
2 changes: 1 addition & 1 deletion numba/cuda/simulator/__init__.py
@@ -1,7 +1,7 @@
from .api import *
from .reduction import Reduce
from .cudadrv.devicearray import (device_array, device_array_like, pinned,
                                  pinned_array, to_device, auto_device)
                                  pinned_array, pinned_array_like, to_device, auto_device)
from .cudadrv import devicearray
from .cudadrv.devices import require_context, gpus
from .cudadrv.devices import get_context as current_context
46 changes: 32 additions & 14 deletions numba/cuda/simulator/cudadrv/devicearray.py
@@ -277,35 +277,53 @@ def device_array(*args, **kwargs):
    return FakeCUDAArray(np.ndarray(*args, **kwargs), stream=stream)


def device_array_like(ary, stream=0):
    # Avoid attempting to recompute strides if the default strides will be
    # sufficient to create a contiguous array.
    if ary.flags['C_CONTIGUOUS'] or ary.ndim <= 1:
        return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype))
    elif ary.flags['F_CONTIGUOUS']:
        return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype,
                                        order='F'))
def _contiguous_strides_like_array(ary):
    """
    Given an array, compute strides for a new contiguous array of the same
    shape.
    """
    # Don't recompute strides if the default strides will be sufficient to
    # create a contiguous array.
    if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
        return None

    # Otherwise, we need to compute new strides using an algorithm adapted from
    # NumPy's v1.17.4's PyArray_NewLikeArrayWithShape in
    # NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
    # core/src/multiarray/ctors.c. We permute the strides in ascending order
    # then compute the stride for the dimensions with the same permutation.

    # Stride permuation. E.g. a stride array (4, -2, 12) becomes
    # Stride permutation. E.g. a stride array (4, -2, 12) becomes
    # [(1, -2), (0, 4), (2, 12)]
    strideperm = [x for x in enumerate(ary.strides)]
    strideperm.sort(key=lambda x: x[1])
    strideperm = [ x for x in enumerate(ary.strides) ]
    strideperm.sort(key = lambda x: x[1])

    # Compute new strides using permutation
    strides = [0] * len(ary.strides)
    stride = ary.dtype.itemsize
    for i_perm, _ in strideperm:
        strides[i_perm] = stride
        stride *= ary.shape[i_perm]
    strides = tuple(strides)
    return tuple(strides)

    return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype, strides=strides))

def _order_like_array(ary):
    if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
        return 'F'
    else:
        return 'C'


def device_array_like(ary, stream=0):
    strides = _contiguous_strides_like_array(ary)
    order = _order_like_array(ary)
    return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
                        order=order)

def pinned_array_like(ary):
    strides = _contiguous_strides_like_array(ary)
    order = _order_like_array(ary)
    return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
                        order=order)
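# The simulator exports the same helper (see simulator/__init__.py above), so
# the host-only code path can be exercised without a GPU. A sketch, with the
# environment variable set before Numba is imported:
#
#     $ NUMBA_ENABLE_CUDASIM=1 python
#     >>> import numpy as np
#     >>> from numba import cuda
#     >>> cuda.pinned_array_like(np.ones((3, 5), dtype=np.int32)).shape
#     (3, 5)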

def auto_device(ary, stream=0, copy=True):
    if isinstance(ary, FakeCUDAArray):