Merge pull request #6092 from gmarkall/grm-array-like-additions
CUDA: Add mapped_array_like and pinned_array_like
sklam committed Sep 8, 2020
2 parents 9adc4a2 + 7733f69 commit 46f663c
Showing 6 changed files with 209 additions and 97 deletions.
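
For orientation, a hedged usage sketch of the two functions this PR adds (not part of the commit; the array names are illustrative, and a CUDA-capable device or the simulator is assumed):

import numpy as np
from numba import cuda

host = np.zeros((16, 32), dtype=np.float32)

# Page-locked (pinned) host array with the same shape, dtype and layout.
pinned = cuda.pinned_array_like(host)

# Host array mapped into the device address space, likewise matching `host`.
mapped = cuda.mapped_array_like(host)

assert pinned.shape == mapped.shape == host.shape
assert pinned.dtype == mapped.dtype == host.dtype

Both mirror the existing device_array_like: shape, dtype and (where relevant) strides and order come from the template array rather than being spelled out by hand.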
2 changes: 2 additions & 0 deletions docs/source/cuda-reference/memory.rst
@@ -5,7 +5,9 @@ Memory Management
.. autofunction:: numba.cuda.device_array
.. autofunction:: numba.cuda.device_array_like
.. autofunction:: numba.cuda.pinned_array
.. autofunction:: numba.cuda.pinned_array_like
.. autofunction:: numba.cuda.mapped_array
.. autofunction:: numba.cuda.mapped_array_like
.. autofunction:: numba.cuda.pinned
.. autofunction:: numba.cuda.mapped

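
The pinned and mapped entries listed above are the existing context-manager APIs rather than allocators; a hedged sketch of how they are typically used (exact semantics are in the linked autofunction docs, and the value yielded by cuda.mapped is an assumption here):

import numpy as np
from numba import cuda

a = np.arange(10, dtype=np.float64)

# Temporarily page-lock an existing host array, e.g. to speed up a transfer.
with cuda.pinned(a):
    d_a = cuda.to_device(a)

# Temporarily map an existing host array into the device address space;
# the mapped view is assumed to be usable from kernels directly.
with cuda.mapped(a) as m:
    print(m.shape)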
15 changes: 15 additions & 0 deletions docs/source/cuda/memory.rst
@@ -52,6 +52,21 @@ Pinned memory
:noindex:
.. autofunction:: numba.cuda.pinned_array
:noindex:
.. autofunction:: numba.cuda.pinned_array_like
:noindex:


Mapped memory
=============

.. autofunction:: numba.cuda.mapped
:noindex:
.. autofunction:: numba.cuda.mapped_array
:noindex:
.. autofunction:: numba.cuda.mapped_array_like
:noindex:



Streams
=======
64 changes: 50 additions & 14 deletions numba/cuda/api.py
@@ -125,8 +125,8 @@ def device_array(shape, dtype=np.float, strides=None, order='C', stream=0):
def pinned_array(shape, dtype=np.float, strides=None, order='C'):
"""pinned_array(shape, dtype=np.float, strides=None, order='C')
Allocate a np.ndarray with a buffer that is pinned (pagelocked).
Similar to np.empty().
Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
(pagelocked). Similar to :func:`np.empty() <numpy.empty>`.
"""
shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype,
order)
@@ -222,23 +222,22 @@ def _fill_stride_by_order(shape, dtype, order):
return tuple(strides)


def device_array_like(ary, stream=0):
"""Call cuda.devicearray() with information from the array.
def _contiguous_strides_like_array(ary):
"""
Given an array, compute strides for a new contiguous array of the same
shape.
"""
# Avoid attempting to recompute strides if the default strides will be
# sufficient to create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.ndim <= 1:
return device_array(shape=ary.shape, dtype=ary.dtype, stream=stream)
elif ary.flags['F_CONTIGUOUS']:
return device_array(shape=ary.shape, dtype=ary.dtype, order='F',
stream=stream)
# Don't recompute strides if the default strides will be sufficient to
# create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
return None

# Otherwise, we need to compute new strides using an algorithm adapted from
# NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
# core/src/multiarray/ctors.c. We permute the strides in ascending order
# then compute the stride for the dimensions with the same permutation.

# Stride permuation. E.g. a stride array (4, -2, 12) becomes
# Stride permutation. E.g. a stride array (4, -2, 12) becomes
# [(1, -2), (0, 4), (2, 12)]
strideperm = [ x for x in enumerate(ary.strides) ]
strideperm.sort(key = lambda x: x[1])
@@ -249,10 +248,47 @@ def device_array_like(ary, stream=0):
for i_perm, _ in strideperm:
strides[i_perm] = stride
stride *= ary.shape[i_perm]
strides = tuple(strides)
return tuple(strides)


def _order_like_array(ary):
if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
return 'F'
else:
return 'C'


def device_array_like(ary, stream=0):
"""
Call :func:`device_array() <numba.cuda.device_array>` with information from
the array.
"""
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
stream=stream)
order=order, stream=stream)


def mapped_array_like(ary, stream=0, portable=False, wc=False):
"""
Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
from the array.
"""
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return mapped_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order, stream=stream, portable=portable, wc=wc)


def pinned_array_like(ary):
"""
Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
from the array.
"""
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order)


# Stream helper
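
To make the stride-permutation comment in _contiguous_strides_like_array concrete, here is a standalone sketch of the same computation (an illustrative re-implementation, not an import from numba.cuda.api): strides are sorted in ascending order and contiguous strides are rebuilt in that axis order, so the relative memory layout of the axes is preserved.

import numpy as np

def contiguous_strides_like(ary):
    # Default strides already yield a contiguous array in these cases.
    if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
        return None
    # Sort axes by stride, then pack new strides in that same order.
    strideperm = sorted(enumerate(ary.strides), key=lambda x: x[1])
    strides = [0] * ary.ndim
    stride = ary.dtype.itemsize
    for i_perm, _ in strideperm:
        strides[i_perm] = stride
        stride *= ary.shape[i_perm]
    return tuple(strides)

# A Fortran-ordered array sliced so that it is no longer contiguous:
a = np.empty((5, 4, 3), dtype=np.float64, order='F')[::2]
print(a.shape, a.strides)          # (3, 4, 3) (16, 40, 160)
print(contiguous_strides_like(a))  # (8, 24, 96): packed, Fortran-like axis order

device_array_like, mapped_array_like and pinned_array_like then pass these strides (or None, together with the order from _order_like_array) to the corresponding allocation function.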
2 changes: 1 addition & 1 deletion numba/cuda/simulator/__init__.py
@@ -1,7 +1,7 @@
from .api import *
from .reduction import Reduce
from .cudadrv.devicearray import (device_array, device_array_like, pinned,
pinned_array, to_device, auto_device)
pinned_array, pinned_array_like, to_device, auto_device)
from .cudadrv import devicearray
from .cudadrv.devices import require_context, gpus
from .cudadrv.devices import get_context as current_context
46 changes: 32 additions & 14 deletions numba/cuda/simulator/cudadrv/devicearray.py
@@ -277,35 +277,53 @@ def device_array(*args, **kwargs):
return FakeCUDAArray(np.ndarray(*args, **kwargs), stream=stream)


def device_array_like(ary, stream=0):
# Avoid attempting to recompute strides if the default strides will be
# sufficient to create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.ndim <= 1:
return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype))
elif ary.flags['F_CONTIGUOUS']:
return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype,
order='F'))
def _contiguous_strides_like_array(ary):
"""
Given an array, compute strides for a new contiguous array of the same
shape.
"""
# Don't recompute strides if the default strides will be sufficient to
# create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
return None

# Otherwise, we need to compute new strides using an algorithm adapted from
# NumPy's v1.17.4's PyArray_NewLikeArrayWithShape in
# NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
# core/src/multiarray/ctors.c. We permute the strides in ascending order
# then compute the stride for the dimensions with the same permutation.

# Stride permuation. E.g. a stride array (4, -2, 12) becomes
# Stride permutation. E.g. a stride array (4, -2, 12) becomes
# [(1, -2), (0, 4), (2, 12)]
strideperm = [x for x in enumerate(ary.strides)]
strideperm.sort(key=lambda x: x[1])
strideperm = [ x for x in enumerate(ary.strides) ]
strideperm.sort(key = lambda x: x[1])

# Compute new strides using permutation
strides = [0] * len(ary.strides)
stride = ary.dtype.itemsize
for i_perm, _ in strideperm:
strides[i_perm] = stride
stride *= ary.shape[i_perm]
strides = tuple(strides)
return tuple(strides)

return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype, strides=strides))

def _order_like_array(ary):
if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
return 'F'
else:
return 'C'


def device_array_like(ary, stream=0):
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order)

def pinned_array_like(ary):
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order)

def auto_device(ary, stream=0, copy=True):
if isinstance(ary, FakeCUDAArray):
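
A hedged sketch of how the simulator changes are exercised: with NUMBA_ENABLE_CUDASIM=1 set in the environment before importing numba, the same calls resolve to the fake implementations above and preserve layout in the same way (the F-ordered input array here is illustrative only).

import numpy as np
from numba import cuda

f_ordered = np.empty((4, 6), dtype=np.float32, order='F')

d = cuda.device_array_like(f_ordered)
p = cuda.pinned_array_like(f_ordered)

# Both allocations keep the Fortran layout of the template array.
assert d.strides == f_ordered.strides
assert p.strides == f_ordered.strides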
