Merge pull request #6092 from gmarkall/grm-array-like-additions
CUDA: Add mapped_array_like and pinned_array_like
sklam committed Sep 8, 2020
2 parents 9adc4a2 + 7733f69 commit 46f663c
Showing 6 changed files with 209 additions and 97 deletions.
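
For orientation, a hedged usage sketch of the two functions this PR adds (not part of the commit; the array names are illustrative, and a CUDA-capable device or the simulator is assumed):

import numpy as np
from numba import cuda

host = np.zeros((16, 32), dtype=np.float32)

# Page-locked (pinned) host array with the same shape, dtype and layout.
pinned = cuda.pinned_array_like(host)

# Host array mapped into the device address space, likewise matching `host`.
mapped = cuda.mapped_array_like(host)

assert pinned.shape == mapped.shape == host.shape
assert pinned.dtype == mapped.dtype == host.dtype

Both mirror the existing device_array_like: shape, dtype and (where relevant) strides and order come from the template array rather than being spelled out by hand.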
2 changes: 2 additions & 0 deletions docs/source/cuda-reference/memory.rst
@@ -5,7 +5,9 @@ Memory Management
.. autofunction:: numba.cuda.device_array
.. autofunction:: numba.cuda.device_array_like
.. autofunction:: numba.cuda.pinned_array
.. autofunction:: numba.cuda.pinned_array_like
.. autofunction:: numba.cuda.mapped_array
.. autofunction:: numba.cuda.mapped_array_like
.. autofunction:: numba.cuda.pinned
.. autofunction:: numba.cuda.mapped

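
The pinned and mapped entries listed above are the existing context-manager APIs rather than allocators; a hedged sketch of how they are typically used (exact semantics are in the linked autofunction docs, and the value yielded by cuda.mapped is an assumption here):

import numpy as np
from numba import cuda

a = np.arange(10, dtype=np.float64)

# Temporarily page-lock an existing host array, e.g. to speed up a transfer.
with cuda.pinned(a):
    d_a = cuda.to_device(a)

# Temporarily map an existing host array into the device address space;
# the mapped view is assumed to be usable from kernels directly.
with cuda.mapped(a) as m:
    print(m.shape)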
15 changes: 15 additions & 0 deletions docs/source/cuda/memory.rst
@@ -52,6 +52,21 @@ Pinned memory
:noindex:
.. autofunction:: numba.cuda.pinned_array
:noindex:
.. autofunction:: numba.cuda.pinned_array_like
:noindex:


Mapped memory
=============

.. autofunction:: numba.cuda.mapped
:noindex:
.. autofunction:: numba.cuda.mapped_array
:noindex:
.. autofunction:: numba.cuda.mapped_array_like
:noindex:



Streams
=======
64 changes: 50 additions & 14 deletions numba/cuda/api.py
@@ -125,8 +125,8 @@ def device_array(shape, dtype=np.float, strides=None, order='C', stream=0):
def pinned_array(shape, dtype=np.float, strides=None, order='C'):
"""pinned_array(shape, dtype=np.float, strides=None, order='C')
Allocate a np.ndarray with a buffer that is pinned (pagelocked).
Similar to np.empty().
Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
(pagelocked). Similar to :func:`np.empty() <numpy.empty>`.
"""
shape, strides, dtype = _prepare_shape_strides_dtype(shape, strides, dtype,
order)
@@ -222,23 +222,22 @@ def _fill_stride_by_order(shape, dtype, order):
return tuple(strides)


def device_array_like(ary, stream=0):
"""Call cuda.devicearray() with information from the array.
def _contiguous_strides_like_array(ary):
"""
Given an array, compute strides for a new contiguous array of the same
shape.
"""
# Avoid attempting to recompute strides if the default strides will be
# sufficient to create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.ndim <= 1:
return device_array(shape=ary.shape, dtype=ary.dtype, stream=stream)
elif ary.flags['F_CONTIGUOUS']:
return device_array(shape=ary.shape, dtype=ary.dtype, order='F',
stream=stream)
# Don't recompute strides if the default strides will be sufficient to
# create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
return None

# Otherwise, we need to compute new strides using an algorithm adapted from
# NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
# core/src/multiarray/ctors.c. We permute the strides in ascending order
# then compute the stride for the dimensions with the same permutation.

# Stride permuation. E.g. a stride array (4, -2, 12) becomes
# Stride permutation. E.g. a stride array (4, -2, 12) becomes
# [(1, -2), (0, 4), (2, 12)]
strideperm = [ x for x in enumerate(ary.strides) ]
strideperm.sort(key = lambda x: x[1])
@@ -249,10 +248,47 @@ def device_array_like(ary, stream=0):
for i_perm, _ in strideperm:
strides[i_perm] = stride
stride *= ary.shape[i_perm]
strides = tuple(strides)
return tuple(strides)


def _order_like_array(ary):
if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
return 'F'
else:
return 'C'


def device_array_like(ary, stream=0):
"""
Call :func:`device_array() <numba.cuda.device_array>` with information from
the array.
"""
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
stream=stream)
order=order, stream=stream)


def mapped_array_like(ary, stream=0, portable=False, wc=False):
"""
Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
from the array.
"""
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return mapped_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order, stream=stream, portable=portable, wc=wc)


def pinned_array_like(ary):
"""
Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
from the array.
"""
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order)


# Stream helper
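
To make the stride-permutation comment in _contiguous_strides_like_array concrete, here is a standalone sketch of the same computation (an illustrative re-implementation, not an import from numba.cuda.api): strides are sorted in ascending order and contiguous strides are rebuilt in that axis order, so the relative memory layout of the axes is preserved.

import numpy as np

def contiguous_strides_like(ary):
    # Default strides already yield a contiguous array in these cases.
    if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
        return None
    # Sort axes by stride, then pack new strides in that same order.
    strideperm = sorted(enumerate(ary.strides), key=lambda x: x[1])
    strides = [0] * ary.ndim
    stride = ary.dtype.itemsize
    for i_perm, _ in strideperm:
        strides[i_perm] = stride
        stride *= ary.shape[i_perm]
    return tuple(strides)

# A Fortran-ordered array sliced so that it is no longer contiguous:
a = np.empty((5, 4, 3), dtype=np.float64, order='F')[::2]
print(a.shape, a.strides)          # (3, 4, 3) (16, 40, 160)
print(contiguous_strides_like(a))  # (8, 24, 96): packed, Fortran-like axis order

device_array_like, mapped_array_like and pinned_array_like then pass these strides (or None, together with the order from _order_like_array) to the corresponding allocation function.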
2 changes: 1 addition & 1 deletion numba/cuda/simulator/__init__.py
@@ -1,7 +1,7 @@
from .api import *
from .reduction import Reduce
from .cudadrv.devicearray import (device_array, device_array_like, pinned,
pinned_array, to_device, auto_device)
pinned_array, pinned_array_like, to_device, auto_device)
from .cudadrv import devicearray
from .cudadrv.devices import require_context, gpus
from .cudadrv.devices import get_context as current_context
46 changes: 32 additions & 14 deletions numba/cuda/simulator/cudadrv/devicearray.py
@@ -277,35 +277,53 @@ def device_array(*args, **kwargs):
return FakeCUDAArray(np.ndarray(*args, **kwargs), stream=stream)


def device_array_like(ary, stream=0):
# Avoid attempting to recompute strides if the default strides will be
# sufficient to create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.ndim <= 1:
return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype))
elif ary.flags['F_CONTIGUOUS']:
return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype,
order='F'))
def _contiguous_strides_like_array(ary):
"""
Given an array, compute strides for a new contiguous array of the same
shape.
"""
# Don't recompute strides if the default strides will be sufficient to
# create a contiguous array.
if ary.flags['C_CONTIGUOUS'] or ary.flags['F_CONTIGUOUS'] or ary.ndim <= 1:
return None

# Otherwise, we need to compute new strides using an algorithm adapted from
# NumPy's v1.17.4's PyArray_NewLikeArrayWithShape in
# NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
# core/src/multiarray/ctors.c. We permute the strides in ascending order
# then compute the stride for the dimensions with the same permutation.

# Stride permuation. E.g. a stride array (4, -2, 12) becomes
# Stride permutation. E.g. a stride array (4, -2, 12) becomes
# [(1, -2), (0, 4), (2, 12)]
strideperm = [x for x in enumerate(ary.strides)]
strideperm.sort(key=lambda x: x[1])
strideperm = [ x for x in enumerate(ary.strides) ]
strideperm.sort(key = lambda x: x[1])

# Compute new strides using permutation
strides = [0] * len(ary.strides)
stride = ary.dtype.itemsize
for i_perm, _ in strideperm:
strides[i_perm] = stride
stride *= ary.shape[i_perm]
strides = tuple(strides)
return tuple(strides)

return FakeCUDAArray(np.ndarray(shape=ary.shape, dtype=ary.dtype, strides=strides))

def _order_like_array(ary):
if ary.flags['F_CONTIGUOUS'] and not ary.flags['C_CONTIGUOUS']:
return 'F'
else:
return 'C'


def device_array_like(ary, stream=0):
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return device_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order)

def pinned_array_like(ary):
strides = _contiguous_strides_like_array(ary)
order = _order_like_array(ary)
return pinned_array(shape=ary.shape, dtype=ary.dtype, strides=strides,
order=order)

def auto_device(ary, stream=0, copy=True):
if isinstance(ary, FakeCUDAArray):
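
A hedged sketch of how the simulator changes are exercised: with NUMBA_ENABLE_CUDASIM=1 set in the environment before importing numba, the same calls resolve to the fake implementations above and preserve layout in the same way (the F-ordered input array here is illustrative only).

import numpy as np
from numba import cuda

f_ordered = np.empty((4, 6), dtype=np.float32, order='F')

d = cuda.device_array_like(f_ordered)
p = cuda.pinned_array_like(f_ordered)

# Both allocations keep the Fortran layout of the template array.
assert d.strides == f_ordered.strides
assert p.strides == f_ordered.strides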
