Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Records in CUDA Const Memory #3186

Merged
merged 7 commits into from
Sep 10, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 5 additions & 21 deletions numba/cuda/cudaimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,29 +100,13 @@ def ptx_sreg_impl(context, builder, sig, args):
def ptx_cmem_arylike(context, builder, sig, args):
lmod = builder.module
[arr] = args
flat = arr.flatten(order='A')
aryty = sig.return_type
dtype = aryty.dtype

if isinstance(dtype, types.Complex):
elemtype = (types.float32
if dtype == types.complex64
else types.float64)
constvals = []
for i in range(flat.size):
elem = flat[i]
real = context.get_constant(elemtype, elem.real)
imag = context.get_constant(elemtype, elem.imag)
constvals.extend([real, imag])

elif dtype in types.number_domain:
constvals = [context.get_constant(dtype, flat[i])
for i in range(flat.size)]

else:
raise TypeError("unsupport type: %s" % dtype)

constary = lc.Constant.array(constvals[0].type, constvals)
constvals = [
context.get_constant(types.byte, i)
for i in arr.flatten(order='A').data.tobytes()
Copy link
Member

@sklam sklam Aug 9, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is causing errors on Windows and Python 2.7 in the following tests:

test_const_array (numba.cuda.tests.cudapy.test_constmem.TestCudaConstantMemory)
test_const_array_2d (numba.cuda.tests.cudapy.test_constmem.TestCudaConstantMemory)
test_const_array_3d (numba.cuda.tests.cudapy.test_constmem.TestCudaConstantMemory)
test_const_record (numba.cuda.tests.cudapy.test_constmem.TestCudaConstantMemory)
test_const_record_align (numba.cuda.tests.cudapy.test_constmem.TestCudaConstantMemory)



File "..\_test_env\lib\site-packages\numba\cuda\tests\cudapy\test_constmem.py", line 60:
def cuconstRecAlign(A, B, C, D, E):
    Z = cuda.const.array_like(CONST_RECORD_ALIGN)
    ^
[1] During: lowering "$0.5 = call ptx.cmem.arylike($0.4, kws=[], args=[Var($0.4, c:\conda64\conda-bld\numba_1533830313780\_test_env\lib\site-packages\numba\cuda\tests\cudapy\test_constmem.py (60))], func=ptx.cmem.arylike, vararg=None)" at c:\conda64\conda-bld\numba_1533830313780\_test_env\lib\site-packages\numba\cuda\tests\cudapy\test_constmem.py (60)

'buffer' object has no attribute 'tobytes'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is likely a Python 2.7 error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've pushed a fix for this - it uses this instead

]
constary = lc.Constant.array(Type.int(8), constvals)

addrspace = nvvm.ADDRSPACE_CONSTANT
gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
Expand Down
4 changes: 4 additions & 0 deletions numba/cuda/simulator/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ def __getitem__(self, configuration):
def bind(self):
    """Do nothing and return ``None``."""
    # NOTE(review): presumably kept so this simulator kernel exposes the same
    # method as the real CUDA kernel object -- confirm against the caller.
    pass


def specialize(self, *args):
    """Return this kernel object unchanged.

    The positional ``args`` are accepted and ignored, so callers may write
    ``kernel.specialize(a, b)[grid, block](a, b)`` and still get ``self``.
    """
    return self

def forall(self, ntasks, tpb=0, stream=0, sharedmem=0):
    """Return ``self`` indexed with ``(ntasks, 1, stream, sharedmem)``.

    ``tpb`` is accepted but not used: the second entry of the index tuple is
    always ``1``.
    """
    # NOTE(review): presumably the index tuple is the launch configuration
    # (grid, block, stream, shared memory) understood by __getitem__ --
    # confirm against that method.
    return self[ntasks, 1, stream, sharedmem]

Expand Down
123 changes: 110 additions & 13 deletions numba/cuda/tests/cudapy/test_constmem.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,35 @@

from numba import cuda
from numba.cuda.testing import unittest, SerialMixin

from numba.config import ENABLE_CUDASIM

# Host-side arrays that the kernels below pass to ``cuda.const.array_like``.

# 1-D float64: 0.0, 0.5, ..., 4.5
CONST1D = np.arange(10, dtype=np.float64) / 2.
# 2-D int32, converted to Fortran (column-major) order
CONST2D = np.asfortranarray(
    np.arange(100, dtype=np.int32).reshape(10, 10))
# 3-D complex64
CONST3D = ((np.arange(5 * 5 * 5, dtype=np.complex64).reshape(5, 5, 5) + 1j) /
           2j)
# Record array with one float field and one int field
CONST_RECORD = np.array(
    [(1.0, 2), (3.0, 4)],
    dtype=[('x', float), ('y', int)])
# Record array built with ``align=True``, so NumPy inserts padding around the
# uint32 field 'y' that follows three uint8 fields
CONST_RECORD_ALIGN = np.array(
    [(1, 2, 3, 0xDEADBEEF, 8), (4, 5, 6, 0xBEEFDEAD, 10)],
    dtype=np.dtype(
        dtype=[
            ('a', np.uint8),
            ('b', np.uint8),
            ('x', np.uint8),
            ('y', np.uint32),
            ('z', np.uint8),
        ],
        align=True))


def cuconst(A):
    """Store ``CONST1D[i] + 1.0`` into ``A[i]`` for this thread's index ``i``.

    ``CONST1D`` is read through ``cuda.const.array_like``.
    """
    C = cuda.const.array_like(CONST1D)
    i = cuda.grid(1)

    # +1 or it'll be loaded & stored as a u32
    A[i] = C[i] + 1.0


def cuconst2d(A):
Expand All @@ -33,28 +49,109 @@ def cuconst3d(A):
A[i, j, k] = C[i, j, k]


def cuconstRec(A, B):
    """Copy the fields of record ``i`` of ``CONST_RECORD`` into the outputs.

    Field ``'x'`` goes to ``A[i]`` and field ``'y'`` to ``B[i]``, where ``i``
    is this thread's index from ``cuda.grid(1)``.
    """
    C = cuda.const.array_like(CONST_RECORD)
    i = cuda.grid(1)
    A[i] = C[i]['x']
    B[i] = C[i]['y']


def cuconstRecAlign(A, B, C, D, E):
    """Copy the fields of record ``i`` of ``CONST_RECORD_ALIGN`` into the outputs.

    Fields ``'a'``, ``'b'``, ``'x'``, ``'y'`` and ``'z'`` go to ``A[i]``,
    ``B[i]``, ``C[i]``, ``D[i]`` and ``E[i]`` respectively, where ``i`` is this
    thread's index from ``cuda.grid(1)``.
    """
    Z = cuda.const.array_like(CONST_RECORD_ALIGN)
    i = cuda.grid(1)
    A[i] = Z[i]['a']
    B[i] = Z[i]['b']
    C[i] = Z[i]['x']
    D[i] = Z[i]['y']
    E[i] = Z[i]['z']


class TestCudaConstantMemory(SerialMixin, unittest.TestCase):
    """Tests for kernels that read arrays via ``cuda.const.array_like``.

    Each test launches a kernel and compares its output with the host-side
    array.  Unless ``ENABLE_CUDASIM`` is set, the tests also check that the
    generated PTX contains the expected ``ld.const`` instruction.
    """

    def test_const_array(self):
        """1-D float64 array; the kernel adds 1.0 to every element."""
        jcuconst = cuda.jit('void(float64[:])')(cuconst)
        self.assertTrue('.const' in jcuconst.ptx)
        A = np.zeros_like(CONST1D)
        jcuconst[2, 5](A)
        self.assertTrue(np.all(A == CONST1D + 1))

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.f64',
                jcuconst.ptx,
                "as we're adding to it, load as a double")

    def test_const_array_2d(self):
        """2-D int32 array copied into a C-ordered output."""
        jcuconst2d = cuda.jit('void(int32[:,:])')(cuconst2d)
        self.assertTrue('.const' in jcuconst2d.ptx)
        A = np.zeros_like(CONST2D, order='C')
        jcuconst2d[(2, 2), (5, 5)](A)
        self.assertTrue(np.all(A == CONST2D))

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.u32',
                jcuconst2d.ptx,
                "load the ints as ints")

    def test_const_array_3d(self):
        """3-D complex64 array copied into a Fortran-ordered output."""
        jcuconst3d = cuda.jit('void(complex64[:,:,:])')(cuconst3d)
        self.assertTrue('.const' in jcuconst3d.ptx)
        A = np.zeros_like(CONST3D, order='F')
        jcuconst3d[1, (5, 5, 5)](A)
        self.assertTrue(np.all(A == CONST3D))

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.v2.u32',
                jcuconst3d.ptx,
                "load the two halves of the complex as u32s")

    def test_const_record(self):
        """Record array with a float field and an int field."""
        A = np.zeros(2, dtype=float)
        B = np.zeros(2, dtype=int)
        jcuconst = cuda.jit(cuconstRec).specialize(A, B)

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.v2.u64',
                jcuconst.ptx,
                "the compiler realises it doesn't even need to "
                "interpret the bytes as float!")

        jcuconst[2, 1](A, B)
        np.testing.assert_allclose(A, CONST_RECORD['x'])
        np.testing.assert_allclose(B, CONST_RECORD['y'])

    def test_const_record_align(self):
        """Aligned record array; every field is read back individually."""
        A = np.zeros(2, dtype=np.float64)
        B = np.zeros(2, dtype=np.float64)
        C = np.zeros(2, dtype=np.float64)
        D = np.zeros(2, dtype=np.float64)
        E = np.zeros(2, dtype=np.float64)
        jcuconst = cuda.jit(cuconstRecAlign).specialize(A, B, C, D, E)

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.v4.u8',
                jcuconst.ptx,
                'load the first three bytes as a vector')

            self.assertIn(
                'ld.const.u32',
                jcuconst.ptx,
                'load the uint32 natively')

            self.assertIn(
                'ld.const.u8',
                jcuconst.ptx,
                'load the last byte by itself')

        jcuconst[2, 1](A, B, C, D, E)
        np.testing.assert_allclose(A, CONST_RECORD_ALIGN['a'])
        np.testing.assert_allclose(B, CONST_RECORD_ALIGN['b'])
        np.testing.assert_allclose(C, CONST_RECORD_ALIGN['x'])
        np.testing.assert_allclose(D, CONST_RECORD_ALIGN['y'])
        np.testing.assert_allclose(E, CONST_RECORD_ALIGN['z'])


# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()