In [1]:
import dace
import cupy as cp

In [2]:
# Symbolic length shared by both array arguments; bound to a concrete value
# when the SDFG is called (passed as the keyword `N`).
N = dace.symbol('N')
# Step of the map range below: only indices 0, s, 2*s, ... (< N) are visited.
# NOTE(review): presumably read from the enclosing scope as a constant when the
# program is converted to an SDFG — confirm before changing it after conversion.
s = 33
# Strided element copy: for every index i in range(0, N, s), write B[i] into A[i].
# Elements of A at other indices are never written by this program.
# Both arguments are annotated as uint32 arrays of length N with GPU_Global
# storage, and the map carries the GPU_Device schedule.
@dace.program
def vector_copy_strides1d(A: dace.uint32[N] @ dace.dtypes.StorageType.GPU_Global, B: dace.uint32[N] @ dace.dtypes.StorageType.GPU_Global):
    # One map iteration per visited index; the range step is the stride `s`.
    for i in dace.map[0:N:s] @ dace.dtypes.ScheduleType.GPU_Device:
        A[i] = B[i]

# Convert the decorated program to its SDFG; the bare name on the last line
# makes the notebook display the resulting graph.
sdfg = vector_copy_strides1d.to_sdfg()
sdfg


In [3]:
# Problem size and input arrays: B holds 0..n-1, A starts out all zeros
# with the same shape and dtype as B.
n = 69
B = cp.arange(n, dtype=cp.uint32)
A = cp.zeros_like(B)

# Run the strided copy (B -> A), binding the symbolic size N to n.
sdfg(A=A, B=B, N=n)

# Reference result built independently of the arrays passed to the SDFG:
# zeros everywhere except every s-th slot, which holds its own index.
expected = cp.zeros(n, dtype=cp.uint32)
expected[::s] = cp.arange(0, n, s, dtype=cp.uint32)

# Report whether the computed array agrees with the reference.
print(
    "\n\nSUCCESS: A matches the expected result."
    if cp.array_equal(A, expected)
    else "\n\nERROR: A does not match the expected result."
)




SUCCESS: A matches the expected result.
