Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix-gpu-transform-copy-out #1231

Merged
merged 8 commits into from
Mar 24, 2023
25 changes: 24 additions & 1 deletion dace/transformation/interstate/gpu_transform_sdfg.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
""" Contains inter-state transformations of an SDFG to run on the GPU. """

from dace import data, memlet, dtypes, registry, sdfg as sd, symbolic
from dace import data, memlet, dtypes, registry, sdfg as sd, symbolic, subsets as sbs, propagate_memlets_sdfg
from dace.sdfg import nodes, scope
from dace.sdfg import utils as sdutil
from dace.transformation import transformation, helpers as xfh
Expand Down Expand Up @@ -162,6 +162,9 @@ def apply(self, _, sdfg: sd.SDFG):
output_nodes = []
global_code_nodes: Dict[sd.SDFGState, nodes.Tasklet] = defaultdict(list)

# Propagate memlets to ensure that we can find the true array subsets that are written.
propagate_memlets_sdfg(sdfg)

for state in sdfg.nodes():
sdict = state.scope_dict()
for node in state.nodes():
Expand Down Expand Up @@ -214,6 +217,26 @@ def apply(self, _, sdfg: sd.SDFG):
name = sdfg.add_datadesc('gpu_' + onodename, newdesc, find_new_name=True)
cloned_arrays[onodename] = name

# The following ensures that when writing to a subset of an array, we don't overwrite the rest of the array
# when copying back to the host. This is done by adding the array to `input_nodes`, which will copy
tbennun marked this conversation as resolved.
Show resolved Hide resolved
# the entire array to the GPU.
if (onodename, onode) not in input_nodes:
found_full_write = False
full_subset = sbs.Range.from_array(onode)
try:
for state in sdfg.nodes():
for node in state.nodes():
if (isinstance(node, nodes.AccessNode) and node.data == onodename):
for e in state.in_edges(node):
if e.data.get_dst_subset(e, state) == full_subset:
alexnick83 marked this conversation as resolved.
Show resolved Hide resolved
found_full_write = True
raise StopIteration
except StopIteration:
assert found_full_write
if not found_full_write:
input_nodes.append((onodename, onode))


# Replace nodes
for state in sdfg.nodes():
for node in state.nodes():
Expand Down
20 changes: 20 additions & 0 deletions tests/transformations/gpu_transform_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,26 @@ def main_program(a: dace.int32):
assert np.array_equal(out, np.array([0, 10] * 5, dtype=np.int32))


def test_write_subset():
    """Check that writing a sub-region of an array survives GPUTransformSDFG.

    The program only writes the ``[2:18, 2:18]`` interior of a 20x20 array.
    After applying ``GPUTransformSDFG``, running the SDFG on an all-ones
    array must give the same array as calling ``write_subset.f`` on an
    identical input.
    """

    @dace.program
    def write_subset(A: dace.int32[20, 20]):
        for i, j in dace.map[2:18, 2:18]:
            A[i, j] = i + j

    # Two identical all-ones inputs: one for the reference call, one for the SDFG.
    expected = np.full((20, 20), 1, dtype=np.int32)
    actual = expected.copy()

    gpu_sdfg = write_subset.to_sdfg(simplify=True)
    gpu_sdfg.apply_transformations(GPUTransformSDFG)

    # Both arrays are modified in place by their respective calls.
    write_subset.f(expected)
    gpu_sdfg(A=actual)

    assert np.array_equal(expected, actual)


if __name__ == '__main__':
    # When executed as a script, run the listed tests one after another.
    for run_test in (
            test_toplevel_transient_lifetime,
            test_scalar_to_symbol_in_nested_sdfg,
            test_write_subset,
    ):
        run_test()