Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Option for utilizing GPU global memory #1405

Merged
merged 4 commits into from
Oct 21, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 22 additions & 1 deletion dace/transformation/auto/auto_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,11 +515,29 @@ def make_transients_persistent(sdfg: SDFG,
return result


def apply_gpu_storage(sdfg: SDFG) -> None:
    """
    Moves the SDFG's non-transient (input and output) data that uses default storage to GPU
    global memory.

    Non-transient scalars are only moved if at least one of their access nodes in the SDFG's
    states has a nonzero in-degree (i.e., the scalar is written to); other non-transient scalars
    keep their current storage.

    :param sdfg: The SDFG whose data descriptors are modified.
    :note: Operates in-place on the given SDFG.
    """
    # Collect the names of non-transient scalars that are the target of some edge.
    scalars_with_writes = set()
    for state in sdfg.nodes():
        for access in state.data_nodes():
            descriptor = access.desc(sdfg)
            if not isinstance(descriptor, dt.Scalar) or descriptor.transient:
                continue
            if state.in_degree(access) > 0:
                scalars_with_writes.add(access.data)

    # Switch eligible descriptors from default storage to GPU global memory.
    for name, descriptor in sdfg.arrays.items():
        if descriptor.transient or descriptor.storage != dtypes.StorageType.Default:
            continue
        # Scalars that are never written are left in their current storage.
        unwritten_scalar = isinstance(descriptor, dt.Scalar) and name not in scalars_with_writes
        if not unwritten_scalar:
            descriptor.storage = dtypes.StorageType.GPU_Global


def auto_optimize(sdfg: SDFG,
device: dtypes.DeviceType,
validate: bool = True,
validate_all: bool = False,
symbols: Dict[str, int] = None) -> SDFG:
symbols: Dict[str, int] = None,
use_gpu_storage: bool = False) -> SDFG:
"""
Runs a basic sequence of transformations to optimize a given SDFG to decent
performance. In particular, performs the following:
Expand All @@ -539,6 +557,7 @@ def auto_optimize(sdfg: SDFG,
have been applied.
:param validate_all: If True, validates the SDFG after every step.
:param symbols: Optional dict that maps symbols (str/symbolic) to int/float
:param use_gpu_storage: If True, changes the storage of non-transient data to GPU global memory.
:return: The optimized SDFG.
:note: Operates in-place on the given SDFG.
:note: This function is still experimental and may harm correctness in
Expand All @@ -565,6 +584,8 @@ def auto_optimize(sdfg: SDFG,
# Apply GPU transformations and set library node implementations

if device == dtypes.DeviceType.GPU:
if use_gpu_storage:
apply_gpu_storage(sdfg)
sdfg.apply_gpu_transformations()
sdfg.simplify()

Expand Down