Cutout-Assisted Debugging #1208

Merged · 17 commits · Mar 10, 2023
4 changes: 2 additions & 2 deletions dace/codegen/codegen.py
@@ -166,9 +166,9 @@ def generate_code(sdfg, validate=True) -> List[CodeObject]:
import shutil
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
sdfg.save(f'{tmp_dir}/test.sdfg')
sdfg.save(f'{tmp_dir}/test.sdfg', hash=False)
sdfg2 = SDFG.from_file(f'{tmp_dir}/test.sdfg')
sdfg2.save(f'{tmp_dir}/test2.sdfg')
sdfg2.save(f'{tmp_dir}/test2.sdfg', hash=False)
print('Testing SDFG serialization...')
if not filecmp.cmp(f'{tmp_dir}/test.sdfg', f'{tmp_dir}/test2.sdfg'):
shutil.move(f"{tmp_dir}/test.sdfg", "test.sdfg")
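
The `hash=False` arguments above keep the content-hash metadata out of the serialized files, so the byte-for-byte comparison only fails on genuine serialization mismatches. A minimal, self-contained sketch of the same round-trip check (an empty SDFG stands in for a real program):

import dace
import filecmp
import tempfile

# Any SDFG works here; an empty one keeps the sketch self-contained.
sdfg = dace.SDFG('roundtrip_example')

with tempfile.TemporaryDirectory() as tmp_dir:
    sdfg.save(f'{tmp_dir}/test.sdfg', hash=False)    # omit hash metadata
    sdfg2 = dace.SDFG.from_file(f'{tmp_dir}/test.sdfg')
    sdfg2.save(f'{tmp_dir}/test2.sdfg', hash=False)
    # With hashes omitted, any byte difference is a real serialization bug.
    assert filecmp.cmp(f'{tmp_dir}/test.sdfg', f'{tmp_dir}/test2.sdfg', shallow=False)
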
56 changes: 56 additions & 0 deletions dace/codegen/instrumentation/data/data_dump.py
@@ -101,6 +101,33 @@ def on_sdfg_end(self, sdfg: SDFG, local_stream: CodeIOStream, global_stream: CodeIOStream):
if sdfg.parent is None:
sdfg.append_exit_code('delete __state->serializer;\n')

def on_state_begin(self, sdfg: SDFG, state: SDFGState, local_stream: CodeIOStream, global_stream: CodeIOStream):
if state.symbol_instrument == dtypes.DataInstrumentationType.No_Instrumentation:
return

condition_preamble, condition_postamble = '', ''
condition: Optional[CodeBlock] = state.symbol_instrument_condition
if condition is not None and not condition.as_string == '1':
cond_string = None
if condition.language == dtypes.Language.CPP:
cond_string = condition.as_string
elif condition.language == dtypes.Language.Python:
cond_string = cppunparse.py2cpp(condition.code[0], expr_semicolon=False)
else:
warnings.warn('Unrecognized language %s in codeblock' % condition.language)
cond_string = condition.as_string
condition_preamble = f'if ({cond_string})' + ' {'
condition_postamble = '}'

state_id = sdfg.node_id(state)
local_stream.write(condition_preamble, sdfg, state_id)
defined_symbols = state.defined_symbols()
for sym, _ in defined_symbols.items():
local_stream.write(
f'__state->serializer->save_symbol("{sym}", "{state_id}", {cpp.sym2cpp(sym)});\n', sdfg, state_id
)
local_stream.write(condition_postamble, sdfg, state_id)

def on_node_end(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode, outer_stream: CodeIOStream,
inner_stream: CodeIOStream, global_stream: CodeIOStream):
from dace.codegen.dispatcher import DefinedType # Avoid import loop
@@ -189,6 +216,35 @@ def on_sdfg_end(self, sdfg: SDFG, local_stream: CodeIOStream, global_stream: CodeIOStream):
if sdfg.parent is None:
sdfg.append_exit_code('delete __state->serializer;\n')

def on_state_begin(self, sdfg: SDFG, state: SDFGState, local_stream: CodeIOStream, global_stream: CodeIOStream):
if state.symbol_instrument == dtypes.DataInstrumentationType.No_Instrumentation:
return

condition_preamble, condition_postamble = '', ''
condition: Optional[CodeBlock] = state.symbol_instrument_condition
if condition is not None and not condition.as_string == '1':
cond_string = None
if condition.language == dtypes.Language.CPP:
cond_string = condition.as_string
elif condition.language == dtypes.Language.Python:
cond_string = cppunparse.py2cpp(condition.code[0], expr_semicolon=False)
else:
warnings.warn('Unrecognized language %s in codeblock' % condition.language)
cond_string = condition.as_string
condition_preamble = f'if ({cond_string})' + ' {'
condition_postamble = '}'

state_id = sdfg.node_id(state)
local_stream.write(condition_preamble, sdfg, state_id)
defined_symbols = state.defined_symbols()
for sym, sym_type in defined_symbols.items():
local_stream.write(
f'{cpp.sym2cpp(sym)} = ' +
f'__state->serializer->restore_symbol("{sym}", "{state_id}", {cpp.sym2cpp(sym)});\n',
sdfg, state_id
)
local_stream.write(condition_postamble, sdfg, state_id)

def on_node_begin(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode, outer_stream: CodeIOStream,
inner_stream: CodeIOStream, global_stream: CodeIOStream):
from dace.codegen.dispatcher import DefinedType # Avoid import loop
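
Both serializers gain an `on_state_begin` hook: the dump provider emits a `save_symbol` call for every symbol defined in the state, the restore provider emits the matching `restore_symbol` assignment, and both wrap the emitted code in an `if (...) { ... }` guard when a non-trivial condition is attached. A hedged sketch of enabling this from Python, using the `symbol_instrument` and `symbol_instrument_condition` state properties this PR introduces:

import dace
from dace.properties import CodeBlock

sdfg = dace.SDFG('symbol_instrumentation_example')
state = sdfg.add_state('compute')

# Serialize all symbols defined in this state when it begins executing.
state.symbol_instrument = dace.DataInstrumentationType.Save
# Optional guard; a Python condition is translated to C++ via cppunparse.py2cpp.
state.symbol_instrument_condition = CodeBlock('i > 0', language=dace.Language.Python)
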
105 changes: 65 additions & 40 deletions dace/codegen/instrumentation/data/data_report.py
@@ -5,7 +5,7 @@
import os

from dace import dtypes, SDFG
from dace.data import ArrayLike # Type hint
from dace.data import ArrayLike, Number # Type hint

import numpy as np

@@ -19,10 +19,10 @@ class InstrumentedDataReport:
automatically for correctness checking / data debugging.

The folder structure of a data report is as follows:
/path/to/report/<array name>/<uuid>_<version>.bin
where <array name> is the array in the SDFG, <uuid> is a unique identifier to the access node from which
this array was saved, and <version> is a running number for the currently-saved array (e.g., when an access node is
written to multiple times in a loop).
/path/to/report/<array or symbol name>/<uuid>_<version>.bin
where <array or symbol name> is the array or symbol in the SDFG, <uuid> is a unique identifier to the access node
(or state for symbols) from which this array or symbol was saved, and <version> is a running number for the
currently-saved array or symbol (e.g., when an access node is written to multiple times in a loop).

The files themselves are direct binary representations of the whole data (with padding and strides), for complete
reproducibility. When accessed from the report, a numpy wrapper shows the user-accessible view of that array.
@@ -45,7 +45,7 @@ class InstrumentedDataReport:
sdfg: SDFG
folder: str
files: Dict[str, List[str]]
loaded_arrays: Dict[Tuple[str, int], ArrayLike]
loaded_values: Dict[Tuple[str, int], Union[ArrayLike, Number]]

def __init__(self, sdfg: SDFG, folder: str) -> None:
"""
@@ -57,7 +57,7 @@ def __init__(self, sdfg: SDFG, folder: str) -> None:
self.sdfg = sdfg
self.folder = folder
self.files = {}
self.loaded_arrays = {}
self.loaded_values = {}

# Prepare file mapping
array_names = os.listdir(folder)
@@ -76,7 +76,7 @@ def keys(self) -> Set[str]:
""" Returns the array names available in this data report. """
return self.files.keys()

def _read_file(self, filename: str, npdtype: np.dtype) -> Tuple[ArrayLike, ArrayLike]:
def _read_array_file(self, filename: str, npdtype: np.dtype) -> Tuple[ArrayLike, ArrayLike]:
"""
Reads a formatted instrumented data file.

@@ -95,46 +95,70 @@ def _read_file(self, filename: str, npdtype: np.dtype) -> Tuple[ArrayLike, ArrayLike]:
view = np.ndarray(shape, npdtype, buffer=nparr, strides=strides)
return nparr, view

def __getitem__(self, item: str) -> Union[ArrayLike, List[ArrayLike]]:
def _read_symbol_file(self, filename: str, npdtype: np.dtype) -> Number:
with open(filename, 'rb') as fp:
npclass = getattr(np, str(npdtype))
byteval = fp.read(npdtype.itemsize)
val = npclass(byteval)
return val

def __getitem__(self, item: str) -> Union[ArrayLike, Number, List[ArrayLike], List[Number]]:
"""
Returns the instrumented (saved) data from the report according to the data descriptor (array) name.
Returns the instrumented (saved) data from the report according to the data descriptor (array) or symbol name.

:param item: Name of the array to read.
:return: An array (if a single entry in the report is given) or a list of versions of the array across
the report.
:param item: Name of the array or symbol to read.
:return: An array (if a single entry in the report is given) or symbol, or a list of versions of the array
or symbol across the report.
"""
filenames = self.files[item]
desc = self.sdfg.arrays[item]
dtype: dtypes.typeclass = desc.dtype
npdtype = dtype.as_numpy_dtype()

results = []
for i, file in enumerate(filenames):
nparr, view = self._read_file(file, npdtype)
self.loaded_arrays[item, i] = nparr
results.append(view)
if item in self.sdfg.arrays:
desc = self.sdfg.arrays[item]
dtype: dtypes.typeclass = desc.dtype
npdtype = dtype.as_numpy_dtype()
for i, file in enumerate(filenames):
nparr, view = self._read_array_file(file, npdtype)
self.loaded_values[item, i] = nparr
results.append(view)
elif item in self.sdfg.symbols:
dtype: dtypes.typeclass = self.sdfg.symbols[item]
npdtype = dtype.as_numpy_dtype()
for i, file in enumerate(filenames):
val = self._read_symbol_file(file, npdtype)
self.loaded_values[item, i] = val
results.append(val)
else:
raise KeyError(f'Item {item} not found in report')

if len(results) == 1:
return results[0]
return results

def get_first_version(self, item: str) -> ArrayLike:
def get_first_version(self, item: str) -> Union[ArrayLike, Number]:
"""
Returns the first version of the instrumented (saved) data from the report according to the data descriptor
(array) name.
(array) or symbol name.

:param item: Name of the array to read.
:return: The array from the report.
:param item: Name of the array or symbol to read.
:return: The array or symbol value from the report.
"""
filenames = self.files[item]
desc = self.sdfg.arrays[item]
dtype: dtypes.typeclass = desc.dtype
npdtype = dtype.as_numpy_dtype()

file = next(iter(filenames))
nparr, view = self._read_file(file, npdtype)
self.loaded_arrays[item, 0] = nparr
return view
if item in self.sdfg.arrays:
desc = self.sdfg.arrays[item]
dtype: dtypes.typeclass = desc.dtype
npdtype = dtype.as_numpy_dtype()

file = next(iter(filenames))
nparr, view = self._read_array_file(file, npdtype)
self.loaded_values[item, 0] = nparr
return view
elif item in self.sdfg.symbols:
file = next(iter(filenames))
dtype: dtypes.typeclass = self.sdfg.symbols[item]
val = self._read_symbol_file(file, dtype.as_numpy_dtype())
self.loaded_values[item, 0] = val
return val
else:
raise KeyError(f'Item not found in report: {item}')

def update_report(self):
"""
Expand All @@ -143,10 +167,11 @@ def update_report(self):

:see: dace.dtypes.DataInstrumentationType.Restore
"""
for (k, i), loaded in self.loaded_arrays.items():
dtype_bytes = loaded.dtype.itemsize
with open(self.files[k][i], 'wb') as fp:
fp.write(struct.pack('i', loaded.ndim))
fp.write(struct.pack('i' * loaded.ndim, *loaded.shape))
fp.write(struct.pack('i' * loaded.ndim, *(s // dtype_bytes for s in loaded.strides)))
loaded.tofile(fp)
for (k, i), loaded in self.loaded_values.items():
if isinstance(loaded, np.ndarray):
dtype_bytes = loaded.dtype.itemsize
with open(self.files[k][i], 'wb') as fp:
fp.write(struct.pack('i', loaded.ndim))
fp.write(struct.pack('i' * loaded.ndim, *loaded.shape))
fp.write(struct.pack('i' * loaded.ndim, *(s // dtype_bytes for s in loaded.strides)))
loaded.tofile(fp)
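
With symbols in the report, `__getitem__` first checks `sdfg.arrays` and then falls back to `sdfg.symbols`, so arrays and scalar symbol values share one access path, and `update_report` writes back only the array entries. A usage sketch, assuming an SDFG `sdfg` with an array `A` and a symbol `N` whose instrumented run already produced the (hypothetical) report folder below:

from dace.codegen.instrumentation.data.data_report import InstrumentedDataReport

report = InstrumentedDataReport(sdfg, '/path/to/report')  # folder path is illustrative
A_first = report.get_first_version('A')   # numpy view honoring original strides
N_vals = report['N']                      # scalar symbol value, or a list of versions
A_first[:] = 42                           # mutate a loaded array in place...
report.update_report()                    # ...and write arrays back; symbols are skipped
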
6 changes: 3 additions & 3 deletions dace/optimization/data_layout_tuner.py
@@ -11,7 +11,7 @@
from dace.optimization import cutout_tuner
from dace.sdfg.state import SDFGState
from dace.transformation import helpers as xfh
from dace.sdfg.analysis import cutout as cutter
from dace.sdfg.analysis.cutout import SDFGCutout
from dace.codegen.instrumentation.data import data_report

try:
@@ -44,12 +44,12 @@ def cutouts(self) -> Generator[Tuple[dace.SDFG, str], None, None]:
state_id = self._sdfg.node_id(state)
cutout_hash = f"{state_id}.{node_id}.{node.label}"
subgraph_nodes = state.scope_subgraph(node).nodes()
cutout = cutter.cutout_state(state, *subgraph_nodes)
cutout = SDFGCutout.singlestate_cutout(state, *subgraph_nodes)
yield cutout, cutout_hash
elif isinstance(node, (dace.nodes.LibraryNode, dace.nodes.Tasklet)):
cutout_hash = node.label.split("_")[-1]
subgraph_nodes = [node]
cutout = cutter.cutout_state(state, *subgraph_nodes)
cutout = SDFGCutout.singlestate_cutout(state, *subgraph_nodes)
yield cutout, cutout_hash

def space(self, cutout: dace.SDFG, groups: List[Set[str]] = None) -> Generator[Set[str], None, None]:
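
The tuner changes in this and the following files are the same mechanical migration: the free function `cutout.cutout_state(...)` becomes the class method `SDFGCutout.singlestate_cutout(...)` with identical arguments. Before and after, given a state and its scope nodes as in the tuners above:

# Before: module-level function
#   from dace.sdfg.analysis import cutout as cutter
#   cut = cutter.cutout_state(state, *subgraph_nodes, make_copy=False)
# After: class-based API from this PR
from dace.sdfg.analysis.cutout import SDFGCutout
cut = SDFGCutout.singlestate_cutout(state, *subgraph_nodes, make_copy=False)
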
4 changes: 2 additions & 2 deletions dace/optimization/map_permutation_tuner.py
@@ -7,7 +7,7 @@
from dace import SDFG, dtypes
from dace.optimization import cutout_tuner
from dace.transformation import helpers as xfh
from dace.sdfg.analysis import cutout as cutter
from dace.sdfg.analysis.cutout import SDFGCutout
from dace.codegen.instrumentation.data import data_report

try:
@@ -31,7 +31,7 @@ def cutouts(self) -> Generator[Tuple[dace.SDFGState, str], None, None]:
node_id = state.node_id(node)
state_id = self._sdfg.node_id(state)
subgraph_nodes = state.scope_subgraph(node).nodes()
cutout = cutter.cutout_state(state, *subgraph_nodes, make_copy=False)
cutout = SDFGCutout.singlestate_cutout(state, *subgraph_nodes, make_copy=False)
yield cutout, f"{state_id}.{node_id}.{node.label}"

def space(self, map_entry: dace.nodes.MapEntry, **kwargs) -> Generator[Tuple[str], None, None]:
5 changes: 2 additions & 3 deletions dace/optimization/map_tiling_tuner.py
@@ -7,8 +7,7 @@
from dace.optimization import cutout_tuner
from dace.transformation import dataflow as df
from dace.transformation import helpers as xfh
from dace.sdfg.analysis import cutout as cutter
from dace.codegen.instrumentation.data import data_report
from dace.sdfg.analysis.cutout import SDFGCutout

try:
from tqdm import tqdm
@@ -33,7 +32,7 @@ def cutouts(self) -> Generator[Tuple[dace.SDFG, str], None, None]:
node_id = state.node_id(node)
state_id = self._sdfg.node_id(state)
subgraph_nodes = state.scope_subgraph(node).nodes()
cutout = cutter.cutout_state(state, *subgraph_nodes)
cutout = SDFGCutout.singlestate_cutout(state, *subgraph_nodes)
yield cutout, f"{state_id}.{node_id}.{node.label}"

def space(self, map_entry: dace.nodes.MapEntry) -> Generator[Tuple[int], None, None]:
14 changes: 7 additions & 7 deletions dace/optimization/on_the_fly_map_fusion_tuner.py
@@ -10,7 +10,7 @@

from dace import SDFG, dtypes
from dace.optimization import cutout_tuner
from dace.sdfg.analysis import cutout as cutter
from dace.sdfg.analysis.cutout import SDFGCutout

from dace.transformation import subgraph as sg
from dace.transformation.estimator import enumeration as en
@@ -37,7 +37,7 @@ def cutouts(self):
nodes = state.nodes()

try:
cutout = cutter.cutout_state(state, *(nodes), make_copy=False)
cutout = SDFGCutout.singlestate_cutout(state, *(nodes), make_copy=False)
yield cutout, f"{nsdfg_id}.{state_id}.{state.label}"
except AttributeError:
continue
@@ -113,7 +113,7 @@ def apply(self, config: Tuple[int, List[int]], label: str, **kwargs) -> None:
sdfg = list(self._sdfg.all_sdfgs_recursive())[nsdfg_id]
state = sdfg.node(state_id)
nodes = state.nodes()
cutout = cutter.cutout_state(state, *(nodes), make_copy=False)
cutout = SDFGCutout.singlestate_cutout(state, *(nodes), make_copy=False)

map_ids = config[1]
maps_ = list(map(cutout.start_state.node, map_ids))
@@ -134,7 +134,7 @@ def _extract_patterns(self, configs: List[Tuple[str, List[int]]]):
state_id = int(state_id)
state = list(self._sdfg.all_sdfgs_recursive())[nsdfg_id].node(state_id)
nodes = state.nodes()
cutout = cutter.cutout_state(state, *(nodes), make_copy=False)
cutout = SDFGCutout.singlestate_cutout(state, *(nodes), make_copy=False)

pattern_desc = Counter()
fusion_id, map_ids = self.config_from_key(config, cutout)
@@ -178,7 +178,7 @@ def transfer(sdfg: dace.SDFG, tuner, k: int = 5):
continue

try:
cutout = cutter.cutout_state(state, *(state.nodes()), make_copy=False)
cutout = SDFGCutout.singlestate_cutout(state, *(state.nodes()), make_copy=False)
except AttributeError:
continue

@@ -216,7 +216,7 @@ def transfer(sdfg: dace.SDFG, tuner, k: int = 5):
continue

if base_runtime is None:
baseline = cutter.cutout_state(state, *(state.nodes()), make_copy=False)
baseline = SDFGCutout.singlestate_cutout(state, *(state.nodes()), make_copy=False)
baseline.start_state.instrument = dace.InstrumentationType.GPU_Events

dreport_ = {}
@@ -244,7 +244,7 @@ def transfer(sdfg: dace.SDFG, tuner, k: int = 5):
subgraph_maps.extend(maps_desc[desc][:num])

# Apply
experiment_sdfg_ = cutter.cutout_state(state, *(state.nodes()), make_copy=False)
experiment_sdfg_ = SDFGCutout.singlestate_cutout(state, *(state.nodes()), make_copy=False)
experiment_state_ = experiment_sdfg_.start_state
experiment_maps_ids = list(map(lambda me: experiment_state_.node_id(me), subgraph_maps))
experiment_sdfg = copy.deepcopy(experiment_sdfg_)
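
Put together, these pieces support the cutout-assisted debugging loop named in the title: instrument a run to record arrays and symbols, cut out a suspect state, and replay just that state on the recorded inputs. A hedged end-to-end sketch; calls not shown in this diff (`AccessNode.instrument`, `SDFG.get_instrumented_data`) are assumptions about the surrounding DaCe API:

import dace
import numpy as np
from dace.sdfg.analysis.cutout import SDFGCutout

N = dace.symbol('N')

@dace.program
def doubler(A: dace.float64[N]):
    A[:] = 2 * A

sdfg = doubler.to_sdfg()

# 1. Record: save every array version and the symbols of each state.
for state in sdfg.nodes():
    state.symbol_instrument = dace.DataInstrumentationType.Save
    for dnode in state.data_nodes():
        dnode.instrument = dace.DataInstrumentationType.Save  # assumed AccessNode property

A = np.random.rand(20)
sdfg(A=A, N=20)  # the instrumented run writes a data report

# 2. Replay: cut out the suspect state and rerun it on the recorded data.
report = sdfg.get_instrumented_data()  # assumed accessor for the latest report
state = sdfg.start_state
cut = SDFGCutout.singlestate_cutout(state, *state.nodes())
args = {name: report.get_first_version(name)
        for name, desc in cut.arrays.items() if not desc.transient}
cut(**args, N=report.get_first_version('N'))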