From 0b2e01ed3c342d956aaa63bc2b1aede27dd0ead2 Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Thu, 2 Oct 2025 14:52:46 +0000
Subject: [PATCH 1/3] Generalize the CUDA-biased test cases by replacing
 hardcoded "cuda" literals with the DEVICE variable

---
 test/test_examples.py             |  2 +-
 test/test_misc.py                 |  6 +++---
 test/test_print_ref_eager_mode.py |  9 +++++----
 test/test_ref_eager.py            | 11 ++++++-----
 test/test_tensor_descriptor.py    |  2 +-
 test/test_type_propagation.py     |  3 +++
 6 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/test/test_examples.py b/test/test_examples.py
index b084d078b..b0a7df3d1 100644
--- a/test/test_examples.py
+++ b/test/test_examples.py
@@ -1132,7 +1132,7 @@ def test_kl_div(self):
             ),
         )
         torch_kl_div = torch.nn.KLDivLoss(reduction="batchmean", log_target=False).to(
-            "cuda"
+            device=DEVICE
         )
         self.assertExpectedJournal(
             check_example(
diff --git a/test/test_misc.py b/test/test_misc.py
index f09102e6c..7ae83f079 100644
--- a/test/test_misc.py
+++ b/test/test_misc.py
@@ -259,7 +259,7 @@ def test_tile_begin(x: torch.Tensor) -> torch.Tensor:
                 out[tile_m.begin, tile_n.begin] = 1
             return out

-        x = torch.randn(64, 64, device="cuda")
+        x = torch.randn(64, 64, device=DEVICE)
         config = helion.Config(block_sizes=[16, 16])
         test_tile_begin.bind((x,)).to_triton_code(config)
         result = test_tile_begin.bind((x,)).compile_config(config)(x)
@@ -272,7 +272,7 @@ def test_tile_end(x: torch.Tensor) -> torch.Tensor:
                 out[tile_m.end, tile_n.end] = 1
             return out

-        x = torch.randn(64, 64, device="cuda")
+        x = torch.randn(64, 64, device=DEVICE)
         config = helion.Config(block_sizes=[16, 16])
         test_tile_end.bind((x,)).to_triton_code(config)
         result = test_tile_end.bind((x,)).compile_config(config)(x)
@@ -285,7 +285,7 @@ def test_tile_id(x: torch.Tensor) -> torch.Tensor:
                 out[tile_m.id, tile_n.id] = 1
             return out

-        x = torch.randn(64, 64, device="cuda")
+        x = torch.randn(64, 64, device=DEVICE)
         config = helion.Config(block_sizes=[16, 16])
         test_tile_id.bind((x,)).to_triton_code(config)
         result = test_tile_id.bind((x,)).compile_config(config)(x)
diff --git a/test/test_print_ref_eager_mode.py b/test/test_print_ref_eager_mode.py
index 11bc5b8b4..aed97336d 100644
--- a/test/test_print_ref_eager_mode.py
+++ b/test/test_print_ref_eager_mode.py
@@ -9,6 +9,7 @@

 import helion
 from helion import exc
+from helion._testing import DEVICE
 from helion._testing import TestCase
 import helion.language as hl

@@ -35,8 +36,8 @@ def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
                 out[tile] = x[tile] + y[tile]
             return out

-        x = torch.randn([512, 512], device="cuda", dtype=torch.float16)
-        y = torch.randn([512, 512], device="cuda", dtype=torch.float16)
+        x = torch.randn([512, 512], device=DEVICE, dtype=torch.float16)
+        y = torch.randn([512, 512], device=DEVICE, dtype=torch.float16)
         torch.testing.assert_close(add(x, y), torch.add(x, y))

     def test_normal_mode_code_print(self):
@@ -61,8 +62,8 @@ def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
                 out[tile] = x[tile] + y[tile]
             return out

-        x = torch.randn([512, 512], device="cuda", dtype=torch.float16)
-        y = torch.randn([512, 512], device="cuda", dtype=torch.float16)
+        x = torch.randn([512, 512], device=DEVICE, dtype=torch.float16)
+        y = torch.randn([512, 512], device=DEVICE, dtype=torch.float16)
         torch.testing.assert_close(add(x, y), torch.add(x, y))

         self.assertNotEqual(
diff --git a/test/test_ref_eager.py b/test/test_ref_eager.py
index 8a1d2f712..fd521cfcd 100644
--- a/test/test_ref_eager.py
+++ b/test/test_ref_eager.py
@@ -9,6 +9,7 @@

 import helion
 from helion import exc
+from helion._testing import DEVICE
 from helion._testing import TestCase
 from helion._testing import assert_ref_eager_mode
 import helion.language as hl
@@ -32,8 +33,8 @@ def print_intermediate_tensor_kernel(
                 out[tile_m, tile_n] = sum_val
         return out

-        x = torch.ones([2, 2], device="cuda", dtype=torch.float32) * 10.0
-        y = torch.ones([2, 2], device="cuda", dtype=torch.float32) * 5.0
+        x = torch.ones([2, 2], device=DEVICE, dtype=torch.float32) * 10.0
+        y = torch.ones([2, 2], device=DEVICE, dtype=torch.float32) * 5.0
         expected = x + y

         # Capture stdout to check print output
@@ -67,7 +68,7 @@ def incorrect_kernel(x: torch.Tensor) -> torch.Tensor:
             pass  # noqa: PIE790
         return x

-        x = torch.ones([2, 2], device="cuda", dtype=torch.float32) * math.pi
+        x = torch.ones([2, 2], device=DEVICE, dtype=torch.float32) * math.pi

         # Capture stdout to check print output
         captured_output = io.StringIO()
@@ -89,7 +90,7 @@ def kernel(x: torch.Tensor) -> torch.Tensor:
             return out

         with assert_ref_eager_mode():
-            x = torch.randn(128, 128, device="cuda")
+            x = torch.randn(128, 128, device=DEVICE)
             result = kernel(x)
             expected = x * 2.0
             torch.testing.assert_close(result, expected)
@@ -107,7 +108,7 @@ def kernel(x: torch.Tensor) -> torch.Tensor:
         # Run the kernel to capture the warning message
         captured_stderr = io.StringIO()
         with contextlib.redirect_stderr(captured_stderr):
-            x = torch.randn(128, 128, device="cuda")
+            x = torch.randn(128, 128, device=DEVICE)
             kernel(x)

         stderr_output = captured_stderr.getvalue()
diff --git a/test/test_tensor_descriptor.py b/test/test_tensor_descriptor.py
index 54d0d0f1f..f3fb7da62 100644
--- a/test/test_tensor_descriptor.py
+++ b/test/test_tensor_descriptor.py
@@ -259,7 +259,7 @@ def matmul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
             block_sizes=[16, 16, 16],
             indexing="tensor_descriptor",
         )
-        torch.cuda.synchronize()
+        torch.accelerator.synchronize()
         torch.testing.assert_close(result_large, expected, atol=1e-2, rtol=1e-2)

         self.assertIn(get_tensor_descriptor_fn_name(), code_large)
diff --git a/test/test_type_propagation.py b/test/test_type_propagation.py
index cf74dbcaf..105e12353 100644
--- a/test/test_type_propagation.py
+++ b/test/test_type_propagation.py
@@ -12,6 +12,7 @@
 from helion._testing import RefEagerTestDisabled
 from helion._testing import TestCase
 from helion._testing import import_path
+from helion._testing import skipIfXPU
 import helion.language as hl

 if TYPE_CHECKING:
@@ -95,6 +96,7 @@ def test_matmul(self):
         )
         self.assertExpectedJournal(output)

+    @skipIfXPU("CUDA-only")
     def test_cuda_device_properties(self):
         @helion.kernel
         def use_device_properties(x: torch.Tensor) -> torch.Tensor:
@@ -116,6 +118,7 @@ def use_device_properties(x: torch.Tensor) -> torch.Tensor:
         output = type_propagation_report(use_device_properties, x)
         self.assertExpectedJournal(output)

+    @skipIfXPU("CUDA-only")
     def test_cuda_device_properties_unsupported_attribute(self):
         @helion.kernel
         def use_unsupported_property(x: torch.Tensor) -> torch.Tensor:

From a1b3197ea2e388f87c5cf4139b492e561634edcb Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Fri, 3 Oct 2025 01:51:44 +0000
Subject: [PATCH 2/3] Update line numbers

---
 test/test_type_propagation.expected | 50 ++++++++++++++---------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/test/test_type_propagation.expected b/test/test_type_propagation.expected
index d372dbd13..d484c512f 100644
--- a/test/test_type_propagation.expected
+++ b/test/test_type_propagation.expected
@@ -502,59 +502,59 @@ def use_device_properties(x: torch.Tensor):
     # Attribute: LiteralType(device(type='cuda', index=0)) AttributeOrigin(value=ArgumentOrigin(name='x'), key='device')
     # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
     device = x.device
-    # Call: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
+    # Call: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
     # Attribute: CallableType(get_device_properties) AttributeOrigin(value=AttributeOrigin(value=GlobalOrigin(name='torch'), key='cuda'), key='get_device_properties')
     # Attribute: PythonModuleType(torch.cuda) AttributeOrigin(value=GlobalOrigin(name='torch'), key='cuda')
     # Name: PythonModuleType(torch) GlobalOrigin(name='torch')
     # Name: LiteralType(device(type='cuda', index=0)) AttributeOrigin(value=ArgumentOrigin(name='x'), key='device')
     props = torch.cuda.get_device_properties(device)
-    # Attribute: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
-    # Name: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
+    # Attribute: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
+    # Name: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
     sm_count = props.multi_processor_count
     # Subscript: SymIntType(s77) GetItemOrigin(value=AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape'), key=0)
     # Attribute: SequenceType((SymIntType(s77), )) AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape')
     # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
-    # Constant: LiteralType(0) SourceOrigin(location=)
+    # Constant: LiteralType(0) SourceOrigin(location=)
     n = x.shape[0]
-    # Call: TensorType([x_size0], torch.float32) SourceOrigin(location=)
+    # Call: TensorType([x_size0], torch.float32) SourceOrigin(location=)
     # Attribute: CallableType(_VariableFunctionsClass.zeros_like) AttributeOrigin(value=GlobalOrigin(name='torch'), key='zeros_like')
     # Name: PythonModuleType(torch) GlobalOrigin(name='torch')
     # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
     # For: loop_type=GRID
     out = torch.zeros_like(x)
-    # Call: IterType(GridIndexType(0)) SourceOrigin(location=)
+    # Call: IterType(GridIndexType(0)) SourceOrigin(location=)
     # Attribute: CallableType(grid) AttributeOrigin(value=GlobalOrigin(name='hl'), key='grid')
     # Name: PythonModuleType(helion.language) GlobalOrigin(name='hl')
-    # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
+    # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
     # For: loop_type=DEVICE
     for worker_id in hl.grid(sm_count):
-        # Call: IterType(GridIndexType(1)) DeviceOrigin(location=)
+        # Call: IterType(GridIndexType(1)) DeviceOrigin(location=)
         # Attribute: CallableType(grid) AttributeOrigin(value=GlobalOrigin(name='hl'), key='grid')
         # Name: PythonModuleType(helion.language) GlobalOrigin(name='hl')
         # Name: SymIntType(s77) GetItemOrigin(value=AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape'), key=0)
        for i in hl.grid(n):
-            # BinOp: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
-            # Name: GridIndexType(0) SourceOrigin(location=)
-            # BinOp: SymIntType(u0*u4) DeviceOrigin(location=)
-            # Name: GridIndexType(1) DeviceOrigin(location=)
-            # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
+            # BinOp: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+            # Name: GridIndexType(0) SourceOrigin(location=)
+            # BinOp: SymIntType(u0*u4) DeviceOrigin(location=)
+            # Name: GridIndexType(1) DeviceOrigin(location=)
+            # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
             idx = worker_id + i * sm_count
-            # Compare: SymBoolType(Eq(u11, 1)) DeviceOrigin(location=)
-            # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+            # Compare: SymBoolType(Eq(u11, 1)) DeviceOrigin(location=)
+            # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
             # Name: SymIntType(s77) GetItemOrigin(value=AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape'), key=0)
             if idx < n:
-                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
-                # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
-                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
-                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
+                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
+                # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
+                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
                 # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
-                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
                 out[idx] = x[idx]
-    # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
+    # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
     return out

 def if_else_graph_0():
-    # File: .../test_type_propagation.py:112 in use_device_properties, code: out[idx] = x[idx]
+    # File: .../test_type_propagation.py:113 in use_device_properties, code: out[idx] = x[idx]
     x: "f32[s77]" = helion_language__tracing_ops__host_tensor('x')
     symnode: "Sym(u0*u4 + u2)" = helion_language__tracing_ops__get_symnode('u0*u4 + u2')
     load: "f32[]" = helion_language_memory_ops_load(x, [symnode], None, None); x = None
@@ -563,21 +563,21 @@ def if_else_graph_0():
     return []

 def for_loop_1():
-    # File: .../test_type_propagation.py:110 in use_device_properties, code: idx = worker_id + i * sm_count
+    # File: .../test_type_propagation.py:111 in use_device_properties, code: idx = worker_id + i * sm_count
     u4: "Sym(u4)" = helion_language__tracing_ops__get_symnode('u4')
     u0: "Sym(u0)" = helion_language__tracing_ops__get_symnode('u0')
     mul: "Sym(u0*u4)" = u4 * u0; u4 = u0 = None
     u2: "Sym(u2)" = helion_language__tracing_ops__get_symnode('u2')
     add: "Sym(u0*u4 + u2)" = u2 + mul; u2 = mul = None

-    # File: .../test_type_propagation.py:111 in use_device_properties, code: if idx < n:
+    # File: .../test_type_propagation.py:112 in use_device_properties, code: if idx < n:
     x_size0: "Sym(s77)" = helion_language__tracing_ops__get_symnode('x_size0')
     lt: "Sym(u0*u4 + u2 < s77)" = add < x_size0; add = x_size0 = None
     _if = helion_language__tracing_ops__if(lt, 0, []); lt = _if = None
     return []

 def root_graph_2():
-    # File: .../test_type_propagation.py:109 in use_device_properties, code: for i in hl.grid(n):
+    # File: .../test_type_propagation.py:110 in use_device_properties, code: for i in hl.grid(n):
     x_size0: "Sym(s77)" = helion_language__tracing_ops__get_symnode('x_size0')
     _for_loop = helion_language__tracing_ops__for_loop(1, [0], [x_size0], []); x_size0 = _for_loop = None
     return None

From 4ec76e60512915803285821dfcc51fc0cbf12d68 Mon Sep 17 00:00:00 2001
From: "Wang, Eikan"
Date: Fri, 3 Oct 2025 06:21:55 +0000
Subject: [PATCH 3/3] Update the line number information

---
 test/test_type_propagation.expected | 110 ++++++++++++++--------------
 1 file changed, 55 insertions(+), 55 deletions(-)
diff --git a/test/test_type_propagation.expected b/test/test_type_propagation.expected
index d484c512f..d78885f5d 100644
--- a/test/test_type_propagation.expected
+++ b/test/test_type_propagation.expected
@@ -454,42 +454,42 @@ def root_graph_0():

 --- assertExpectedJournal(TestTypePropagation.test_and_between_optional_tensors)
 def kernel(t: torch.Tensor, c: torch.Tensor | None=None, d: torch.Tensor | None=None):
-    # Call: TensorType([t_size0], torch.float32) SourceOrigin(location=)
+    # Call: TensorType([t_size0], torch.float32) SourceOrigin(location=)
     # Attribute: CallableType(_VariableFunctionsClass.empty_like) AttributeOrigin(value=GlobalOrigin(name='torch'), key='empty_like')
     # Name: PythonModuleType(torch) GlobalOrigin(name='torch')
     # Name: TensorType([t_size0], torch.float32) ArgumentOrigin(name='t')
     # For: loop_type=GRID
     a = torch.empty_like(t)
-    # Call: IterType(TileIndexType(0)) SourceOrigin(location=)
+    # Call: IterType(TileIndexType(0)) SourceOrigin(location=)
     # Attribute: CallableType(tile) AttributeOrigin(value=GlobalOrigin(name='hl'), key='tile')
     # Name: PythonModuleType(helion.language) GlobalOrigin(name='hl')
-    # Call: SymIntType(s13) SourceOrigin(location=)
-    # Attribute: TensorAttributeType AttributeOrigin(value=SourceOrigin(location=), key='size')
-    # Name: TensorType([t_size0], torch.float32) SourceOrigin(location=)
-    # Constant: LiteralType(0) SourceOrigin(location=)
+    # Call: SymIntType(s13) SourceOrigin(location=)
+    # Attribute: TensorAttributeType AttributeOrigin(value=SourceOrigin(location=), key='size')
+    # Name: TensorType([t_size0], torch.float32) SourceOrigin(location=)
+    # Constant: LiteralType(0) SourceOrigin(location=)
     for h in hl.tile(a.size(0)):
-        # BoolOp: LiteralType(False) DeviceOrigin(location=)
-        # Compare: LiteralType(False) DeviceOrigin(location=)
+        # BoolOp: LiteralType(False) DeviceOrigin(location=)
+        # Compare: LiteralType(False) DeviceOrigin(location=)
         # Name: LiteralType(None) ArgumentOrigin(name='c')
-        # Constant: LiteralType(None) DeviceOrigin(location=)
-        # Compare: LiteralType(False) DeviceOrigin(location=)
+        # Constant: LiteralType(None) DeviceOrigin(location=)
+        # Compare: LiteralType(False) DeviceOrigin(location=)
         # Name: LiteralType(None) ArgumentOrigin(name='d')
-        # Constant: LiteralType(None) DeviceOrigin(location=)
+        # Constant: LiteralType(None) DeviceOrigin(location=)
         if c is not None and d is not None:
             a[h] = t[h] + c[h] + d[h]
         else:
-            # Subscript: TensorType([block_size_0], torch.float32) DeviceOrigin(location=)
-            # Name: TensorType([t_size0], torch.float32) SourceOrigin(location=)
-            # Name: TileIndexType(0) SourceOrigin(location=)
-            # Subscript: TensorType([block_size_0], torch.float32) DeviceOrigin(location=)
+            # Subscript: TensorType([block_size_0], torch.float32) DeviceOrigin(location=)
+            # Name: TensorType([t_size0], torch.float32) SourceOrigin(location=)
+            # Name: TileIndexType(0) SourceOrigin(location=)
+            # Subscript: TensorType([block_size_0], torch.float32) DeviceOrigin(location=)
             # Name: TensorType([t_size0], torch.float32) ArgumentOrigin(name='t')
-            # Name: TileIndexType(0) SourceOrigin(location=)
+            # Name: TileIndexType(0) SourceOrigin(location=)
             a[h] = t[h]
-    # Name: TensorType([t_size0], torch.float32) SourceOrigin(location=)
+    # Name: TensorType([t_size0], torch.float32) SourceOrigin(location=)
     return a

 def root_graph_0():
-    # File: .../test_type_propagation.py:148 in kernel, code: a[h] = t[h]
+    # File: .../test_type_propagation.py:151 in kernel, code: a[h] = t[h]
     t: "f32[s13]" = helion_language__tracing_ops__host_tensor('t')
     block_size_0: "Sym(u0)" = helion_language__tracing_ops__get_symnode('block_size_0')
     load: "f32[u0]" = helion_language_memory_ops_load(t, [block_size_0], None, None); t = None
@@ -502,59 +502,59 @@ def use_device_properties(x: torch.Tensor):
     # Attribute: LiteralType(device(type='cuda', index=0)) AttributeOrigin(value=ArgumentOrigin(name='x'), key='device')
     # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
     device = x.device
-    # Call: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
+    # Call: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
     # Attribute: CallableType(get_device_properties) AttributeOrigin(value=AttributeOrigin(value=GlobalOrigin(name='torch'), key='cuda'), key='get_device_properties')
     # Attribute: PythonModuleType(torch.cuda) AttributeOrigin(value=GlobalOrigin(name='torch'), key='cuda')
     # Name: PythonModuleType(torch) GlobalOrigin(name='torch')
     # Name: LiteralType(device(type='cuda', index=0)) AttributeOrigin(value=ArgumentOrigin(name='x'), key='device')
     props = torch.cuda.get_device_properties(device)
-    # Attribute: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
-    # Name: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
+    # Attribute: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
+    # Name: ClassType({'multi_processor_count': SymIntType(u0)}) SourceOrigin(location=)
     sm_count = props.multi_processor_count
     # Subscript: SymIntType(s77) GetItemOrigin(value=AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape'), key=0)
     # Attribute: SequenceType((SymIntType(s77), )) AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape')
     # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
-    # Constant: LiteralType(0) SourceOrigin(location=)
+    # Constant: LiteralType(0) SourceOrigin(location=)
     n = x.shape[0]
-    # Call: TensorType([x_size0], torch.float32) SourceOrigin(location=)
+    # Call: TensorType([x_size0], torch.float32) SourceOrigin(location=)
     # Attribute: CallableType(_VariableFunctionsClass.zeros_like) AttributeOrigin(value=GlobalOrigin(name='torch'), key='zeros_like')
     # Name: PythonModuleType(torch) GlobalOrigin(name='torch')
     # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
     # For: loop_type=GRID
     out = torch.zeros_like(x)
-    # Call: IterType(GridIndexType(0)) SourceOrigin(location=)
+    # Call: IterType(GridIndexType(0)) SourceOrigin(location=)
     # Attribute: CallableType(grid) AttributeOrigin(value=GlobalOrigin(name='hl'), key='grid')
     # Name: PythonModuleType(helion.language) GlobalOrigin(name='hl')
-    # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
+    # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
     # For: loop_type=DEVICE
     for worker_id in hl.grid(sm_count):
-        # Call: IterType(GridIndexType(1)) DeviceOrigin(location=)
+        # Call: IterType(GridIndexType(1)) DeviceOrigin(location=)
         # Attribute: CallableType(grid) AttributeOrigin(value=GlobalOrigin(name='hl'), key='grid')
         # Name: PythonModuleType(helion.language) GlobalOrigin(name='hl')
         # Name: SymIntType(s77) GetItemOrigin(value=AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape'), key=0)
        for i in hl.grid(n):
-            # BinOp: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
-            # Name: GridIndexType(0) SourceOrigin(location=)
-            # BinOp: SymIntType(u0*u4) DeviceOrigin(location=)
-            # Name: GridIndexType(1) DeviceOrigin(location=)
-            # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
+            # BinOp: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+            # Name: GridIndexType(0) SourceOrigin(location=)
+            # BinOp: SymIntType(u0*u4) DeviceOrigin(location=)
+            # Name: GridIndexType(1) DeviceOrigin(location=)
+            # Name: SymIntType(u0) AttributeOrigin(value=SourceOrigin(location=), key='multi_processor_count')
             idx = worker_id + i * sm_count
-            # Compare: SymBoolType(Eq(u11, 1)) DeviceOrigin(location=)
-            # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+            # Compare: SymBoolType(Eq(u11, 1)) DeviceOrigin(location=)
+            # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
             # Name: SymIntType(s77) GetItemOrigin(value=AttributeOrigin(value=ArgumentOrigin(name='x'), key='shape'), key=0)
             if idx < n:
-                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
-                # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
-                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
-                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
+                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
+                # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
+                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+                # Subscript: TensorType([], torch.float32) DeviceOrigin(location=)
                 # Name: TensorType([x_size0], torch.float32) ArgumentOrigin(name='x')
-                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
+                # Name: SymIntType(u0*u4 + u2) DeviceOrigin(location=)
                 out[idx] = x[idx]
-    # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
+    # Name: TensorType([x_size0], torch.float32) SourceOrigin(location=)
     return out

 def if_else_graph_0():
-    # File: .../test_type_propagation.py:113 in use_device_properties, code: out[idx] = x[idx]
+    # File: .../test_type_propagation.py:114 in use_device_properties, code: out[idx] = x[idx]
     x: "f32[s77]" = helion_language__tracing_ops__host_tensor('x')
     symnode: "Sym(u0*u4 + u2)" = helion_language__tracing_ops__get_symnode('u0*u4 + u2')
     load: "f32[]" = helion_language_memory_ops_load(x, [symnode], None, None); x = None
@@ -563,21 +563,21 @@ def if_else_graph_0():
     return []

 def for_loop_1():
-    # File: .../test_type_propagation.py:111 in use_device_properties, code: idx = worker_id + i * sm_count
+    # File: .../test_type_propagation.py:112 in use_device_properties, code: idx = worker_id + i * sm_count
     u4: "Sym(u4)" = helion_language__tracing_ops__get_symnode('u4')
     u0: "Sym(u0)" = helion_language__tracing_ops__get_symnode('u0')
     mul: "Sym(u0*u4)" = u4 * u0; u4 = u0 = None
     u2: "Sym(u2)" = helion_language__tracing_ops__get_symnode('u2')
     add: "Sym(u0*u4 + u2)" = u2 + mul; u2 = mul = None

-    # File: .../test_type_propagation.py:112 in use_device_properties, code: if idx < n:
+    # File: .../test_type_propagation.py:113 in use_device_properties, code: if idx < n:
     x_size0: "Sym(s77)" = helion_language__tracing_ops__get_symnode('x_size0')
     lt: "Sym(u0*u4 + u2 < s77)" = add < x_size0; add = x_size0 = None
     _if = helion_language__tracing_ops__if(lt, 0, []); lt = _if = None
     return []

 def root_graph_2():
-    # File: .../test_type_propagation.py:110 in use_device_properties, code: for i in hl.grid(n):
+    # File: .../test_type_propagation.py:111 in use_device_properties, code: for i in hl.grid(n):
     x_size0: "Sym(s77)" = helion_language__tracing_ops__get_symnode('x_size0')
     _for_loop = helion_language__tracing_ops__for_loop(1, [0], [x_size0], []); x_size0 = _for_loop = None
     return None
@@ -823,33 +823,33 @@ def root_graph_1():

 --- assertExpectedJournal(TestTypePropagation.test_method_call)
 def fn(x):
-    # Call: TensorType([x_size0, x_size1], torch.int32) SourceOrigin(location=)
+    # Call: TensorType([x_size0, x_size1], torch.int32) SourceOrigin(location=)
     # Attribute: CallableType(_VariableFunctionsClass.empty_like) AttributeOrigin(value=GlobalOrigin(name='torch'), key='empty_like')
     # Name: PythonModuleType(torch) GlobalOrigin(name='torch')
     # Name: TensorType([x_size0, x_size1], torch.int32) ArgumentOrigin(name='x')
     # For: loop_type=GRID
     out = torch.empty_like(x)
-    # Call: IterType(SequenceType((TileIndexType(0), TileIndexType(1)))) SourceOrigin(location=)
+    # Call: IterType(SequenceType((TileIndexType(0), TileIndexType(1)))) SourceOrigin(location=)
     # Attribute: CallableType(tile) AttributeOrigin(value=GlobalOrigin(name='hl'), key='tile')
     # Name: PythonModuleType(helion.language) GlobalOrigin(name='hl')
-    # Call: SequenceType((SymIntType(s77), SymIntType(s27))) SourceOrigin(location=)
+    # Call: SequenceType((SymIntType(s77), SymIntType(s27))) SourceOrigin(location=)
     # Attribute: TensorAttributeType AttributeOrigin(value=ArgumentOrigin(name='x'), key='size')
     # Name: TensorType([x_size0, x_size1], torch.int32) ArgumentOrigin(name='x')
     for tile in hl.tile(x.size()):
-        # Subscript: TensorType([block_size_0, block_size_1], torch.int32) DeviceOrigin(location=)
-        # Name: TensorType([x_size0, x_size1], torch.int32) SourceOrigin(location=)
-        # Name: SequenceType((TileIndexType(0), TileIndexType(1))) SourceOrigin(location=)
-        # Call: TensorType([block_size_0, block_size_1], torch.float32) DeviceOrigin(location=)
-        # Attribute: TensorAttributeType AttributeOrigin(value=DeviceOrigin(location=), key='sin')
-        # Subscript: TensorType([block_size_0, block_size_1], torch.int32) DeviceOrigin(location=)
+        # Subscript: TensorType([block_size_0, block_size_1], torch.int32) DeviceOrigin(location=)
+        # Name: TensorType([x_size0, x_size1], torch.int32) SourceOrigin(location=)
+        # Name: SequenceType((TileIndexType(0), TileIndexType(1))) SourceOrigin(location=)
+        # Call: TensorType([block_size_0, block_size_1], torch.float32) DeviceOrigin(location=)
+        # Attribute: TensorAttributeType AttributeOrigin(value=DeviceOrigin(location=), key='sin')
+        # Subscript: TensorType([block_size_0, block_size_1], torch.int32) DeviceOrigin(location=)
         # Name: TensorType([x_size0, x_size1], torch.int32) ArgumentOrigin(name='x')
-        # Name: SequenceType((TileIndexType(0), TileIndexType(1))) SourceOrigin(location=)
+        # Name: SequenceType((TileIndexType(0), TileIndexType(1))) SourceOrigin(location=)
         out[tile] = x[tile].sin()
-    # Name: TensorType([x_size0, x_size1], torch.int32) SourceOrigin(location=)
+    # Name: TensorType([x_size0, x_size1], torch.int32) SourceOrigin(location=)
     return out

 def root_graph_0():
-    # File: .../test_type_propagation.py:81 in fn, code: out[tile] = x[tile].sin()
+    # File: .../test_type_propagation.py:82 in fn, code: out[tile] = x[tile].sin()
     x: "i32[s77, s27]" = helion_language__tracing_ops__host_tensor('x')
     block_size_0: "Sym(u0)" = helion_language__tracing_ops__get_symnode('block_size_0')
     block_size_1: "Sym(u1)" = helion_language__tracing_ops__get_symnode('block_size_1')
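
For context on the pattern this series applies: rather than naming a backend, the tests read a single device constant from helion._testing and use the backend-neutral torch.accelerator API (see the torch.cuda.synchronize() -> torch.accelerator.synchronize() change above, which requires PyTorch 2.6 or newer). The sketch below shows one way such a constant can be resolved; the fallback order is an assumption for illustration and not necessarily how helion._testing.DEVICE is actually computed.

    # Hypothetical helper in the spirit of helion._testing.DEVICE.
    # The resolution order here is an assumption, not helion's actual logic.
    import torch

    def _resolve_test_device() -> torch.device:
        # Use the active accelerator (CUDA, XPU, MPS, ...) when one exists;
        # fall back to CPU so the same tests run on accelerator-free builds.
        if torch.accelerator.is_available():
            device = torch.accelerator.current_accelerator()
            if device is not None:
                return device
        return torch.device("cpu")

    DEVICE = _resolve_test_device()

    # Test code then stays backend-agnostic:
    x = torch.randn(64, 64, device=DEVICE)
    if torch.accelerator.is_available():
        torch.accelerator.synchronize()  # generalizes torch.cuda.synchronize()

With this in place, tests marked @skipIfXPU("CUDA-only") (for genuinely CUDA-specific behavior such as torch.cuda.get_device_properties) remain the only backend-conditional code paths.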