Skip to content

Commit

Permalink
Fixing broken fill builtins that were double offsetting. (iree-org#17696)
Browse files Browse the repository at this point in the history

This had been broken since inception for any fill that did not start
at offset 0 of the target resource. Because the builtins took the offset
as an argument and applied it themselves, and subsequent dispatch operand
packing also offset by the binding range, the offset was applied twice.
  • Loading branch information
benvanik committed Jun 19, 2024
1 parent 1997902 commit 7c41049
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// stream.builtin.fill.i16
// Writes the i16 %value %count times at byte %offset of %out_binding.
// Writes the i16 %value %count times in the bound range of %out_binding.

stream.executable private @__builtin_fill_i16 {
stream.executable.export public @__builtin_fill_i16 workgroups(%arg0: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @__builtin_fill_i16(%value: i16, %offset: index, %count: index, %out_binding: !stream.binding) {
%out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
func.func @__builtin_fill_i16(%value: i16, %count: index, %out_binding: !stream.binding) {
%c0 = arith.constant 0 : index
%out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
%0 = tensor.empty(%count) : tensor<?xi16>
%1 = linalg.fill ins(%value : i16) outs(%0 : tensor<?xi16>) -> tensor<?xi16>
flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi16> -> !flow.dispatch.tensor<writeonly:tensor<?xi16>>{%count}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// stream.builtin.fill.i32
// Writes the i32 %value %count times at byte %offset of %out_binding.
// Writes the i32 %value %count times in the bound range of %out_binding.

stream.executable private @__builtin_fill_i32 {
stream.executable.export public @__builtin_fill_i32 workgroups(%arg0: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @__builtin_fill_i32(%value: i32, %offset: index, %count: index, %out_binding: !stream.binding) {
%out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
func.func @__builtin_fill_i32(%value: i32, %count: index, %out_binding: !stream.binding) {
%c0 = arith.constant 0 : index
%out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
%0 = tensor.empty(%count) : tensor<?xi32>
%1 = linalg.fill ins(%value : i32) outs(%0 : tensor<?xi32>) -> tensor<?xi32>
flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi32> -> !flow.dispatch.tensor<writeonly:tensor<?xi32>>{%count}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// stream.builtin.fill.i64
// Writes the i64 %value %count times at byte %offset of %out_binding.
// Writes the i64 %value %count times in the bound range of %out_binding.

stream.executable private @__builtin_fill_i64 {
stream.executable.export public @__builtin_fill_i64 workgroups(%arg0: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @__builtin_fill_i64(%value: i64, %offset: index, %count: index, %out_binding: !stream.binding) {
%out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%count}
func.func @__builtin_fill_i64(%value: i64, %count: index, %out_binding: !stream.binding) {
%c0 = arith.constant 0 : index
%out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%count}
%0 = tensor.empty(%count) : tensor<?xi64>
%1 = linalg.fill ins(%value : i64) outs(%0 : tensor<?xi64>) -> tensor<?xi64>
flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi64> -> !flow.dispatch.tensor<writeonly:tensor<?xi64>>{%count}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// stream.builtin.fill.i8
// Writes the i8 %value %count times at byte %offset of %out_binding.
// Writes the i8 %value %count times in the bound range of %out_binding.

stream.executable private @__builtin_fill_i8 {
stream.executable.export public @__builtin_fill_i8 workgroups(%arg0: index) -> (index, index, index) {
%x, %y, %z = flow.dispatch.workgroup_count_from_dag_root %arg0
stream.return %x, %y, %z : index, index, index
}
builtin.module {
func.func @__builtin_fill_i8(%value: i8, %offset: index, %count: index, %out_binding: !stream.binding) {
%out = stream.binding.subspan %out_binding[%offset] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
func.func @__builtin_fill_i8(%value: i8, %count: index, %out_binding: !stream.binding) {
%c0 = arith.constant 0 : index
%out = stream.binding.subspan %out_binding[%c0] : !stream.binding -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
%0 = tensor.empty(%count) : tensor<?xi8>
%1 = linalg.fill ins(%value : i8) outs(%0 : tensor<?xi8>) -> tensor<?xi8>
flow.dispatch.tensor.store %1, %out, offsets = [0], sizes = [%count], strides = [1] : tensor<?xi8> -> !flow.dispatch.tensor<writeonly:tensor<?xi8>>{%count}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ static LogicalResult replaceBuiltinFillOp(IREE::Stream::AsyncFillOp fillOp,
SmallVector<Value> operands = {
fillOp.getTarget(),
pattern,
fillOp.getTargetOffset(),
elementCount,
};
SmallVector<Value> operandSizes = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ util.func public @builtinSplatI64(%arg0: index, %arg1: i64) -> !stream.resource<
// CHECK-SAME: (%[[RES:.+]]: !stream.resource<*>, %[[SIZE:.+]]: index, %[[VALUE:.+]]: i64, %[[BYTE_OFFSET:.+]]: index, %[[BYTE_END:.+]]: index, %[[BYTE_LENGTH:.+]]: index)
util.func public @builtinFillI64(%res: !stream.resource<*>, %size: index, %value: i64, %byte_offset: index, %byte_end: index, %byte_length: index) -> !stream.resource<*> {
// CHECK: %[[COUNT:.+]] = arith.divui %[[BYTE_LENGTH]], %c8
// CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_fill_i64::@__builtin_fill_i64[%[[COUNT]]](%[[RES]][%[[BYTE_OFFSET]] to %[[BYTE_END]] for %[[BYTE_LENGTH]]], %[[VALUE]], %[[BYTE_OFFSET]], %[[COUNT]]) : (!stream.resource<*>{%[[SIZE]]}, i64, index, index) -> %[[RES]]{%[[SIZE]]}
// CHECK: %[[RET:.+]] = stream.async.dispatch @__builtin_fill_i64::@__builtin_fill_i64[%[[COUNT]]](%[[RES]][%[[BYTE_OFFSET]] to %[[BYTE_END]] for %[[BYTE_LENGTH]]], %[[VALUE]], %[[COUNT]]) : (!stream.resource<*>{%[[SIZE]]}, i64, index) -> %[[RES]]{%[[SIZE]]}
%0 = stream.async.fill %value, %res[%byte_offset to %byte_end for %byte_length] : i64 -> %arg0 as !stream.resource<*>{%size}
// CHECK: util.return %[[RET]]
util.return %0 : !stream.resource<*>
Expand Down

0 comments on commit 7c41049

Please sign in to comment.