diff --git a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp index 384d1a0ddccd2..be71bd02fc43b 100644 --- a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp +++ b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/IR/Value.h" +#include "llvm/ADT/DenseMap.h" #include @@ -57,26 +58,29 @@ WarpDistributionPattern::moveRegionToNewWarpOpAndAppendReturns( warpOp.getResultTypes().end()); auto yield = cast( warpOp.getBodyRegion().getBlocks().begin()->getTerminator()); - llvm::SmallSetVector yieldValues(yield.getOperands().begin(), - yield.getOperands().end()); + SmallVector yieldValues(yield.getOperands().begin(), + yield.getOperands().end()); + llvm::SmallDenseMap indexLookup; + // Record the value -> first index mapping for faster lookup. + for (auto [i, v] : llvm::enumerate(yieldValues)) { + if (!indexLookup.count(v)) + indexLookup[v] = i; + } + for (auto [value, type] : llvm::zip_equal(newYieldedValues, newReturnTypes)) { - if (yieldValues.insert(value)) { + // If the value already exists in the yield, don't create a new output. + if (indexLookup.count(value)) { + indices.push_back(indexLookup[value]); + } else { + // If the value is new, add it to the yield and to the types. + yieldValues.push_back(value); types.push_back(type); indices.push_back(yieldValues.size() - 1); - } else { - // If the value already exit the region don't create a new output. - for (auto [idx, yieldOperand] : - llvm::enumerate(yieldValues.getArrayRef())) { - if (yieldOperand == value) { - indices.push_back(idx); - break; - } - } } } - yieldValues.insert_range(newYieldedValues); + WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns( - rewriter, warpOp, yieldValues.getArrayRef(), types); + rewriter, warpOp, yieldValues, types); rewriter.replaceOp(warpOp, newWarpOp.getResults().take_front(warpOp.getNumResults())); return newWarpOp; diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir index ae8fce786ee57..c3ce7e9ca7fda 100644 --- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir +++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir @@ -1803,3 +1803,24 @@ func.func @warp_propagate_nd_write(%laneid: index, %dest: memref<4x1024xf32>) { // CHECK-DIST-AND-PROP: %[[IDS:.+]]:2 = affine.delinearize_index %{{.*}} into (4, 8) : index, index // CHECK-DIST-AND-PROP: %[[INNER_ID:.+]] = affine.apply #map()[%[[IDS]]#1] // CHECK-DIST-AND-PROP: vector.transfer_write %[[W]], %{{.*}}[%[[IDS]]#0, %[[INNER_ID]]] {{.*}} : vector<1x128xf32> + +// ----- +func.func @warp_propagate_duplicated_operands_in_yield(%laneid: index) { + %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] -> (vector<1xf32>, vector<1xf32>, vector<1xf32>) { + %0 = "some_def"() : () -> (vector<32xf32>) + %1 = "some_other_def"() : () -> (vector<32xf32>) + %2 = math.exp %1 : vector<32xf32> + gpu.yield %2, %0, %0 : vector<32xf32>, vector<32xf32>, vector<32xf32> + } + "some_use"(%r#0) : (vector<1xf32>) -> () + return +} + +// CHECK-PROP-LABEL : func.func @warp_propagate_duplicated_operands_in_yield( +// CHECK-PROP : %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32] -> (vector<1xf32>) { +// CHECK-PROP : %{{.*}} = "some_def"() : () -> vector<32xf32> +// CHECK-PROP : %[[T3:.*]] = "some_other_def"() : () -> vector<32xf32> +// CHECK-PROP : gpu.yield %[[T3]] : vector<32xf32> +// CHECK-PROP : } +// CHECK-PROP : %[T1:.*] = math.exp %[[W]] : vector<1xf32> +// CHECK-PROP : "some_use"(%[[T1]]) : (vector<1xf32>) -> ()