diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5c7df25c58cde..4884541a60535 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -3252,6 +3252,7 @@ def GPU_SubgroupBroadcastOp : GPU_Op<"subgroup_broadcast",
   let assemblyFormat = [{
     $src `,` $broadcast_type ($lane^)? attr-dict `:` type($result)
   }];
+  let hasFolder = 1;
   let hasVerifier = 1;
 }
 
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 61a630aa88960..21c0d369b8d1c 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2536,6 +2536,18 @@ LogicalResult gpu::SubgroupBroadcastOp::verify() {
   }
 }
 
+/// Folds a broadcast whose source is itself produced by another
+/// `gpu.subgroup_broadcast` into that producer's result.
+OpFoldResult gpu::SubgroupBroadcastOp::fold(FoldAdaptor /*adaptor*/) {
+  // Broadcast result is always uniform, so broadcasting it again (with any
+  // broadcast type) yields the same value.
+  if (auto prev = getSrc().getDefiningOp<SubgroupBroadcastOp>())
+    return prev.getResult();
+
+  // Not a broadcast of a broadcast: nothing to fold.
+  return nullptr;
+}
+
 //===----------------------------------------------------------------------===//
 // GPU KernelMetadataAttr
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index 33ce98e6da0ed..2589976c306d9 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -289,3 +289,28 @@ func.func @gpu_launch_without_side_effects() {
   }
   return
 }
+
+// -----
+
+
+// The outer broadcast folds away; only the inner `first_active_lane` one stays.
+// CHECK-LABEL: func @broadcast_of_broadcast1
+//  CHECK-SAME: (%[[VALUE:.*]]: f32, %[[LANE:.*]]: i32)
+//       CHECK: %[[RES:.*]] = gpu.subgroup_broadcast %[[VALUE]], first_active_lane : f32
+//       CHECK: return %[[RES]]
+func.func @broadcast_of_broadcast1(%value : f32, %lane : i32) -> f32 {
+  %0 = gpu.subgroup_broadcast %value, first_active_lane : f32
+  %1 = gpu.subgroup_broadcast %0, specific_lane %lane : f32
+  return %1 : f32
+}
+
+// The outer broadcast folds away; only the inner `specific_lane` one stays.
+// CHECK-LABEL: func @broadcast_of_broadcast2
+//  CHECK-SAME: (%[[VALUE:.*]]: f32, %[[LANE:.*]]: i32)
+//       CHECK: %[[RES:.*]] = gpu.subgroup_broadcast %[[VALUE]], specific_lane %[[LANE]] : f32
+//       CHECK: return %[[RES]]
+func.func @broadcast_of_broadcast2(%value : f32, %lane : i32) -> f32 {
+  %0 = gpu.subgroup_broadcast %value, specific_lane %lane : f32
+  %1 = gpu.subgroup_broadcast %0, first_active_lane : f32
+  return %1 : f32
+}