From 8d678c16e056bc74bd9f16779ad568909f0da480 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Tue, 4 Nov 2025 21:01:51 +0000 Subject: [PATCH 1/2] Fix simplifycfg pass --- llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 9 +++ ...rging-duplicate-convergence-instrinsics.ll | 68 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 11db0ec487328..c1b6140abb471 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -230,6 +230,15 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // Don't break self-loops. if (PredBB == BB) return false; + // Don't break if both the basic block and the predecessor contain convergent + // intrinsics. + for (Instruction &I : *BB) + if (isa<ConvergenceControlInst>(I)) { + for (Instruction &I : *PredBB) + if (isa<ConvergenceControlInst>(I)) + return false; + } + // Don't break unwinding instructions or terminators with other side-effects. 
Instruction *PTI = PredBB->getTerminator(); if (PTI->isSpecialTerminator() || PTI->mayHaveSideEffects()) diff --git a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll new file mode 100644 index 0000000000000..d5ae64f6897e3 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s + +declare token @llvm.experimental.convergence.entry() #0 + +define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 { +; CHECK-LABEL: @nested( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TIDY:%.*]], [[TIDX:%.*]] +; CHECK-NEXT: [[OR_COND_I:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: [[TMP2:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ] +; CHECK-NEXT: br label [[FOR_COND1_I:%.*]] +; CHECK: for.cond1.i: +; CHECK-NEXT: [[CMP2_I:%.*]] = phi i1 [ false, [[FOR_BODY4_I:%.*]] ], [ true, [[FOR_COND_I]] ] +; CHECK-NEXT: [[TMP3:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP2]]) ] +; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[EXIT:%.*]] +; CHECK: for.body4.i: +; CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]] +; CHECK: if.then.i: +; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token [[TMP3]]) ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 0 +; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr [[TMP4]], align 4 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: 
ret void +; +entry: + %0 = tail call token @llvm.experimental.convergence.entry() + %2 = or i32 %tidy, %tidx + %or.cond.i = icmp eq i32 %2, 0 + br label %for.cond.i + +for.cond.i: + %3 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] + br label %for.cond1.i + +for.cond1.i: + %cmp2.i = phi i1 [ false, %for.body4.i ], [ true, %for.cond.i ] + %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %3) ] + br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit + +for.body4.i: + br i1 %or.cond.i, label %if.then.i, label %for.cond1.i + +if.then.i: + %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token %4) ] + %5 = getelementptr inbounds i32, ptr %array, i32 0 + store i32 %hlsl.wave.active.max7.i, ptr %5, align 4 + br label %cleanup.i + +cleanup.i.loopexit: + br label %cleanup.i + +cleanup.i: + br label %exit + +exit: + ret void +} + +declare token @llvm.experimental.convergence.loop() #0 + +declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0 + +attributes #0 = { convergent } From 0cad7f0ef150783d1bd5f15da9d6d51bb87045af Mon Sep 17 00:00:00 2001 From: luciechoi Date: Tue, 18 Nov 2025 19:58:19 +0000 Subject: [PATCH 2/2] Remove hlsl&spv specific functions --- .../skip-merging-duplicate-convergence-instrinsics.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll index d5ae64f6897e3..368ae96d0c3c2 100644 --- a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll +++ b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll @@ -20,9 +20,9 @@ define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 { ; CHECK: for.body4.i: ; CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]] ; CHECK: 
if.then.i: -; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token [[TMP3]]) ] +; CHECK-NEXT: [[TEST_VAL:%.*]] = call spir_func i32 @func_test(i32 0) [ "convergencectrl"(token [[TMP3]]) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 0 -; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[TEST_VAL]], ptr [[TMP4]], align 4 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -46,9 +46,9 @@ for.body4.i: br i1 %or.cond.i, label %if.then.i, label %for.cond1.i if.then.i: - %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token %4) ] + %test.val = call spir_func i32 @func_test(i32 0) [ "convergencectrl"(token %4) ] %5 = getelementptr inbounds i32, ptr %array, i32 0 - store i32 %hlsl.wave.active.max7.i, ptr %5, align 4 + store i32 %test.val, ptr %5, align 4 br label %cleanup.i cleanup.i.loopexit: @@ -63,6 +63,6 @@ exit: declare token @llvm.experimental.convergence.loop() #0 -declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0 +declare i32 @func_test(i32) #0 attributes #0 = { convergent }