From 21ed39b275ad24e8b958c9f84952b2340640dde8 Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 1 Dec 2025 22:25:04 +0800 Subject: [PATCH 1/4] [MachineBasicBlock] Don't split loop header if the terminator is unanalyzable --- llvm/lib/CodeGen/MachineBasicBlock.cpp | 12 ++--- llvm/test/CodeGen/NVPTX/switch.ll | 73 ++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/switch.ll diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index ba0b025167307..10bf18b7fcb6d 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1425,14 +1425,13 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ, // where both sides of the branches are always executed. if (MF->getTarget().requiresStructuredCFG()) { + if (!MLI) + return false; + const MachineLoop *L = MLI->getLoopFor(Succ); // If `Succ` is a loop header, splitting the critical edge will not // break structured CFG. - if (MLI) { - const MachineLoop *L = MLI->getLoopFor(Succ); - return L && L->getHeader() == Succ; - } - - return false; + if (!L || L->getHeader() != Succ) + return false; } // Do we have an Indirect jump with a jumptable that we can rewrite? 
@@ -1459,6 +1458,7 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ, << printMBBReference(*this) << '\n'); return false; } + return true; } diff --git a/llvm/test/CodeGen/NVPTX/switch.ll b/llvm/test/CodeGen/NVPTX/switch.ll new file mode 100644 index 0000000000000..7fcfcfbb85d00 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/switch.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mcpu=sm_20 -verify-machineinstrs | FileCheck %s + +target triple = "nvptx64-unknown-nvidiacl" + +define void @pr170051(i32 %cond) { +; CHECK-LABEL: pr170051( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mov.b32 %r2, 0; +; CHECK-NEXT: ld.param.b32 %r1, [pr170051_param_0]; +; CHECK-NEXT: setp.gt.u32 %p1, %r1, 6; +; CHECK-NEXT: bra.uni $L__BB0_3; +; CHECK-NEXT: $L__BB0_1: // %BS_LABEL_2 +; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: or.b32 %r3, %r2, 1; +; CHECK-NEXT: $L__BB0_2: // %for.cond4 +; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: mov.b32 %r2, %r3; +; CHECK-NEXT: $L__BB0_3: // %BS_LABEL_1 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: @%p1 bra $L__BB0_5; +; CHECK-NEXT: // %bb.4: // %BS_LABEL_1 +; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: mov.b32 %r3, %r1; +; CHECK-NEXT: $L_brx_0: .branchtargets +; CHECK-NEXT: $L__BB0_2, +; CHECK-NEXT: $L__BB0_3, +; CHECK-NEXT: $L__BB0_5, +; CHECK-NEXT: $L__BB0_5, +; CHECK-NEXT: $L__BB0_1, +; CHECK-NEXT: $L__BB0_5, +; CHECK-NEXT: $L__BB0_3; +; CHECK-NEXT: brx.idx %r1, $L_brx_0; +; CHECK-NEXT: $L__BB0_5: // %unreachable +; CHECK-NEXT: // begin inline asm +; CHECK-NEXT: exit; +; CHECK-NEXT: // end inline asm +entry: + br label %for.cond + +for.cond: ; preds = %for.cond4.for.cond_crit_edge, %BS_LABEL_1, %BS_LABEL_1, %entry + %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, 
%BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ], [ poison, %for.cond4.for.cond_crit_edge ] + br label %BS_LABEL_1 + +BS_LABEL_2: ; preds = %BS_LABEL_1 + %sub = or i32 %p_2218_0.3, 1 + br label %for.cond4 + +for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2 + %p_2218_0.2 = phi i32 [ 0, %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ] + br i1 false, label %for.cond4.for.cond_crit_edge, label %BS_LABEL_1 + +for.cond4.for.cond_crit_edge: ; preds = %for.cond4 + br label %for.cond + +BS_LABEL_1: ; preds = %for.cond4, %for.cond + %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ] + switch i32 %cond, label %unreachable [ + i32 0, label %for.cond4 + i32 4, label %BS_LABEL_2 + i32 1, label %for.cond + i32 6, label %for.cond + ] + +unreachable: ; preds = %BS_LABEL_1 + unreachable +} + + From a3bd7e4aa5bf504b89c563662cfe18f0cfd92174 Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 1 Dec 2025 22:36:10 +0800 Subject: [PATCH 2/4] format --- llvm/lib/CodeGen/MachineBasicBlock.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 10bf18b7fcb6d..f3fa52150bee9 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1458,7 +1458,6 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ, << printMBBReference(*this) << '\n'); return false; } - return true; } From 23cc659e44ad84bcc0c91485e26a26a03b24a049 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 2 Dec 2025 13:05:23 +0800 Subject: [PATCH 3/4] add mir test --- .../test/CodeGen/NVPTX/switch-loop-header.mir | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/switch-loop-header.mir diff --git a/llvm/test/CodeGen/NVPTX/switch-loop-header.mir b/llvm/test/CodeGen/NVPTX/switch-loop-header.mir new file mode 100644 index 0000000000000..4d86bb879f18f --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/switch-loop-header.mir @@ -0,0 +1,182 @@ +# 
NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -o - %s -passes="require<machine-loops>,require<live-vars>,phi-node-elimination" | FileCheck %s + +--- | + target datalayout = "e-p6:32:32-i64:64-i128:128-i256:256-v16:16-v32:32-n16:32:64" + target triple = "nvptx64-unknown-nvidiacl" + + define void @func_26(i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.) { + entry: + br label %for.cond + + for.cond: ; preds = %BS_LABEL_1, %BS_LABEL_1, %entry + %p_2218_0.1 = phi i32 [ 0, %entry ], [ %p_2218_0.3, %BS_LABEL_1 ], [ %p_2218_0.3, %BS_LABEL_1 ] + br label %BS_LABEL_1 + + BS_LABEL_2: ; preds = %BS_LABEL_1 + %sub = or i32 %p_2218_0.3, 1 + br label %for.cond4 + + for.cond4: ; preds = %BS_LABEL_1, %BS_LABEL_2 + %p_2218_0.2 = phi i32 [ %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., %BS_LABEL_1 ], [ %sub, %BS_LABEL_2 ] + br label %BS_LABEL_1 + + BS_LABEL_1: ; preds = %for.cond4, %for.cond + %p_2218_0.3 = phi i32 [ %p_2218_0.2, %for.cond4 ], [ %p_2218_0.1, %for.cond ] + switch i32 %BS_COND_16.0.BS_COND_16.0.BS_COND_16.0.BS_COND_16.0., label %unreachable [ + i32 0, label %for.cond4 + i32 4, label %BS_LABEL_2 + i32 1, label %for.cond + i32 6, label %for.cond + ] + + unreachable: ; preds = %BS_LABEL_1 + call void asm sideeffect "exit;", ""() + unreachable + } +... 
+--- +name: func_26 +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: false +isSSA: true +noVRegs: false +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: b32, preferred-register: '', flags: [ ] } + - { id: 1, class: b32, preferred-register: '', flags: [ ] } + - { id: 2, class: b32, preferred-register: '', flags: [ ] } + - { id: 3, class: b32, preferred-register: '', flags: [ ] } + - { id: 4, class: b32, preferred-register: '', flags: [ ] } + - { id: 5, class: b32, preferred-register: '', flags: [ ] } + - { id: 6, class: b32, preferred-register: '', flags: [ ] } + - { id: 7, class: b1, preferred-register: '', flags: [ ] } + - { id: 8, class: b32, preferred-register: '', flags: [ ] } + - { id: 9, class: b1, preferred-register: '', flags: [ ] } + - { id: 10, class: b32, preferred-register: '', flags: [ ] } + - { id: 11, class: b1, preferred-register: '', flags: [ ] } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.3', '%bb.1', '%bb.6', '%bb.6', '%bb.2', '%bb.6', + '%bb.1' ] +body: | + ; CHECK-LABEL: name: func_26 + ; 
CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:b32 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:b1 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF2:%[0-9]+]]:b32 = IMPLICIT_DEF + ; CHECK-NEXT: GOTO %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x00000000), %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBranch undef [[DEF1]], %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BRX_START 0 + ; CHECK-NEXT: BRX_ITEM %bb.3 + ; CHECK-NEXT: BRX_ITEM %bb.1 + ; CHECK-NEXT: BRX_ITEM %bb.6 + ; CHECK-NEXT: BRX_ITEM %bb.6 + ; CHECK-NEXT: BRX_ITEM %bb.2 + ; CHECK-NEXT: BRX_ITEM %bb.6 + ; CHECK-NEXT: BRX_END %bb.1, undef [[DEF]], 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + bb.0: + successors: %bb.1(0x80000000) + + %10:b32 = IMPLICIT_DEF + %11:b1 = IMPLICIT_DEF + + bb.1: + successors: %bb.4(0x80000000) + + %0:b32 = PHI undef %10, %bb.0, undef %0, %bb.5 + GOTO %bb.4 + + bb.2: + successors: %bb.3(0x80000000) + + bb.3: + successors: %bb.4(0x80000000) + + bb.4: + successors: %bb.6(0x00000000), %bb.5(0x80000000) + + CBranch undef %11, %bb.6 + + bb.5: + successors: %bb.3(0x3e000000), %bb.1(0x04000000), %bb.6(0x00000000), %bb.2(0x3e000000) + + BRX_START 0 + BRX_ITEM %bb.3 + BRX_ITEM %bb.1 + BRX_ITEM %bb.6 + BRX_ITEM %bb.6 + BRX_ITEM %bb.2 + BRX_ITEM %bb.6 + BRX_END %bb.1, undef %10, 0 + + bb.6: +... 
From 6fe4f70f89ac7023b849e6ebd9989daa67da880b Mon Sep 17 00:00:00 2001 From: XChy Date: Wed, 3 Dec 2025 23:30:50 +0800 Subject: [PATCH 4/4] add comment --- llvm/lib/CodeGen/MachineBasicBlock.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index f3fa52150bee9..be94e1e6d25b6 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1428,8 +1428,9 @@ bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ, if (!MLI) return false; const MachineLoop *L = MLI->getLoopFor(Succ); - // If `Succ` is a loop header, splitting the critical edge will not - // break structured CFG. + // Splitting the critical edge keeps the CFG structured only if `Succ` is + // a loop header. In that case, fall through to check whether this block's + // terminator is analyzable. if (!L || L->getHeader() != Succ) return false; }