Skip to content

Conversation

@llvmbot
Copy link
Member

@llvmbot llvmbot commented Dec 4, 2025

Backport 78ee4a5

Requested by: @alexrp

…lvm#135925)

This change corrects the scheduling relationship between dependent PHI
nodes. Previously, the implementation treated SU1 as the successor of
SU0. In reality, SU0 should depend on SU1, not the other way around.
The incorrect ordering could cause SU0 to be scheduled before SU1, which
leads to invalid IR: subsequent instructions may reference values that
have not yet been defined.

%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 %7:intregs
= PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 %27:intregs = A2_zxtb
%3:intregs - SU2
%13:intregs = C2_muxri %45:predregs, 0, %46:intreg

Co-Authored by: Sumanth Gundapaneni

(cherry picked from commit 78ee4a5)
@llvmbot
Copy link
Member Author

llvmbot commented Dec 4, 2025

@iajbar What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Member Author

llvmbot commented Dec 4, 2025

@llvm/pr-subscribers-backend-hexagon

Author: None (llvmbot)

Changes

Backport 78ee4a5

Requested by: @alexrp


Full diff: https://github.com/llvm/llvm-project/pull/170749.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+13-2)
  • (added) llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll (+96)
  • (modified) llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll (+4-6)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 0e7cb0c980d40..1035188344c33 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1217,8 +1217,19 @@ void SwingSchedulerDAG::updatePhiDependences() {
               HasPhiDef = Reg;
               // Add a chain edge to a dependent Phi that isn't an existing
               // predecessor.
-              if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
-                I.addPred(SDep(SU, SDep::Barrier));
+
+              // %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0
+              // %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1
+              // %27:intregs = A2_zxtb %3:intregs - SU2
+              // %13:intregs = C2_muxri %45:predregs, 0, %46:intreg
+              // If we have dependent phis, SU0 should be the successor of SU1
+              // not the other way around. (it used to be SU1 is the successor
+              // of SU0). In some cases, SU0 is scheduled earlier than SU1
+              // resulting in bad IR as we do not have a value that can be used
+              // by SU2.
+
+              if (SU->NodeNum < I.NodeNum && !SU->isPred(&I))
+                SU->addPred(SDep(&I, SDep::Barrier));
             }
           }
         }
diff --git a/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll
new file mode 100644
index 0000000000000..6d324029966d7
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-dependent-phis.ll
@@ -0,0 +1,96 @@
+;RUN: llc -march=hexagon -mv71t -O2 < %s -o - 2>&1 > /dev/null
+
+; Validate that we do not crash while running this test.
+;%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0
+;%7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1
+;%27:intregs = A2_zxtb %3:intregs - SU2
+;%13:intregs = C2_muxri %45:predregs, 0, %46:intreg
+;If we have dependent phis, SU0 should be the successor of SU1 not
+;the other way around. (it used to be SU1 is the successor of SU0).
+;In some cases, SU0 is scheduled earlier than SU1 resulting in bad
+;IR as we do not have a value that can be used by SU2.
+
+@global = common dso_local local_unnamed_addr global ptr null, align 4
+@global.1 = common dso_local local_unnamed_addr global i32 0, align 4
+@global.2 = common dso_local local_unnamed_addr global i16 0, align 2
+@global.3 = common dso_local local_unnamed_addr global i16 0, align 2
+@global.4 = common dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: nofree norecurse nosync nounwind
+define dso_local i16 @wombat(i8 zeroext %arg, i16 %dummy) local_unnamed_addr #0 {
+bb:
+  %load = load ptr, ptr @global, align 4
+  %load1 = load i32, ptr @global.1, align 4
+  %add2 = add nsw i32 %load1, -1
+  store i32 %add2, ptr @global.1, align 4
+  %icmp = icmp eq i32 %load1, 0
+  br i1 %icmp, label %bb36, label %bb3
+
+bb3:                                              ; preds = %bb3, %bb
+  %phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ]
+  %phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ]
+  %phi5 = phi i16 [ %select23, %bb3 ], [ %dummy, %bb ]
+  %phi6 = phi i16 [ %select26, %bb3 ], [ %dummy, %bb ]
+  %phi7 = phi i16 [ %select, %bb3 ], [ %dummy, %bb ]
+  %phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ]
+  %zext = zext i8 %phi4 to i32
+  %getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext
+  %getelementptr9 = getelementptr inbounds i32, ptr %getelementptr, i32 2
+  %ptrtoint = ptrtoint ptr %getelementptr9 to i32
+  %trunc = trunc i32 %ptrtoint to i16
+  %sext10 = sext i16 %phi7 to i32
+  %shl11 = shl i32 %ptrtoint, 16
+  %ashr = ashr exact i32 %shl11, 16
+  %icmp12 = icmp slt i32 %ashr, %sext10
+  %select = select i1 %icmp12, i16 %trunc, i16 %phi7
+  %getelementptr13 = getelementptr inbounds i32, ptr %getelementptr, i32 3
+  %load14 = load i32, ptr %getelementptr13, align 4
+  %shl = shl i32 %load14, 8
+  %getelementptr15 = getelementptr inbounds i32, ptr %getelementptr, i32 1
+  %load16 = load i32, ptr %getelementptr15, align 4
+  %shl17 = shl i32 %load16, 16
+  %ashr18 = ashr exact i32 %shl17, 16
+  %add = add nsw i32 %ashr18, %load14
+  %lshr = lshr i32 %add, 8
+  %or = or i32 %lshr, %shl
+  %sub = sub i32 %or, %load16
+  %trunc19 = trunc i32 %sub to i16
+  %sext = sext i16 %phi5 to i32
+  %shl20 = shl i32 %sub, 16
+  %ashr21 = ashr exact i32 %shl20, 16
+  %icmp22 = icmp sgt i32 %ashr21, %sext
+  %select23 = select i1 %icmp22, i16 %trunc19, i16 %phi5
+  %sext24 = sext i16 %phi6 to i32
+  %icmp25 = icmp slt i32 %ashr21, %sext24
+  %select26 = select i1 %icmp25, i16 %trunc19, i16 %phi6
+  %icmp27 = icmp eq i8 %phi8, 0
+  %add28 = add i8 %phi8, -1
+  %select29 = select i1 %icmp27, i8 0, i8 %add28
+  %add30 = add nsw i32 %phi, -1
+  %icmp31 = icmp eq i32 %phi, 0
+  br i1 %icmp31, label %bb32, label %bb3
+
+bb32:                                             ; preds = %bb3
+  store i16 %trunc, ptr @global.2, align 2
+  store i16 %trunc19, ptr @global.3, align 2
+  store i32 -1, ptr @global.1, align 4
+  %sext33 = sext i16 %select to i32
+  %sext34 = sext i16 %select23 to i32
+  %sext35 = sext i16 %select26 to i32
+  br label %bb36
+
+bb36:                                             ; preds = %bb32, %bb
+  %phi37 = phi i32 [ %sext33, %bb32 ], [ 0, %bb ]
+  %phi38 = phi i32 [ %sext35, %bb32 ], [ 0, %bb ]
+  %phi39 = phi i32 [ %sext34, %bb32 ], [ 0, %bb ]
+  %sub40 = sub nsw i32 %phi39, %phi38
+  %icmp41 = icmp slt i32 %sub40, %phi37
+  br i1 %icmp41, label %bb43, label %bb42
+
+bb42:                                             ; preds = %bb36
+  store i32 0, ptr @global.4, align 4
+  br label %bb43
+
+bb43:                                             ; preds = %bb42, %bb36
+  ret i16 %dummy
+}
diff --git a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll
index 08e3e604b5b05..549428643ccaa 100644
--- a/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll
@@ -1,15 +1,17 @@
+; XFAIL: Hexagon
 ; RUN: llc -mtriple=hexagon -hexagon-initial-cfg-cleanup=0 -disable-cgp-delete-phis < %s | FileCheck %s
 
 ; Test that we generate the correct Phi name in the last couple of epilog
 ; blocks, when there are 3 epilog blocks. The Phi was scheduled in stage
 ; 2, so the computation for the number of Phis needs to be adjusted when
 ; the incoming prolog block is from prolog 0 or prolog 1.
-; Note: the pipeliner no longer generates a 3 stage pipeline for this test.
+; Note: the pipeliner has been generating a 4-stage pipelined loop.
 
 ; CHECK: loop0
 ; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8)
+; CHECK: r{{[0-9]+}} = [[REG0]]
 ; CHECK: endloop0
-; CHECK: [[REG0]] = add(r{{[0-9]+}},#8)
+; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#8)
 
 ; Function Attrs: nounwind
 define void @f0(ptr noalias nocapture readonly %a0, i32 %a1, ptr noalias %a2, ptr %a3) #0 {
@@ -50,7 +52,3 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1
-
-attributes #0 = { nounwind "target-cpu"="hexagonv60" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }

Copy link
Contributor

@iajbar iajbar left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please merge this correctness fix. Thank you.

@androm3da
Copy link
Member

@iajbar @quic-asaravan could there be some other dependencies that need to be cherry-picked for this fix?

2025-12-04T21:46:58.3210476Z ******************** TEST 'LLVM :: CodeGen/Hexagon/phi-elim.ll' FAILED ********************
2025-12-04T21:46:58.3213147Z Exit Code: 134
2025-12-04T21:46:58.3213736Z 
2025-12-04T21:46:58.3214072Z Command Output (stderr):
2025-12-04T21:46:58.3215568Z --
2025-12-04T21:46:58.3217037Z /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -mtriple=hexagon < /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/test/CodeGen/Hexagon/phi-elim.ll # RUN: at line 1
2025-12-04T21:46:58.3219195Z + /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -mtriple=hexagon
2025-12-04T21:46:58.3221933Z llc: /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp:522: void llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting(): Assertion `Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] && "Wrong topological sorting"' failed.
2025-12-04T21:46:58.3224463Z PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
2025-12-04T21:46:58.3225351Z Stack dump:
2025-12-04T21:46:58.3226149Z 0.	Program arguments: /home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/llc -mtriple=hexagon
2025-12-04T21:46:58.3227183Z 1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2025-12-04T21:46:58.3227888Z 2.	Running pass 'Modulo Software Pipelining' on function '@f0'
2025-12-04T21:46:58.3229721Z  #0 0x0000578003f045c8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/Unix/Signals.inc:834:13
2025-12-04T21:46:58.3231779Z  #1 0x0000578003f01cd5 llvm::sys::RunSignalHandlers() /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/Signals.cpp:105:18
2025-12-04T21:46:58.3233408Z  #2 0x0000578003f05391 SignalHandler(int, siginfo_t*, void*) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/Support/Unix/Signals.inc:426:38
2025-12-04T21:46:58.3234537Z  #3 0x00007b8e5cce1330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
2025-12-04T21:46:58.3235243Z  #4 0x00007b8e5cd3ab2c pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x9eb2c)
2025-12-04T21:46:58.3235996Z  #5 0x00007b8e5cce127e raise (/lib/x86_64-linux-gnu/libc.so.6+0x4527e)
2025-12-04T21:46:58.3236627Z  #6 0x00007b8e5ccc48ff abort (/lib/x86_64-linux-gnu/libc.so.6+0x288ff)
2025-12-04T21:46:58.3237500Z  #7 0x00007b8e5ccc481b (/lib/x86_64-linux-gnu/libc.so.6+0x2881b)
2025-12-04T21:46:58.3238107Z  #8 0x00007b8e5ccd7517 (/lib/x86_64-linux-gnu/libc.so.6+0x3b517)
2025-12-04T21:46:58.3239737Z  #9 0x00005780031ce0d6 llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/CodeGen/ScheduleDAG.cpp:0:0
2025-12-04T21:46:58.3241434Z #10 0x0000578002f8e5e1 llvm::SwingSchedulerDAG::schedule() /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp:710:3
2025-12-04T21:46:58.3243241Z #11 0x0000578002f8cf7f llvm::MachinePipeliner::swingModuloScheduler(llvm::MachineLoop&) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp:635:7
2025-12-04T21:46:58.3249321Z #12 0x0000578002f8aff2 llvm::MachinePipeliner::scheduleLoop(llvm::MachineLoop&) /home/gha/actions-runner/_work/llvm-project/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp:434:15

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

Status: Needs Triage

Development

Successfully merging this pull request may close these issues.

4 participants