Skip to content

Commit 78ee4a5

Browse files
[HEXAGON] [MachinePipeliner] Fix the DAG in case of dependent phis. (#135925)
This change corrects the scheduling relationship between dependent PHI nodes. Previously, the implementation treated SU1 as the successor of SU0. In reality, SU0 should depend on SU1, not the other way around. The incorrect ordering could cause SU0 to be scheduled before SU1, which leads to invalid IR: subsequent instructions may reference values that have not yet been defined. %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0 %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1 %27:intregs = A2_zxtb %3:intregs - SU2 %13:intregs = C2_muxri %45:predregs, 0, %46:intreg Co-Authored by: Sumanth Gundapaneni
1 parent d5072b9 commit 78ee4a5

File tree

3 files changed

+113
-8
lines changed

3 files changed

+113
-8
lines changed

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,8 +1274,19 @@ void SwingSchedulerDAG::updatePhiDependences() {
12741274
HasPhiDef = Reg;
12751275
// Add a chain edge to a dependent Phi that isn't an existing
12761276
// predecessor.
1277-
if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1278-
I.addPred(SDep(SU, SDep::Barrier));
1277+
1278+
// %3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0
1279+
// %7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1
1280+
// %27:intregs = A2_zxtb %3:intregs - SU2
1281+
// %13:intregs = C2_muxri %45:predregs, 0, %46:intreg
1282+
// If we have dependent phis, SU0 should be the successor of SU1
1283+
// not the other way around. (it used to be SU1 is the successor
1284+
// of SU0). In some cases, SU0 is scheduled earlier than SU1
1285+
// resulting in bad IR as we do not have a value that can be used
1286+
// by SU2.
1287+
1288+
if (SU->NodeNum < I.NodeNum && !SU->isPred(&I))
1289+
SU->addPred(SDep(&I, SDep::Barrier));
12791290
}
12801291
}
12811292
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
;RUN: llc -march=hexagon -mv71t -O2 < %s -o - 2>&1 > /dev/null
2+
3+
; Validate that we do not crash while running this test.
4+
;%3:intregs = PHI %21:intregs, %bb.6, %7:intregs, %bb.1 - SU0
5+
;%7:intregs = PHI %21:intregs, %bb.6, %13:intregs, %bb.1 - SU1
6+
;%27:intregs = A2_zxtb %3:intregs - SU2
7+
;%13:intregs = C2_muxri %45:predregs, 0, %46:intreg
8+
;If we have dependent phis, SU0 should be the successor of SU1 not
9+
;the other way around. (it used to be SU1 is the successor of SU0).
10+
;In some cases, SU0 is scheduled earlier than SU1 resulting in bad
11+
;IR as we do not have a value that can be used by SU2.
12+
13+
@global = common dso_local local_unnamed_addr global ptr null, align 4
14+
@global.1 = common dso_local local_unnamed_addr global i32 0, align 4
15+
@global.2 = common dso_local local_unnamed_addr global i16 0, align 2
16+
@global.3 = common dso_local local_unnamed_addr global i16 0, align 2
17+
@global.4 = common dso_local local_unnamed_addr global i32 0, align 4
18+
19+
; Function Attrs: nofree norecurse nosync nounwind
20+
define dso_local i16 @wombat(i8 zeroext %arg, i16 %dummy) local_unnamed_addr #0 {
21+
bb:
22+
%load = load ptr, ptr @global, align 4
23+
%load1 = load i32, ptr @global.1, align 4
24+
%add2 = add nsw i32 %load1, -1
25+
store i32 %add2, ptr @global.1, align 4
26+
%icmp = icmp eq i32 %load1, 0
27+
br i1 %icmp, label %bb36, label %bb3
28+
29+
bb3: ; preds = %bb3, %bb
30+
%phi = phi i32 [ %add30, %bb3 ], [ %add2, %bb ]
31+
%phi4 = phi i8 [ %phi8, %bb3 ], [ %arg, %bb ]
32+
%phi5 = phi i16 [ %select23, %bb3 ], [ %dummy, %bb ]
33+
%phi6 = phi i16 [ %select26, %bb3 ], [ %dummy, %bb ]
34+
%phi7 = phi i16 [ %select, %bb3 ], [ %dummy, %bb ]
35+
%phi8 = phi i8 [ %select29, %bb3 ], [ %arg, %bb ]
36+
%zext = zext i8 %phi4 to i32
37+
%getelementptr = getelementptr inbounds i32, ptr %load, i32 %zext
38+
%getelementptr9 = getelementptr inbounds i32, ptr %getelementptr, i32 2
39+
%ptrtoint = ptrtoint ptr %getelementptr9 to i32
40+
%trunc = trunc i32 %ptrtoint to i16
41+
%sext10 = sext i16 %phi7 to i32
42+
%shl11 = shl i32 %ptrtoint, 16
43+
%ashr = ashr exact i32 %shl11, 16
44+
%icmp12 = icmp slt i32 %ashr, %sext10
45+
%select = select i1 %icmp12, i16 %trunc, i16 %phi7
46+
%getelementptr13 = getelementptr inbounds i32, ptr %getelementptr, i32 3
47+
%load14 = load i32, ptr %getelementptr13, align 4
48+
%shl = shl i32 %load14, 8
49+
%getelementptr15 = getelementptr inbounds i32, ptr %getelementptr, i32 1
50+
%load16 = load i32, ptr %getelementptr15, align 4
51+
%shl17 = shl i32 %load16, 16
52+
%ashr18 = ashr exact i32 %shl17, 16
53+
%add = add nsw i32 %ashr18, %load14
54+
%lshr = lshr i32 %add, 8
55+
%or = or i32 %lshr, %shl
56+
%sub = sub i32 %or, %load16
57+
%trunc19 = trunc i32 %sub to i16
58+
%sext = sext i16 %phi5 to i32
59+
%shl20 = shl i32 %sub, 16
60+
%ashr21 = ashr exact i32 %shl20, 16
61+
%icmp22 = icmp sgt i32 %ashr21, %sext
62+
%select23 = select i1 %icmp22, i16 %trunc19, i16 %phi5
63+
%sext24 = sext i16 %phi6 to i32
64+
%icmp25 = icmp slt i32 %ashr21, %sext24
65+
%select26 = select i1 %icmp25, i16 %trunc19, i16 %phi6
66+
%icmp27 = icmp eq i8 %phi8, 0
67+
%add28 = add i8 %phi8, -1
68+
%select29 = select i1 %icmp27, i8 0, i8 %add28
69+
%add30 = add nsw i32 %phi, -1
70+
%icmp31 = icmp eq i32 %phi, 0
71+
br i1 %icmp31, label %bb32, label %bb3
72+
73+
bb32: ; preds = %bb3
74+
store i16 %trunc, ptr @global.2, align 2
75+
store i16 %trunc19, ptr @global.3, align 2
76+
store i32 -1, ptr @global.1, align 4
77+
%sext33 = sext i16 %select to i32
78+
%sext34 = sext i16 %select23 to i32
79+
%sext35 = sext i16 %select26 to i32
80+
br label %bb36
81+
82+
bb36: ; preds = %bb32, %bb
83+
%phi37 = phi i32 [ %sext33, %bb32 ], [ 0, %bb ]
84+
%phi38 = phi i32 [ %sext35, %bb32 ], [ 0, %bb ]
85+
%phi39 = phi i32 [ %sext34, %bb32 ], [ 0, %bb ]
86+
%sub40 = sub nsw i32 %phi39, %phi38
87+
%icmp41 = icmp slt i32 %sub40, %phi37
88+
br i1 %icmp41, label %bb43, label %bb42
89+
90+
bb42: ; preds = %bb36
91+
store i32 0, ptr @global.4, align 4
92+
br label %bb43
93+
94+
bb43: ; preds = %bb42, %bb36
95+
ret i16 %dummy
96+
}

llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
1+
; XFAIL: Hexagon
12
; RUN: llc -mtriple=hexagon -hexagon-initial-cfg-cleanup=0 -disable-cgp-delete-phis < %s | FileCheck %s
23

34
; Test that we generate the correct Phi name in the last couple of epilog
45
; blocks, when there are 3 epilog blocks. The Phi was scheduled in stage
56
; 2, so the computation for the number of Phis needs to be adjusted when
67
; the incoming prolog block is from prolog 0 or prolog 1.
7-
; Note: the pipeliner no longer generates a 3 stage pipeline for this test.
8+
; Note: the pipeliner has been generating a 4-stage pipelined loop.
89

910
; CHECK: loop0
1011
; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8)
12+
; CHECK: r{{[0-9]+}} = [[REG0]]
1113
; CHECK: endloop0
12-
; CHECK: [[REG0]] = add(r{{[0-9]+}},#8)
14+
; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},#8)
1315

1416
; Function Attrs: nounwind
1517
define void @f0(ptr noalias nocapture readonly %a0, i32 %a1, ptr noalias %a2, ptr %a3) #0 {
@@ -50,7 +52,3 @@ declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1
5052

5153
; Function Attrs: nounwind readnone
5254
declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1
53-
54-
attributes #0 = { nounwind "target-cpu"="hexagonv60" }
55-
attributes #1 = { nounwind readnone }
56-
attributes #2 = { nounwind }

0 commit comments

Comments
 (0)