-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Description
In 538.imagick_r on RISC-V with LTO, there's a very hot loop in MorphologyApply that has a double constant materialized in it; see the profile at https://cc-perf.igalia.com/db_default/v4/nts/profile/6/103/101. We should be hoisting it out of the loop in early-machinelicm, but we fail to do so because MachineLICM mistakenly thinks the register pressure is too high.
Here's a simplified version of the loop that demonstrates this:
; Reduced reproducer: one loop over 1024 doubles starting at %p. Each element
; is squared repeatedly; if the result compares unordered-or-not-equal to 0.0
; it is multiplied by a floating-point constant and stored back in place.
define void @f(ptr %p) {
entry:
br label %loop
loop:
%iv = phi i64 [0, %entry], [%iv.next, %latch]
%gep = getelementptr double, ptr %p, i64 %iv
%x = load double, ptr %gep
; Chain of 32 dependent fmuls. Each of %y0..%y30 feeds only the next fmul,
; but as both of its operands, so every def has two uses.
%y0 = fmul double %x, %x
%y1 = fmul double %y0, %y0
%y2 = fmul double %y1, %y1
%y3 = fmul double %y2, %y2
%y4 = fmul double %y3, %y3
%y5 = fmul double %y4, %y4
%y6 = fmul double %y5, %y5
%y7 = fmul double %y6, %y6
%y8 = fmul double %y7, %y7
%y9 = fmul double %y8, %y8
%y10 = fmul double %y9, %y9
%y11 = fmul double %y10, %y10
%y12 = fmul double %y11, %y11
%y13 = fmul double %y12, %y12
%y14 = fmul double %y13, %y13
%y15 = fmul double %y14, %y14
%y16 = fmul double %y15, %y15
%y17 = fmul double %y16, %y16
%y18 = fmul double %y17, %y17
%y19 = fmul double %y18, %y18
%y20 = fmul double %y19, %y19
%y21 = fmul double %y20, %y20
%y22 = fmul double %y21, %y21
%y23 = fmul double %y22, %y22
%y24 = fmul double %y23, %y23
%y25 = fmul double %y24, %y24
%y26 = fmul double %y25, %y25
%y27 = fmul double %y26, %y26
%y28 = fmul double %y27, %y27
%y29 = fmul double %y28, %y28
%y30 = fmul double %y29, %y29
%y31 = fmul double %y30, %y30
; Branch to %if when %y31 is unordered with, or not equal to, 0.0.
%c = fcmp une double %y31, 0.0
br i1 %c, label %if, label %latch
if:
; Conditionally executed block; the constant operand below is the only
; non-zero floating-point constant in the function.
%z = fmul double %y31, 3.14159274101257324218750
store double %z, ptr %gep
br label %latch
latch:
; Advance the induction variable; the loop exits once it reaches 1024.
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop
exit:
ret void
}

When compiled with `llc -mtriple riscv64 -mattr=+d`:
******** Pre-regalloc Machine LICM: f ********
GPRAll: 0 -> 1
GPRAll: 1 -> 2
GPRAll: 2 -> 2
Entering %bb.1
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
GPRAll: 2 -> 3
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
FPR16: 0 -> 1
FPR16: 1 -> 2
FPR16: 2 -> 3
FPR16: 3 -> 4
FPR16: 4 -> 5
FPR16: 5 -> 6
FPR16: 6 -> 7
FPR16: 7 -> 8
FPR16: 8 -> 9
FPR16: 9 -> 10
FPR16: 10 -> 11
FPR16: 11 -> 12
FPR16: 12 -> 13
FPR16: 13 -> 14
FPR16: 14 -> 15
FPR16: 15 -> 16
FPR16: 16 -> 17
FPR16: 17 -> 18
FPR16: 18 -> 19
FPR16: 19 -> 20
FPR16: 20 -> 21
FPR16: 21 -> 22
FPR16: 22 -> 23
FPR16: 23 -> 24
FPR16: 24 -> 25
FPR16: 25 -> 26
FPR16: 26 -> 27
FPR16: 27 -> 28
FPR16: 28 -> 29
FPR16: 29 -> 30
FPR16: 30 -> 31
FPR16: 31 -> 32
FPR16: 32 -> 33
Hoist non-reg-pressure: %38:fpr64 = FMV_D_X $x0
Hoisting %38:fpr64 = FMV_D_X $x0
from %bb.1 to %bb.0
FPR16: 33 -> 34
GPRAll: 3 -> 4
FPR16: 34 -> 33
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
GPRAll: 4 -> 3
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
Entering %bb.2
Hoisting %40:gpr = LUI target-flags(riscv-hi) %const.0
from %bb.2 to %bb.0
GPRAll: 3 -> 4
Won't speculate: %41:fpr64 = FLD %40:gpr, target-flags(riscv-lo) %const.0 :: (load (s64) from constant-pool)
GPRAll: 4 -> 3
FPR16: 33 -> 34
FPR16: 34 -> 34
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
FPR16: 34 -> 33
Exiting %bb.2
Entering %bb.3
GPRAll: 3 -> 4
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
GPRAll: 4 -> 3
LICM: Instruction not safe to move.
LICM: Instruction not a LICM candidate
# llc output for the reproducer above (riscv64, +d).
# a0 is the running element pointer: it is loaded from, stored to, and
# advanced by 8 bytes per iteration.
f: # @f
.cfi_startproc
# %bb.0: # %entry
# a1 = a0 + (2 << 12) = a0 + 8192 bytes (1024 doubles * 8): loop end pointer.
lui a1, 2
# fa5 = 0.0, the comparison operand; this instruction sits outside the loop.
fmv.d.x fa5, zero
add a1, a0, a1
# Upper part of the constant-pool address; also outside the loop.
lui a2, %hi(.LCPI0_0)
j .LBB0_2
.LBB0_1: # %latch
# in Loop: Header=BB0_2 Depth=1
addi a0, a0, 8
beq a0, a1, .LBB0_4
.LBB0_2: # %loop
# =>This Inner Loop Header: Depth=1
fld fa4, 0(a0)
# 32 successive squarings, all reusing the single register fa4.
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
fmul.d fa4, fa4, fa4
# Skip the scale-and-store block when the result equals 0.0.
feq.d a3, fa4, fa5
bnez a3, .LBB0_1
# %bb.3: # %if
# in Loop: Header=BB0_2 Depth=1
# The constant is reloaded from the constant pool every time this block runs.
fld fa3, %lo(.LCPI0_0)(a2) # not hoisted
fmul.d fa4, fa4, fa3
fsd fa4, 0(a0)
j .LBB0_1
.LBB0_4: # %exit
ret
- In the chain of fmuls the live range of each def is very short, but MachineLICM only checks whether the killed flag is set to determine when a live range ends. The killed flag is only set by SelectionDAG if there's only one use, but the defs all have multiple uses. This means the definitions are never considered killed, and so `CanCauseHighRegPressure` returns true by the time it reaches %z. 538.imagick_r doesn't have this chain, but the loop in question is deeply nested in a heavily inlined function where there are lots of defs with multiple uses, so it runs into the same register pressure overestimation.
- MachineLICM will still hoist instructions in the face of high register pressure, but only if they're not speculated. However, the constant being materialized in 538.imagick_r is in a conditional block, so it fails the "Won't speculate: " check.
There are multiple ways we could fix this. Two potential avenues to explore are using RegPressureTracker or LiveVariables to accurately determine when a def is killed.
This isn't a RISC-V-specific issue, but it was discussed at the RISC-V roundtable at the LLVM Developers' Meeting, so I'm tagging it here.