Skip to content

Commit 5b094bf

Browse files
committed
[AArch64] Allow folding FMUL/FADD into FMA for FP16 types
isFMAFasterThanFMulAndFAdd should return true for FP16 types when the subtarget has full FP16 support (HasFullFP16), since we have the instructions to handle it for both SVE and NEON. (SVE patterns and tests will follow.) Differential Revision: https://reviews.llvm.org/D96599
1 parent 1dfb06d commit 5b094bf

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11569,6 +11569,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
1156911569
return false;
1157011570

1157111571
switch (VT.getSimpleVT().SimpleTy) {
11572+
case MVT::f16:
11573+
return Subtarget->hasFullFP16();
1157211574
case MVT::f32:
1157311575
case MVT::f64:
1157411576
return true;

llvm/test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,26 @@ define half @test_fmul(half %a, half %b) #0 {
6262
ret half %r
6363
}
6464

65+
; CHECK-CVT-LABEL: test_fmadd:
66+
; CHECK-CVT-NEXT: fcvt s1, h1
67+
; CHECK-CVT-NEXT: fcvt s0, h0
68+
; CHECK-CVT-NEXT: fmul s0, s0, s1
69+
; CHECK-CVT-NEXT: fcvt h0, s0
70+
; CHECK-CVT-NEXT: fcvt s0, h0
71+
; CHECK-CVT-NEXT: fcvt s1, h2
72+
; CHECK-CVT-NEXT: fadd s0, s0, s1
73+
; CHECK-CVT-NEXT: fcvt h0, s0
74+
; CHECK-CVT-NEXT: ret
75+
76+
; CHECK-FP16-LABEL: test_fmadd:
77+
; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
78+
; CHECK-FP16-NEXT: ret
79+
80+
define half @test_fmadd(half %a, half %b, half %c) #0 {
81+
%mul = fmul fast half %a, %b
82+
%r = fadd fast half %mul, %c
83+
ret half %r
84+
}
6585
; CHECK-CVT-LABEL: test_fdiv:
6686
; CHECK-CVT-NEXT: fcvt s1, h1
6787
; CHECK-CVT-NEXT: fcvt s0, h0
@@ -1305,8 +1325,7 @@ define half @test_round(half %a) #0 {
13051325
; CHECK-CVT-NEXT: ret
13061326

13071327
; CHECK-FP16-LABEL: test_fmuladd:
1308-
; CHECK-FP16-NEXT: fmul h0, h0, h1
1309-
; CHECK-FP16-NEXT: fadd h0, h0, h2
1328+
; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
13101329
; CHECK-FP16-NEXT: ret
13111330

13121331
define half @test_fmuladd(half %a, half %b, half %c) #0 {

0 commit comments

Comments
 (0)