Skip to content

Commit

Permalink
[AArch64] Optimize floating point materialization
Browse files Browse the repository at this point in the history
This patch follows some ideas from r352866 to optimize floating
point materialization even further. It changes isFPImmLegal to
consider up to 2 mov instructions, or up to 5 if the subtarget has
fused literals.

The rationale is that the cost is the same for mov+fmov vs. adrp+ldr, but
the mov+fmov sequence is always better because of the reduced d-cache
pressure. The timings are still the same for movw+movk+fmov vs. adrp+ldr
when the movw+movk pair is fused (although it is one instruction longer).

Reviewers: efriedma

Differential Revision: https://reviews.llvm.org/D58460

llvm-svn: 356390
  • Loading branch information
zatrazz committed Mar 18, 2019
1 parent 664c1ef commit a3cefa5
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 21 deletions.
16 changes: 13 additions & 3 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
Expand Down Expand Up @@ -5424,9 +5425,18 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
// If we can not materialize in immediate field for fmov, check if the
// value can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32))
IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(),
VT.getSizeInBits());
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit to 2 instructions at most.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}

LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
<< " imm value: "; Imm.dump(););
Expand Down
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
@@ -0,0 +1,40 @@
; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s

; CHECK: literal8
; CHECK: .quad 4614256656552045848
; Returns the f64 constant 0x400921FB54442D18 (the double nearest pi) from a
; function marked optsize. The checks below expect the value to be loaded from
; the constant pool with an adrp+ldr pair rather than built with mov/movk.
define double @foo() optsize {
; CHECK: _foo:
; CHECK: adrp x[[REG:[0-9]+]], lCPI0_0@PAGE
; CHECK: ldr d0, [x[[REG]], lCPI0_0@PAGEOFF]
; CHECK-NEXT: ret
ret double 0x400921FB54442D18
}

; CHECK: literal8
; CHECK: .quad 137438953409
; Returns the f64 whose bit pattern is 0x1FFFFFFFC1 (137438953409 as an
; integer) from a function marked optsize. The checks below expect an adrp+ldr
; load from the constant pool.
; NOTE(review): presumably this pattern needs more than one mov-class
; instruction, which is why it is not materialized inline under optsize --
; confirm against the immediate-expansion logic.
define double @foo2() optsize {
; CHECK: _foo2:
; CHECK: adrp x[[REG:[0-9]+]], lCPI1_0@PAGE
; CHECK: ldr d0, [x[[REG]], lCPI1_0@PAGEOFF]
; CHECK-NEXT: ret
ret double 0x1FFFFFFFC1
}

; Single-precision counterpart of the optsize tests above: returns the float
; constant written as 0x400921FB60000000. The checks below expect it to be
; loaded into s0 from the constant pool with an adrp+ldr pair.
define float @bar() optsize {
; CHECK: _bar:
; CHECK: adrp x[[REG:[0-9]+]], lCPI2_0@PAGE
; CHECK: ldr s0, [x[[REG]], lCPI2_0@PAGEOFF]
; CHECK-NEXT: ret
ret float 0x400921FB60000000
}

; CHECK: literal16
; CHECK: .quad 0
; CHECK: .quad 0
; Returns an all-zero fp128 constant from a function marked optsize. The checks
; below expect a 16-byte literal-pool entry loaded into q0 with adrp+ldr.
define fp128 @baz() optsize {
; CHECK: _baz:
; CHECK: adrp x[[REG:[0-9]+]], lCPI3_0@PAGE
; CHECK: ldr q0, [x[[REG]], lCPI3_0@PAGEOFF]
; CHECK-NEXT: ret
ret fp128 0xL00000000000000000000000000000000
}
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
Expand Up @@ -10,12 +10,11 @@ define double @foo() {
ret double 0x400921FB54442D18
}

; CHECK: literal4
; CHECK: .long 1078530011
define float @bar() {
; CHECK: _bar:
; CHECK: adrp x[[REG:[0-9]+]], lCPI1_0@PAGE
; CHECK: ldr s0, [x[[REG]], lCPI1_0@PAGEOFF]
; CHECK: mov [[REG:w[0-9]+]], #4059
; CHECK: movk [[REG]], #16457, lsl #16
; CHECK: fmov s0, [[REG]]
; CHECK-NEXT: ret
ret float 0x400921FB60000000
}
Expand Down
12 changes: 10 additions & 2 deletions llvm/test/CodeGen/AArch64/fpimm.ll
Expand Up @@ -45,6 +45,13 @@ define void @check_double() {
; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
; TINY-DAG: fmov {{d[0-9]+}}, [[X128]]

; 64-bit ORR followed by MOVK.
; CHECK-DAG: mov [[XFP0:x[0-9]+]], #1082331758844
; CHECK-DAG: movk [[XFP0]], #64764, lsl #16
; CHECK-DAG: fmov {{d[0-9]+}}, [[XFP0]]
%newval3 = fadd double %val, 0xFCFCFC00FC
store volatile double %newval3, double* @varf64

; CHECK: ret
; TINY: ret
ret void
Expand All @@ -54,8 +61,9 @@ define void @check_double() {
; LARGE: mov [[REG:w[0-9]+]], #4059
; LARGE-NEXT: movk [[REG]], #16457, lsl #16
; LARGE-NEXT: fmov s0, [[REG]]
; TINY-LABEL: check_float2
; TINY: ldr s0, .LCPI2_0
; TINY-LABEL: check_float2
; TINY: mov [[REG:w[0-9]+]], #4059
; TINY-NEXT: movk [[REG]], #16457, lsl #16
; Returns a single float constant (approximately pi) with no other code, so
; the checks preceding this function see only how that constant is produced.
define float @check_float2() {
ret float 3.14159274101257324218750
}
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/AArch64/literal_pools_float.ll
Expand Up @@ -31,16 +31,19 @@ define void @floating_lits() {

%doubleval = load double, double* @vardouble
%newdouble = fadd double %doubleval, 129.0
; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
; CHECK: mov [[W129:x[0-9]+]], #35184372088832
; CHECK: movk [[W129]], #16480, lsl #48
; CHECK: fmov {{d[0-9]+}}, [[W129]]
; CHECK-NOFP-NOT: fadd

; CHECK-TINY: ldr [[LIT129:d[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
; CHECK-TINY: mov [[W129:x[0-9]+]], #35184372088832
; CHECK-TINY: movk [[W129]], #16480, lsl #48
; CHECK-TINY: fmov {{d[0-9]+}}, [[W129]]
; CHECK-NOFP-TINY-NOT: ldr {{d[0-9]+}},
; CHECK-NOFP-TINY-NOT: fadd

; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]]
; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:vardouble]]
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]]
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
Expand Up @@ -46,3 +46,18 @@ entry:
; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
}

; Function Attrs: norecurse nounwind readnone
; Returns the f64 constant 0x400921FB54442D18. Without literal fusion the
; constant is expected to come from the constant pool (adrp+ldr); with literal
; fusion it is expected to be built with mov + 3x movk and moved to an FP
; register with fmov.
;
; The run-specific directives below use the same un-hyphenated prefixes as the
; earlier checks in this file. A hyphen after the base prefix is not parsed as
; a valid directive, so those lines would be skipped silently.
; NOTE(review): the RUN lines are not visible here -- confirm the prefix names.
define double @litf() {
entry:
ret double 0x400921FB54442D18

; CHECK-LABEL: litf:
; CHECKDONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
; CHECKDONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
; CHECKFUSE: mov [[R:x[0-9]+]], #11544
; CHECKFUSE: movk [[R]], #21572, lsl #16
; CHECKFUSE: movk [[R]], #8699, lsl #32
; CHECKFUSE: movk [[R]], #16393, lsl #48
; CHECKFUSE: fmov {{d[0-9]+}}, [[R]]
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/win_cst_pool.ll
Expand Up @@ -2,22 +2,22 @@
; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s

define double @double() {
ret double 0x0000000000800001
ret double 0x2000000000800001
}
; CHECK: .globl __real@0000000000800001
; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800001
; CHECK: .globl __real@2000000000800001
; CHECK-NEXT: .section .rdata,"dr",discard,__real@2000000000800001
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: __real@0000000000800001:
; CHECK-NEXT: .xword 8388609
; CHECK-NEXT: __real@2000000000800001:
; CHECK-NEXT: .xword 2305843009222082561
; CHECK: double:
; CHECK: adrp x8, __real@0000000000800001
; CHECK-NEXT: ldr d0, [x8, __real@0000000000800001]
; CHECK: adrp x8, __real@2000000000800001
; CHECK-NEXT: ldr d0, [x8, __real@2000000000800001]
; CHECK-NEXT: ret

; MINGW: .section .rdata,"dr"
; MINGW-NEXT: .p2align 3
; MINGW-NEXT: [[LABEL:\.LC.*]]:
; MINGW-NEXT: .xword 8388609
; MINGW-NEXT: .xword 2305843009222082561
; MINGW: double:
; MINGW: adrp x8, [[LABEL]]
; MINGW-NEXT: ldr d0, [x8, [[LABEL]]]
Expand Down

0 comments on commit a3cefa5

Please sign in to comment.