diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 82161b162ecdf..2abf2da6bc700 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -250,6 +250,10 @@ def FeatureFuseCryptoEOR : SubtargetFeature< "fuse-crypto-eor", "HasFuseCryptoEOR", "true", "CPU fuses AES/PMULL and EOR operations">; +def FeatureFuseAdrpAdd : SubtargetFeature< + "fuse-adrp-add", "HasFuseAdrpAdd", "true", + "CPU fuses adrp+add operations">; + def FeatureFuseLiterals : SubtargetFeature< "fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; @@ -660,6 +664,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureFuseAES, FeatureBalanceFPOps, FeatureCustomCheapAsMoveHandling, + FeatureFuseAdrpAdd, FeatureFuseLiterals, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; @@ -668,11 +673,13 @@ def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65", "Cortex-A65 ARM processors", [ FeatureFuseAES, FeatureFuseAddress, + FeatureFuseAdrpAdd, FeatureFuseLiterals]>; def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", [ FeatureFuseAES, + FeatureFuseAdrpAdd, FeatureFuseLiterals]>; def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", @@ -813,6 +820,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", FeatureFuseArithmeticLogic, FeatureFuseCCSelect, FeatureFuseCryptoEOR, + FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureZCRegMove, FeatureZCZeroing]>; @@ -824,6 +832,7 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseAddress, FeatureFuseAES, FeatureFuseCCSelect, + FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureLSLFast, FeaturePostRAScheduler, @@ -840,6 +849,7 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", FeatureFuseAES, FeatureFuseArithmeticLogic, FeatureFuseCCSelect, + FeatureFuseAdrpAdd, 
FeatureFuseLiterals, FeatureLSLFast, FeaturePostRAScheduler, @@ -1062,7 +1072,7 @@ def ProcessorFeatures { // by default for users targeting generic AArch64. The extensions do not // affect code generated by the compiler and can be used only by explicitly // mentioning the new system register names in assembly. - list Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE]; + list Generic = [FeatureFPARMv8, FeatureNEON, FeatureETE, FeatureFuseAdrpAdd]; } diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index e8217eaf6ed5c..c7657f37d16d9 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -157,16 +157,19 @@ static bool isCryptoEORPair(const MachineInstr *FirstMI, return false; } -/// Literal generation. -static bool isLiteralsPair(const MachineInstr *FirstMI, - const MachineInstr &SecondMI) { +static bool isAdrpAddPair(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { // Assume the 1st instr to be a wildcard if it is unspecified. - - // PC relative address. if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) && SecondMI.getOpcode() == AArch64::ADDXri) return true; + return false; +} +/// Literal generation. +static bool isLiteralsPair(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + // Assume the 1st instr to be a wildcard if it is unspecified. // 32 bit immediate. 
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) && (SecondMI.getOpcode() == AArch64::MOVKWi && @@ -397,6 +400,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, return true; if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI)) return true; + if (ST.hasFuseAdrpAdd() && isAdrpAddPair(FirstMI, SecondMI)) + return true; if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI)) return true; if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI)) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index ceb92582dbdb3..e919263f92794 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -204,8 +204,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// Return true if the CPU supports any kind of instruction fusion. bool hasFusion() const { + // Keep in sync with the pairs checked in AArch64MacroFusion.cpp. return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || - hasFuseAES() || hasFuseArithmeticLogic() || - hasFuseCCSelect() || hasFuseLiterals(); + hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || + hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseAddress() || + hasFuseCryptoEOR(); } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll index c146138e603e8..eb41722839d82 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-of-shifted-dbg-value-fallback.ll @@ -56,10 +56,9 @@ define void @baz(i8* %arg) !dbg !6 { ; CHECK-NEXT: lsl x8, x0, #4 ; CHECK-NEXT: .loc 1 0 0 is_stmt 0 // tmp.ll:0:0 ; CHECK-NEXT: adrp x9, global+202752 +; CHECK-NEXT: add x9, x9, :lo12:global+202752 ; CHECK-NEXT: .loc 1 4 1 // tmp.ll:4:1 ; CHECK-NEXT: and x8, x8, #0x1ff0 -; CHECK-NEXT: .loc 1 0 0 // tmp.ll:0:0 -; 
CHECK-NEXT: add x9, x9, :lo12:global+202752 ; CHECK-NEXT: .loc 1 5 1 is_stmt 1 // tmp.ll:5:1 ; CHECK-NEXT: str xzr, [x9, x8] ; CHECK-NEXT: .loc 1 6 1 // tmp.ll:6:1 diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll index 4b94de4c7a49b..2139d7043ab22 100644 --- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -457,10 +457,10 @@ define void @caller_in_memory() { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: bl return_in_memory -; CHECK-NEXT: adrp x8, in_memory_store ; CHECK-NEXT: ldur q0, [sp, #24] -; CHECK-NEXT: ldur q1, [sp, #8] +; CHECK-NEXT: adrp x8, in_memory_store ; CHECK-NEXT: add x8, x8, :lo12:in_memory_store +; CHECK-NEXT: ldur q1, [sp, #8] ; CHECK-NEXT: ldur q2, [sp, #56] ; CHECK-NEXT: ldur q3, [sp, #40] ; CHECK-NEXT: ldr d4, [sp, #72] @@ -478,14 +478,14 @@ define void @caller_in_memory() { define void @callee_in_memory(%T_IN_MEMORY %a) { ; CHECK-LABEL: callee_in_memory: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, in_memory_store ; CHECK-NEXT: ldr d0, [sp, #64] -; CHECK-NEXT: ldp q1, q2, [sp, #32] +; CHECK-NEXT: adrp x8, in_memory_store ; CHECK-NEXT: add x8, x8, :lo12:in_memory_store -; CHECK-NEXT: str d0, [x8, #64] ; CHECK-NEXT: ldr q3, [sp, #16] -; CHECK-NEXT: stp q1, q2, [x8, #32] +; CHECK-NEXT: ldp q1, q2, [sp, #32] +; CHECK-NEXT: str d0, [x8, #64] ; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: stp q1, q2, [x8, #32] ; CHECK-NEXT: stp q0, q3, [x8] ; CHECK-NEXT: ret store %T_IN_MEMORY %a, %T_IN_MEMORY* @in_memory_store diff --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll index aa5435c4fd6b3..c996b07b080a3 100644 --- a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll +++ b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll @@ -71,8 +71,8 @@ define [2 x i64] @f4() { ; ; GISEL-LABEL: f4: ; GISEL: // %bb.0: -; 
GISEL-NEXT: adrp x9, x2+8 ; GISEL-NEXT: adrp x8, x2+8 +; GISEL-NEXT: adrp x9, x2+8 ; GISEL-NEXT: add x9, x9, :lo12:x2+8 ; GISEL-NEXT: ldr x0, [x8, :lo12:x2+8] ; GISEL-NEXT: ldr x1, [x9, #8] diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll index 62fc207b0de2a..17db35c529f0a 100644 --- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll +++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll @@ -8,8 +8,8 @@ define void @test1() { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x +; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: stp x8, x9, [x10] @@ -23,8 +23,8 @@ define void @test2() { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x +; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: ldp x8, x9, [x8, #504] ; CHECK-NEXT: stp x8, x9, [x10, #504] @@ -38,10 +38,10 @@ define void @test3() { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x -; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: add x8, x8, #512 +; CHECK-NEXT: adrp x10, y +; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: add x10, x10, #512 ; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: stp x8, x9, [x10] @@ -55,8 +55,8 @@ define void @test4() { ; CHECK-LABEL: test4: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x +; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: ldp x8, x9, [x8, #-512] ; CHECK-NEXT: stp x8, x9, [x10, #-512] @@ -70,10 +70,10 @@ define void @test5() { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x -; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: 
sub x8, x8, #520 +; CHECK-NEXT: adrp x10, y +; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: sub x10, x10, #520 ; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: stp x8, x9, [x10] @@ -87,10 +87,10 @@ define void @test6() { ; CHECK-LABEL: test6: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x -; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: sub x8, x8, #520 +; CHECK-NEXT: adrp x10, y +; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: sub x10, x10, #520 ; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: stp x8, x9, [x10] @@ -104,10 +104,10 @@ define void @test7() { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x -; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x8, x8, :lo12:x -; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: add x8, x8, #503 +; CHECK-NEXT: adrp x10, y +; CHECK-NEXT: add x10, x10, :lo12:y ; CHECK-NEXT: add x10, x10, #503 ; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: stp x8, x9, [x10] diff --git a/llvm/test/CodeGen/AArch64/jump-table-32.ll b/llvm/test/CodeGen/AArch64/jump-table-32.ll index 339a44fc95ac4..d8572e901af29 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-32.ll +++ b/llvm/test/CodeGen/AArch64/jump-table-32.ll @@ -9,8 +9,9 @@ define i32 @test_jumptable(i32 %in) { i32 2, label %lbl3 i32 4, label %lbl4 ] -; CHECK: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE + ; CHECK: mov w[[INDEX:[0-9]+]], w0 +; CHECK: adrp [[JTPAGE:x[0-9]+]], LJTI0_0@PAGE ; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], LJTI0_0@PAGEOFF ; CHECK: adr [[BASE_BLOCK:x[0-9]+]], LBB0_2 ; CHECK: ldrb w[[OFFSET:[0-9]+]], [x[[JT]], x[[INDEX]]] diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll index 834cf371ac259..2b03fa34453ee 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll @@ -55,8 +55,8 @@ entry: ; CHECK-LABEL: OUTLINED_FUNCTION_0: ; CHECK: .cfi_startproc ; CHECK: adrp x1, _ZTIi -; 
CHECK-NEXT: mov x2, xzr ; CHECK-NEXT: add x1, x1, :lo12:_ZTIi +; CHECK-NEXT: mov x2, xzr ; CHECK-NEXT: str w19, [x0] ; CHECK-NEXT: b __cxa_throw ; CHECK: .cfi_endproc diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll index d9837c92426d1..bc70e9fe7c9f4 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT -; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-adrp-add,-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-adrp-add,+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll index 587663f261207..feb44beaed8db 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -23,11 +23,11 @@ define dso_local void @run_test() local_unnamed_addr #0 { ; CHECK-NEXT: .cfi_offset b14, -56 ; CHECK-NEXT: .cfi_offset b15, -64 ; CHECK-NEXT: movi v14.2d, #0000000000000000 -; CHECK-NEXT: adrp x10, B+48 -; CHECK-NEXT: adrp x11, A ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr +; CHECK-NEXT: adrp x10, B+48 ; CHECK-NEXT: 
add x10, x10, :lo12:B+48 +; CHECK-NEXT: adrp x11, A ; CHECK-NEXT: add x11, x11, :lo12:A ; CHECK-NEXT: // implicit-def: $q2 ; CHECK-NEXT: // implicit-def: $q3 diff --git a/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll b/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll index ba65b5f4d5f46..ecfbb67e8ec96 100644 --- a/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll +++ b/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll @@ -2,8 +2,8 @@ ; CHECK-LABEL: vls_sve_and_64xi8: ; CHECK-NEXT: adrp x[[ONE:[0-9]+]], .LCPI0_0 -; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: add x[[TWO:[0-9]+]], x[[ONE]], :lo12:.LCPI0_0 +; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x[[TWO]]] ; CHECK-NEXT: and z0.d, z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 31d2404666247..5416f0c976e87 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -578,8 +578,8 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI55_0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, x8, :lo12:.LCPI55_0 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %1 = insertelement undef, double 3.33, i32 0