diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index fd3410586e172..e05ce2890a08c 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -342,6 +342,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); } + int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) { + return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset); + } + unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const { auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 77ee6b89ed8a3..3cd89c71f7164 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -30,8 +30,8 @@ #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/LowLevelTypeUtils.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -2721,6 +2721,12 @@ class TargetLoweringBase { Type *Ty, unsigned AddrSpace, Instruction *I = nullptr) const; + /// Return the preferred common base offset, or 0 if there is none. + virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, + int64_t MaxOffset) const { + return 0; + } + /// Return true if the specified immediate is legal icmp immediate, that is /// the target has icmp instructions which can compare a register against the /// immediate without having to materialize the immediate into a register. 
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 885d2d3ce2482..824371c9b9f91 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6121,6 +6121,55 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { int64_t BaseOffset = LargeOffsetGEPs.begin()->second; Value *NewBaseGEP = nullptr; + auto createNewBase = [&](int64_t BaseOffset, Value *OldBase, + GetElementPtrInst *GEP) { + LLVMContext &Ctx = GEP->getContext(); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); + Type *I8PtrTy = + PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace()); + Type *I8Ty = Type::getInt8Ty(Ctx); + + BasicBlock::iterator NewBaseInsertPt; + BasicBlock *NewBaseInsertBB; + if (auto *BaseI = dyn_cast(OldBase)) { + // If the base of the struct is an instruction, the new base will be + // inserted close to it. + NewBaseInsertBB = BaseI->getParent(); + if (isa(BaseI)) + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + else if (InvokeInst *Invoke = dyn_cast(BaseI)) { + NewBaseInsertBB = + SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI); + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + } else + NewBaseInsertPt = std::next(BaseI->getIterator()); + } else { + // If the current base is an argument or global value, the new base + // will be inserted to the entry block. + NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock(); + NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); + } + IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); + // Create a new base. 
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset); + NewBaseGEP = OldBase; + if (NewBaseGEP->getType() != I8PtrTy) + NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); + NewBaseGEP = + NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); + NewGEPBases.insert(NewBaseGEP); + return; + }; + + // Check whether all the offsets can be encoded with preferred common base. + if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset( + LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) { + BaseOffset = PreferBase; + // Create a new base if the offset of the BaseGEP can be encoded with one + // instruction. + createNewBase(BaseOffset, OldBase, BaseGEP); + } + auto *LargeOffsetGEP = LargeOffsetGEPs.begin(); while (LargeOffsetGEP != LargeOffsetGEPs.end()) { GetElementPtrInst *GEP = LargeOffsetGEP->first; @@ -6153,35 +6202,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() { if (!NewBaseGEP) { // Create a new base if we don't have one yet. Find the insertion // pointer for the new base first. - BasicBlock::iterator NewBaseInsertPt; - BasicBlock *NewBaseInsertBB; - if (auto *BaseI = dyn_cast(OldBase)) { - // If the base of the struct is an instruction, the new base will be - // inserted close to it. - NewBaseInsertBB = BaseI->getParent(); - if (isa(BaseI)) - NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); - else if (InvokeInst *Invoke = dyn_cast(BaseI)) { - NewBaseInsertBB = - SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI); - NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); - } else - NewBaseInsertPt = std::next(BaseI->getIterator()); - } else { - // If the current base is an argument or global value, the new base - // will be inserted to the entry block. - NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock(); - NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt(); - } - IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt); - // Create a new base. 
- Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset); - NewBaseGEP = OldBase; - if (NewBaseGEP->getType() != I8PtrTy) - NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy); - NewBaseGEP = - NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep"); - NewGEPBases.insert(NewBaseGEP); + createNewBase(BaseOffset, OldBase, GEP); } IRBuilder<> Builder(GEP); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b6a16217dfae3..f6e64c49ef05e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16070,6 +16070,20 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, AM.Scale); } +// Check whether the 2 offsets belong to the same imm24 range and their high +// 12 bits are the same; then the high part can be encoded as an add immediate. +int64_t +AArch64TargetLowering::getPreferredLargeGEPBaseOffset(int64_t MinOffset, + int64_t MaxOffset) const { + int64_t HighPart = MinOffset & ~0xfffULL; + if (MinOffset >> 12 == MaxOffset >> 12 && isLegalAddImmediate(HighPart)) { + // Rebase the value to an integer multiple of imm12. + return HighPart; + } + + return 0; +} + bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const { // Consider splitting large offset of struct or array. return true; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 3c8479e1f6e3c..6ddbcd41dcb76 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -699,6 +699,9 @@ class AArch64TargetLowering : public TargetLowering { unsigned AS, Instruction *I = nullptr) const override; + int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, + int64_t MaxOffset) const override; + /// Return true if an FMA operation is faster than a pair of fmul and fadd /// instructions. 
fmuladd intrinsics will be expanded to FMAs when this method /// returns true, otherwise fmuladd is expanded to fmul + fadd. diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll index 69c558d9d5599..3d4749a7b8e7d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -209,3 +209,89 @@ define void @t17(i64 %a) { %3 = load volatile i64, ptr %2, align 8 ret void } + +define i32 @LdOffset_i8(ptr %a) { +; CHECK-LABEL: LdOffset_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #56952 // =0xde78 +; CHECK-NEXT: movk w8, #15, lsl #16 +; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: ret + %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 + %val = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %val to i32 + ret i32 %conv +} + +define i32 @LdOffset_i16(ptr %a) { +; CHECK-LABEL: LdOffset_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #48368 // =0xbcf0 +; CHECK-NEXT: movk w8, #31, lsl #16 +; CHECK-NEXT: ldrsh w0, [x0, x8] +; CHECK-NEXT: ret + %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992 + %val = load i16, ptr %arrayidx, align 2 + %conv = sext i16 %val to i32 + ret i32 %conv +} + +define i32 @LdOffset_i32(ptr %a) { +; CHECK-LABEL: LdOffset_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #31200 // =0x79e0 +; CHECK-NEXT: movk w8, #63, lsl #16 +; CHECK-NEXT: ldr w0, [x0, x8] +; CHECK-NEXT: ret + %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992 + %val = load i32, ptr %arrayidx, align 4 + ret i32 %val +} + +define i64 @LdOffset_i64_multi_offset(ptr %a) { +; CHECK-LABEL: LdOffset_i64_multi_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #2031, lsl #12 // =8318976 +; CHECK-NEXT: ldr x9, [x8, #960] +; CHECK-NEXT: ldr x8, [x8, #3016] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret + %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992 + %val0 = load i64, ptr %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 
1040249 + %val1 = load i64, ptr %arrayidx1, align 8 + %add = add nsw i64 %val1, %val0 + ret i64 %add +} + +define i64 @LdOffset_i64_multi_offset_with_commmon_base(ptr %a) { +; CHECK-LABEL: LdOffset_i64_multi_offset_with_commmon_base: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #507, lsl #12 // =2076672 +; CHECK-NEXT: ldr x9, [x8, #26464] +; CHECK-NEXT: ldr x8, [x8, #26496] +; CHECK-NEXT: add x0, x8, x9 +; CHECK-NEXT: ret + %b = getelementptr inbounds i16, ptr %a, i64 1038336 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 3308 + %val0 = load i64, ptr %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 3312 + %val1 = load i64, ptr %arrayidx1, align 8 + %add = add nsw i64 %val1, %val0 + ret i64 %add +} + +; Negative test: the offset is odd +define i32 @LdOffset_i16_odd_offset(ptr nocapture noundef readonly %a) { +; CHECK-LABEL: LdOffset_i16_odd_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #56953 // =0xde79 +; CHECK-NEXT: movk w8, #15, lsl #16 +; CHECK-NEXT: ldrsh w0, [x0, x8] +; CHECK-NEXT: ret + %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039993 + %val = load i16, ptr %arrayidx, align 2 + %conv = sext i16 %val to i32 + ret i32 %conv +} + diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll index 080b3dd75ee9a..097575ca86bcc 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -6,18 +6,17 @@ define void @test1(ptr %s, i32 %n) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #40000 // =0x9c40 -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, wzr +; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864 +; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.ge .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %while_body ; CHECK-NEXT: // 
=>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str w8, [x9, #4] -; CHECK-NEXT: add w8, w8, #1 -; CHECK-NEXT: str w8, [x9] -; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: str w9, [x8, #3140] +; CHECK-NEXT: add w9, w9, #1 +; CHECK-NEXT: str w9, [x8, #3136] +; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.lt .LBB0_1 ; CHECK-NEXT: .LBB0_2: // %while_end ; CHECK-NEXT: ret @@ -47,16 +46,15 @@ define void @test2(ptr %struct, i32 %n) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cbz x0, .LBB1_3 ; CHECK-NEXT: // %bb.1: // %while_cond.preheader -; CHECK-NEXT: mov w8, #40000 // =0x9c40 ; CHECK-NEXT: mov w9, wzr -; CHECK-NEXT: add x8, x0, x8 +; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864 ; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.ge .LBB1_3 ; CHECK-NEXT: .LBB1_2: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str w9, [x8, #4] +; CHECK-NEXT: str w9, [x8, #3140] ; CHECK-NEXT: add w9, w9, #1 -; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: str w9, [x8, #3136] ; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.lt .LBB1_2 ; CHECK-NEXT: .LBB1_3: // %while_end @@ -89,16 +87,15 @@ define void @test3(ptr %s1, ptr %s2, i1 %cond, i32 %n) { ; CHECK-NEXT: csel x8, x1, x0, ne ; CHECK-NEXT: cbz x8, .LBB2_3 ; CHECK-NEXT: // %bb.1: // %while_cond.preheader -; CHECK-NEXT: mov w10, #40000 // =0x9c40 ; CHECK-NEXT: mov w9, wzr -; CHECK-NEXT: add x8, x8, x10 +; CHECK-NEXT: add x8, x8, #9, lsl #12 // =36864 ; CHECK-NEXT: cmp w9, w3 ; CHECK-NEXT: b.ge .LBB2_3 ; CHECK-NEXT: .LBB2_2: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str w9, [x8, #4] +; CHECK-NEXT: str w9, [x8, #3140] ; CHECK-NEXT: add w9, w9, #1 -; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: str w9, [x8, #3136] ; CHECK-NEXT: cmp w9, w3 ; CHECK-NEXT: b.lt .LBB2_2 ; CHECK-NEXT: .LBB2_3: // %while_end @@ -141,17 +138,15 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler { ; CHECK-NEXT: .cfi_personality 156, DW.ref.__FrameHandler ; CHECK-NEXT: .cfi_lsda 28, .Lexception0 ; CHECK-NEXT: // 
%bb.0: // %entry -; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: mov w19, w0 -; CHECK-NEXT: mov w21, wzr -; CHECK-NEXT: mov w20, #40000 // =0x9c40 +; CHECK-NEXT: mov w20, wzr ; CHECK-NEXT: .LBB3_1: // %while_cond ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: .Ltmp0: @@ -159,23 +154,22 @@ define void @test4(i32 %n) uwtable personality ptr @__FrameHandler { ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: // %bb.2: // %while_cond_x.split ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: add x8, x0, x20 -; CHECK-NEXT: cmp w21, w19 -; CHECK-NEXT: str wzr, [x8] +; CHECK-NEXT: add x8, x0, #9, lsl #12 // =36864 +; CHECK-NEXT: cmp w20, w19 +; CHECK-NEXT: str wzr, [x8, #3136] ; CHECK-NEXT: b.ge .LBB3_4 ; CHECK-NEXT: // %bb.3: // %while_body ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: str w21, [x8, #4] -; CHECK-NEXT: add w21, w21, #1 -; CHECK-NEXT: str w21, [x8] +; CHECK-NEXT: str w20, [x8, #3140] +; CHECK-NEXT: add w20, w20, #1 +; CHECK-NEXT: str w20, [x8, #3136] ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_4: // %while_end ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 ; CHECK-NEXT: .cfi_restore w20 -; CHECK-NEXT: .cfi_restore w21 ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_5: // %cleanup @@ -223,14 +217,13 @@ define void @test5(ptr %s, i32 %n) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: mov w9, wzr ; CHECK-NEXT: add x8, x8, #19, lsl #12 // =77824 -; 
CHECK-NEXT: add x8, x8, #2176 ; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.ge .LBB4_2 ; CHECK-NEXT: .LBB4_1: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: str w9, [x8, #4] +; CHECK-NEXT: str w9, [x8, #2180] ; CHECK-NEXT: add w9, w9, #1 -; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: str w9, [x8, #2176] ; CHECK-NEXT: cmp w9, w1 ; CHECK-NEXT: b.lt .LBB4_1 ; CHECK-NEXT: .LBB4_2: // %while_end