279 changes: 279 additions & 0 deletions llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
STATISTIC(NumFailedAlignmentCheck, "Number of load/store pair transformation "
"not passed the alignment check");
STATISTIC(NumConstOffsetFolded,
"Number of const offset of index address folded");

DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
"Controls which pairs are considered for renaming");
Expand All @@ -77,6 +79,11 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);

// The LdStConstLimit limits how far we search for const offset instructions
// when we form index address load/store instructions.
static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
cl::init(10), cl::Hidden);

// Enable register renaming to find additional store pairing opportunities.
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
cl::init(true), cl::Hidden);
Expand Down Expand Up @@ -173,6 +180,13 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
int UnscaledOffset, unsigned Limit);

// Scan the instruction list to find a register assigned with a const
// value that can be combined with the current instruction (a load or store)
// using base addressing with writeback. Scan backwards.
MachineBasicBlock::iterator
findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
unsigned &Offset);

// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan backwards.
Expand All @@ -184,11 +198,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
unsigned BaseReg, int Offset);

bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
unsigned IndexReg, unsigned &Offset);

// Merge a pre- or post-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, bool IsPreIdx);

MachineBasicBlock::iterator
mergeConstOffsetInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, unsigned Offset,
int Scale);

// Find and merge zero store instructions.
bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);

Expand All @@ -201,6 +223,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge a base register updates before or after a ld/st instruction.
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

// Find and merge an index ldr/st instruction into a base ld/st instruction.
bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);

bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);

bool runOnMachineFunction(MachineFunction &Fn) override;
Expand Down Expand Up @@ -483,6 +508,42 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
}
}

static unsigned getBaseAddressOpcode(unsigned Opc) {
// TODO: Add more index address stores.
switch (Opc) {
default:
llvm_unreachable("Opcode has no base address equivalent!");
case AArch64::LDRBroX:
return AArch64::LDRBui;
case AArch64::LDRBBroX:
return AArch64::LDRBBui;
case AArch64::LDRSBXroX:
return AArch64::LDRSBXui;
case AArch64::LDRSBWroX:
return AArch64::LDRSBWui;
case AArch64::LDRHroX:
return AArch64::LDRHui;
case AArch64::LDRHHroX:
return AArch64::LDRHHui;
case AArch64::LDRSHXroX:
return AArch64::LDRSHXui;
case AArch64::LDRSHWroX:
return AArch64::LDRSHWui;
case AArch64::LDRWroX:
return AArch64::LDRWui;
case AArch64::LDRSroX:
return AArch64::LDRSui;
case AArch64::LDRSWroX:
return AArch64::LDRSWui;
case AArch64::LDRDroX:
return AArch64::LDRDui;
case AArch64::LDRXroX:
return AArch64::LDRXui;
case AArch64::LDRQroX:
return AArch64::LDRQui;
}
}

static unsigned getPostIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
Expand Down Expand Up @@ -724,6 +785,41 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
}
}

// Make sure this is a reg+reg Ld/St
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
return false;
// Scaled instructions.
// TODO: Add more index address stores.
case AArch64::LDRBroX:
case AArch64::LDRBBroX:
case AArch64::LDRSBXroX:
case AArch64::LDRSBWroX:
Scale = 1;
return true;
case AArch64::LDRHroX:
case AArch64::LDRHHroX:
case AArch64::LDRSHXroX:
case AArch64::LDRSHWroX:
Scale = 2;
return true;
case AArch64::LDRWroX:
case AArch64::LDRSroX:
case AArch64::LDRSWroX:
Scale = 4;
return true;
case AArch64::LDRDroX:
case AArch64::LDRXroX:
Scale = 8;
return true;
case AArch64::LDRQroX:
Scale = 16;
return true;
}
}

static bool isRewritableImplicitDef(unsigned Opc) {
switch (Opc) {
default:
Expand Down Expand Up @@ -2053,6 +2149,63 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
return NextI;
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update,
unsigned Offset, int Scale) {
assert((Update->getOpcode() == AArch64::MOVKWi) &&
"Unexpected const mov instruction to merge!");
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
MachineInstr &MemMI = *I;
unsigned Mask = (1 << 12) * Scale - 1;
unsigned Low = Offset & Mask;
unsigned High = Offset - Low;
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
MachineInstrBuilder AddMIB, MemMIB;

// Add IndexReg, BaseReg, High (the BaseReg may be SP)
AddMIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
.addDef(IndexReg)
.addUse(BaseReg)
.addImm(High >> 12) // shifted value
.addImm(12); // shift 12
(void)AddMIB;
// Ld/St DestReg, IndexReg, Imm12
unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(MemMI))
.add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
.addImm(Low / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
(void)MemMIB;

++NumConstOffsetFolded;
LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
LLVM_DEBUG(PrevI->print(dbgs()));
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(Update->print(dbgs()));
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(I->print(dbgs()));
LLVM_DEBUG(dbgs() << " with instruction:\n ");
LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
LLVM_DEBUG(dbgs() << " ");
LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");

// Erase the old instructions for the block.
I->eraseFromParent();
PrevI->eraseFromParent();
Update->eraseFromParent();

return NextI;
}

bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
MachineInstr &MI,
unsigned BaseReg, int Offset) {
Expand Down Expand Up @@ -2100,6 +2253,34 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
return false;
}

bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
MachineInstr &MI,
unsigned IndexReg,
unsigned &Offset) {
// The update instruction source and destination register must be the
// same as the load/store index register.
if (MI.getOpcode() == AArch64::MOVKWi &&
TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {

// movz + movk hold a large offset of a Ld/St instruction.
MachineBasicBlock::iterator B = MI.getParent()->begin();
MachineBasicBlock::iterator MBBI = &MI;
// Skip the scene when the MI is the first instruction of a block.
if (MBBI == B)
return false;
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MovzMI = *MBBI;
if (MovzMI.getOpcode() == AArch64::MOVZWi) {
unsigned Low = MovzMI.getOperand(1).getImm();
unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
Offset = High + Low;
// 12-bit optionally shifted immediates are legal for adds.
return Offset >> 24 == 0;
}
}
return false;
}

MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
Expand Down Expand Up @@ -2255,6 +2436,60 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}

MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;

// If the load is the first instruction in the block, there's obviously
// not any matching load or store.
if (MBBI == B)
return E;

// Make sure the IndexReg is killed and the shift amount is zero.
// TODO: Relex this restriction to extend, simplify processing now.
if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
!AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
(AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
return E;

Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();

// Track which register units have been modified and used between the first
// insn (inclusive) and the second insn.
ModifiedRegUnits.clear();
UsedRegUnits.clear();
unsigned Count = 0;
do {
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;

// Don't count transient instructions towards the search limit since there
// may be different numbers of them if e.g. debug information is present.
if (!MI.isTransient())
++Count;

// If we found a match, return it.
if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
return MBBI;
}

// Update the status of what the instruction clobbered and used.
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);

// Otherwise, if the index register is used or modified, we have no match,
// so return early.
if (!ModifiedRegUnits.available(IndexReg) ||
!UsedRegUnits.available(IndexReg))
return E;

} while (MBBI != B && Count < Limit);
return E;
}

bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
Expand Down Expand Up @@ -2443,6 +2678,34 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
return false;
}

bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
int Scale) {
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator E = MI.getParent()->end();
MachineBasicBlock::iterator Update;

// Don't know how to handle unscaled pre/post-index versions below, so bail.
if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
return false;

// Look back to try to find a const offset for index LdSt instruction. For
// example,
// mov x8, #LargeImm ; = a * (1<<12) + imm12
// ldr x1, [x0, x8]
// merged into:
// add x8, x0, a * (1<<12)
// ldr x1, [x8, imm12]
unsigned Offset;
Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
if (Update != E && (Offset & (Scale - 1)) == 0) {
// Merge the imm12 into the ld/st.
MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
return true;
}

return false;
}

bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool EnableNarrowZeroStOpt) {

Expand Down Expand Up @@ -2521,6 +2784,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
}

// 5) Find a register assigned with a const value that can be combined with
// into the load or store. e.g.,
// mov x8, #LargeImm ; = a * (1<<12) + imm12
// ldr x1, [x0, x8]
// ; becomes
// add x8, x0, a * (1<<12)
// ldr x1, [x8, imm12]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
int Scale;
if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
Modified = true;
else
++MBBI;
}

return Modified;
}

Expand Down
100 changes: 40 additions & 60 deletions llvm/test/CodeGen/AArch64/arm64-addrmode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,8 @@ define void @t17(i64 %a) {
define i8 @LdOffset_i8(ptr %a) {
; CHECK-LABEL: LdOffset_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -227,9 +226,8 @@ define i8 @LdOffset_i8(ptr %a) {
define i32 @LdOffset_i8_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_zext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -241,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
define i32 @LdOffset_i8_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -255,9 +252,8 @@ define i32 @LdOffset_i8_sext32(ptr %a) {
define i64 @LdOffset_i8_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrb w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -269,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
define i64 @LdOffset_i8_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb x0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -283,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
define i16 @LdOffset_i16(ptr %a) {
; CHECK-LABEL: LdOffset_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -296,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) {
define i32 @LdOffset_i16_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -310,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
define i32 @LdOffset_i16_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -324,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
define i64 @LdOffset_i16_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -338,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
define i64 @LdOffset_i16_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh x0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -352,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
define i32 @LdOffset_i32(ptr %a) {
; CHECK-LABEL: LdOffset_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 4
Expand All @@ -365,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) {
define i64 @LdOffset_i32_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
Expand All @@ -379,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
define i64 @LdOffset_i32_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldrsw x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldrsw x0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
Expand All @@ -393,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
define i64 @LdOffset_i64(ptr %a) {
; CHECK-LABEL: LdOffset_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr x0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
%val = load i64, ptr %arrayidx, align 4
Expand All @@ -406,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a) {
define <2 x i32> @LdOffset_v2i32(ptr %a) {
; CHECK-LABEL: LdOffset_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
%val = load <2 x i32>, ptr %arrayidx, align 4
Expand All @@ -419,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
define <2 x i64> @LdOffset_v2i64(ptr %a) {
; CHECK-LABEL: LdOffset_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #59264 // =0xe780
; CHECK-NEXT: movk w8, #253, lsl #16
; CHECK-NEXT: ldr q0, [x0, x8]
; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
; CHECK-NEXT: ldr q0, [x8, #59264]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
%val = load <2 x i64>, ptr %arrayidx, align 4
Expand All @@ -432,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
define double @LdOffset_i8_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb w8, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb w8, [x8, #3704]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
Expand All @@ -447,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) {
define double @LdOffset_i16_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w8, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh w8, [x8, #7408]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
Expand All @@ -462,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) {
define double @LdOffset_i32_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr s0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr s0, [x8, #14816]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
Expand All @@ -477,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) {
define double @LdOffset_i64_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i64_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
Expand Down
25 changes: 22 additions & 3 deletions llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ body: |
; CHECK-LABEL: name: LdOffset
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = MOVZWi 56952, 0
; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0
; CHECK-NEXT: $x8 = ADDXri $x0, 253, 12
; CHECK-NEXT: renamable $w0 = LDRBBui killed renamable $x8, 3704
; CHECK-NEXT: RET undef $lr, implicit $w0
renamable $w8 = MOVZWi 56952, 0
renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
Expand Down Expand Up @@ -46,3 +45,23 @@ body: |
renamable $w0 = LDRBBroX killed renamable $x0, renamable $x8, 0, 0
RET undef $lr, implicit $w0
...

# Negative test: No MOVZWi used for the const offset
---
name: LdOffset_missing_MOVZ
tracksRegLiveness: true
liveins:
- { reg: '$x0', virtual-reg: '' }
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: LdOffset_missing_MOVZ
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0
; CHECK-NEXT: RET undef $lr, implicit $w0
renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0
RET undef $lr, implicit $w0