Skip to content

Commit

Permalink
[AArch64] Fold more load.x into load.i with large offset
Browse files Browse the repository at this point in the history
The list of load.x is refer to canFoldIntoAddrMode on D152828.
Also support LDRSroX missed in canFoldIntoAddrMode
  • Loading branch information
vfdff committed Dec 21, 2023
1 parent 32878c2 commit f568763
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 54 deletions.
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4094,7 +4094,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
case AArch64::LDRBroX:
case AArch64::LDRBBroX:
case AArch64::LDRSBXroX:
case AArch64::LDRSBWroX:
case AArch64::LDRHroX:
case AArch64::LDRHHroX:
case AArch64::LDRSHXroX:
case AArch64::LDRSHWroX:
case AArch64::LDRWroX:
case AArch64::LDRSroX:
case AArch64::LDRSWroX:
case AArch64::LDRDroX:
case AArch64::LDRXroX:
case AArch64::LDRQroX:
return MI.getOperand(4);
}
}
Expand Down
53 changes: 50 additions & 3 deletions llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {

// Scan the instruction list to find a register assigned with a const
// value that can be combined with the current instruction (a load or store)
// using base addressing with writeback. Scan forwards.
// using base addressing with writeback. Scan backwards.
MachineBasicBlock::iterator
findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
unsigned &Offset);
Expand Down Expand Up @@ -221,7 +221,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge a base register updates before or after a ld/st instruction.
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);

// Find and merge a index ldr/st instructions into a base ld/st instruction.
// Find and merge a index ldr/st instruction into a base ld/st instruction.
bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);

bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
Expand Down Expand Up @@ -511,8 +511,34 @@ static unsigned getBaseAddressOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no base address equivalent!");
case AArch64::LDRBroX:
return AArch64::LDRBui;
case AArch64::LDRBBroX:
return AArch64::LDRBBui;
case AArch64::LDRSBXroX:
return AArch64::LDRSBXui;
case AArch64::LDRSBWroX:
return AArch64::LDRSBWui;
case AArch64::LDRHroX:
return AArch64::LDRHui;
case AArch64::LDRHHroX:
return AArch64::LDRHHui;
case AArch64::LDRSHXroX:
return AArch64::LDRSHXui;
case AArch64::LDRSHWroX:
return AArch64::LDRSHWui;
case AArch64::LDRWroX:
return AArch64::LDRWui;
case AArch64::LDRSroX:
return AArch64::LDRSui;
case AArch64::LDRSWroX:
return AArch64::LDRSWui;
case AArch64::LDRDroX:
return AArch64::LDRDui;
case AArch64::LDRXroX:
return AArch64::LDRXui;
case AArch64::LDRQroX:
return AArch64::LDRQui;
}
}

Expand Down Expand Up @@ -764,10 +790,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
default:
return false;
// Scaled instructions.
// TODO: Add more index address loads/stores.
// TODO: Add more index address stores.
case AArch64::LDRBroX:
case AArch64::LDRBBroX:
case AArch64::LDRSBXroX:
case AArch64::LDRSBWroX:
Scale = 1;
return true;
case AArch64::LDRHroX:
case AArch64::LDRHHroX:
case AArch64::LDRSHXroX:
case AArch64::LDRSHWroX:
Scale = 2;
return true;
case AArch64::LDRWroX:
case AArch64::LDRSroX:
case AArch64::LDRSWroX:
Scale = 4;
return true;
case AArch64::LDRDroX:
case AArch64::LDRXroX:
Scale = 8;
return true;
case AArch64::LDRQroX:
Scale = 16;
return true;
}
}

Expand Down
85 changes: 34 additions & 51 deletions llvm/test/CodeGen/AArch64/arm64-addrmode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) {
define i32 @LdOffset_i8_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb w0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) {
define i64 @LdOffset_i8_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb x0, [x8, #3704]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
%val = load i8, ptr %arrayidx, align 1
Expand All @@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) {
define i16 @LdOffset_i16(ptr %a) {
; CHECK-LABEL: LdOffset_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -293,9 +290,8 @@ define i16 @LdOffset_i16(ptr %a) {
define i32 @LdOffset_i16_zext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -307,9 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a) {
define i32 @LdOffset_i16_sext32(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -321,9 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a) {
define i64 @LdOffset_i16_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrh w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrh w0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -335,9 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a) {
define i64 @LdOffset_i16_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh x0, [x8, #7408]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
%val = load i16, ptr %arrayidx, align 2
Expand All @@ -349,9 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a) {
define i32 @LdOffset_i32(ptr %a) {
; CHECK-LABEL: LdOffset_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 4
Expand All @@ -362,9 +354,8 @@ define i32 @LdOffset_i32(ptr %a) {
define i64 @LdOffset_i32_zext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_zext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr w0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr w0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
Expand All @@ -376,9 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a) {
define i64 @LdOffset_i32_sext64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_sext64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldrsw x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldrsw x0, [x8, #14816]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
%val = load i32, ptr %arrayidx, align 2
Expand All @@ -390,9 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a) {
define i64 @LdOffset_i64(ptr %a) {
; CHECK-LABEL: LdOffset_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr x0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr x0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
%val = load i64, ptr %arrayidx, align 4
Expand All @@ -403,9 +392,8 @@ define i64 @LdOffset_i64(ptr %a) {
define <2 x i32> @LdOffset_v2i32(ptr %a) {
; CHECK-LABEL: LdOffset_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
%val = load <2 x i32>, ptr %arrayidx, align 4
Expand All @@ -416,9 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a) {
define <2 x i64> @LdOffset_v2i64(ptr %a) {
; CHECK-LABEL: LdOffset_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #59264 // =0xe780
; CHECK-NEXT: movk w8, #253, lsl #16
; CHECK-NEXT: ldr q0, [x0, x8]
; CHECK-NEXT: add x8, x0, #4048, lsl #12 // =16580608
; CHECK-NEXT: ldr q0, [x8, #59264]
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
%val = load <2 x i64>, ptr %arrayidx, align 4
Expand All @@ -429,9 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a) {
define double @LdOffset_i8_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #56952 // =0xde78
; CHECK-NEXT: movk w8, #15, lsl #16
; CHECK-NEXT: ldrsb w8, [x0, x8]
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
; CHECK-NEXT: ldrsb w8, [x8, #3704]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
Expand All @@ -444,9 +430,8 @@ define double @LdOffset_i8_f64(ptr %a) {
define double @LdOffset_i16_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #48368 // =0xbcf0
; CHECK-NEXT: movk w8, #31, lsl #16
; CHECK-NEXT: ldrsh w8, [x0, x8]
; CHECK-NEXT: add x8, x0, #506, lsl #12 // =2072576
; CHECK-NEXT: ldrsh w8, [x8, #7408]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
Expand All @@ -459,9 +444,8 @@ define double @LdOffset_i16_f64(ptr %a) {
define double @LdOffset_i32_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i32_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #31200 // =0x79e0
; CHECK-NEXT: movk w8, #63, lsl #16
; CHECK-NEXT: ldr s0, [x0, x8]
; CHECK-NEXT: add x8, x0, #1012, lsl #12 // =4145152
; CHECK-NEXT: ldr s0, [x8, #14816]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
Expand All @@ -474,9 +458,8 @@ define double @LdOffset_i32_f64(ptr %a) {
define double @LdOffset_i64_f64(ptr %a) {
; CHECK-LABEL: LdOffset_i64_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #62400 // =0xf3c0
; CHECK-NEXT: movk w8, #126, lsl #16
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x0, #2024, lsl #12 // =8290304
; CHECK-NEXT: ldr d0, [x8, #29632]
; CHECK-NEXT: scvtf d0, d0
; CHECK-NEXT: ret
%arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
Expand Down

1 comment on commit f568763

@vitalybuka
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reverted and reported issues #76202

Please sign in to comment.