Skip to content

Commit

Permalink
[AArch64][GlobalISel] Fold G_MUL into XRO load addressing mode when possible
Browse files Browse the repository at this point in the history

If we have a G_MUL, and either the LHS or the RHS of that mul is the legal
shift value for a load addressing mode, we can fold it into the load.

This gives some code size savings on some SPEC tests. The best are around 2%
on 300.twolf and 3% on 254.gap.

Differential Revision: https://reviews.llvm.org/D65173

llvm-svn: 366954
  • Loading branch information
Jessica Paquette committed Jul 24, 2019
1 parent 5f4426e commit 6849911
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 9 deletions.
49 changes: 40 additions & 9 deletions llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Expand Up @@ -4125,21 +4125,52 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
return None;

// Now try to match the G_SHL.
MachineInstr *Shl =
getOpcodeDef(TargetOpcode::G_SHL, Gep->getOperand(2).getReg(), MRI);
if (!Shl || !isWorthFoldingIntoExtendedReg(*Shl, MRI))
// Now, try to match an opcode which will match our specific offset.
// We want a G_SHL or a G_MUL.
MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
if (!OffsetInst)
return None;

// Now, try to find the specific G_CONSTANT.
auto ValAndVReg =
getConstantVRegValWithLookThrough(Shl->getOperand(2).getReg(), MRI);
if (!ValAndVReg)
unsigned OffsetOpc = OffsetInst->getOpcode();
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
return None;

if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
return None;

// Now, try to find the specific G_CONSTANT. Start by assuming that the
// register we will offset is the LHS, and the register containing the
// constant is the RHS.
Register OffsetReg = OffsetInst->getOperand(1).getReg();
Register ConstantReg = OffsetInst->getOperand(2).getReg();
auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg) {
// We didn't get a constant on the RHS. If the opcode is a shift, then
// we're done.
if (OffsetOpc == TargetOpcode::G_SHL)
return None;

// If we have a G_MUL, we can use either register. Try looking at the RHS.
std::swap(OffsetReg, ConstantReg);
ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg)
return None;
}

// The value must fit into 3 bits, and must be positive. Make sure that is
// true.
int64_t ImmVal = ValAndVReg->Value;

// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
if (OffsetOpc == TargetOpcode::G_MUL) {
if (!isPowerOf2_32(ImmVal))
return None;

// Got a power of 2. So, the amount we'll shift is the log base-2 of that.
ImmVal = Log2_32(ImmVal);
}

if ((ImmVal & 0x7) != ImmVal)
return None;

Expand All @@ -4152,7 +4183,7 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
// offset. Signify that we are shifting by setting the shift flag to 1.
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
[=](MachineInstrBuilder &MIB) { MIB.add(Shl->getOperand(1)); },
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
[=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
}};
Expand Down
176 changes: 176 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
Expand Up @@ -7,6 +7,12 @@
define void @more_than_one_use(i64* %addr) { ret void }
define void @ldrxrox_shl(i64* %addr) { ret void }
define void @ldrdrox_shl(i64* %addr) { ret void }
define void @ldrxrox_mul_rhs(i64* %addr) { ret void }
define void @ldrdrox_mul_rhs(i64* %addr) { ret void }
define void @ldrxrox_mul_lhs(i64* %addr) { ret void }
define void @ldrdrox_mul_lhs(i64* %addr) { ret void }
define void @mul_not_pow_2(i64* %addr) { ret void }
define void @mul_wrong_pow_2(i64* %addr) { ret void }
define void @more_than_one_use_shl_1(i64* %addr) { ret void }
define void @more_than_one_use_shl_2(i64* %addr) { ret void }
define void @more_than_one_use_shl_lsl_fast(i64* %addr) #1 { ret void }
Expand Down Expand Up @@ -152,6 +158,176 @@ body: |
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
...
---
name: ldrxrox_mul_rhs
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
; CHECK-LABEL: name: ldrxrox_mul_rhs
; CHECK: liveins: $x0, $x1, $x2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
; CHECK: $x2 = COPY [[LDRXroX]]
; CHECK: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 8
%2:gpr(s64) = G_MUL %0, %1(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_GEP %3, %2
%5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
$x2 = COPY %5(s64)
RET_ReallyLR implicit $x2
...
---
# Same as ldrxrox_mul_rhs, but with an FPR destination: the G_MUL by 8 on the
# RHS still folds, producing LDRDroX with the shift flag set to 1.
name: ldrdrox_mul_rhs
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $d2
; CHECK-LABEL: name: ldrdrox_mul_rhs
; CHECK: liveins: $x0, $x1, $d2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
; CHECK: $d2 = COPY [[LDRDroX]]
; CHECK: RET_ReallyLR implicit $d2
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 8
%2:gpr(s64) = G_MUL %0, %1(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_GEP %3, %2
%5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
...
---
# Test the commuted form: the power-of-2 constant is the LHS of the G_MUL
# (G_MUL %1, %0). The fold should still fire, since mul is commutative,
# emitting LDRXroX with the shift flag set to 1.
name: ldrxrox_mul_lhs
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $x2
; CHECK-LABEL: name: ldrxrox_mul_lhs
; CHECK: liveins: $x0, $x1, $x2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
; CHECK: $x2 = COPY [[LDRXroX]]
; CHECK: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 8
%2:gpr(s64) = G_MUL %1, %0(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_GEP %3, %2
%5:gpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
$x2 = COPY %5(s64)
RET_ReallyLR implicit $x2
...
---
# Commuted form with an FPR destination: constant on the LHS of the G_MUL
# still folds into LDRDroX with the shift flag set to 1.
name: ldrdrox_mul_lhs
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
liveins: $x0, $x1, $d2
; CHECK-LABEL: name: ldrdrox_mul_lhs
; CHECK: liveins: $x0, $x1, $d2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[COPY]], 0, 1 :: (load 8 from %ir.addr)
; CHECK: $d2 = COPY [[LDRDroX]]
; CHECK: RET_ReallyLR implicit $d2
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 8
%2:gpr(s64) = G_MUL %1, %0(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_GEP %3, %2
%5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
...
---
# Negative test: the multiplier (7) is not a power of 2, so the G_MUL must NOT
# be folded — the mul stays as a MADDXrrr and the load's shift flag is 0.
name: mul_not_pow_2
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; Show that we don't get a shifted load from a mul when we don't have a
; power of 2. (The bit isn't set on the load.)
liveins: $x0, $x1, $d2
; CHECK-LABEL: name: mul_not_pow_2
; CHECK: liveins: $x0, $x1, $d2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 7
; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[MOVi64imm]], [[COPY]], $xzr
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr)
; CHECK: $d2 = COPY [[LDRDroX]]
; CHECK: RET_ReallyLR implicit $d2
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 7
%2:gpr(s64) = G_MUL %1, %0(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_GEP %3, %2
%5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
...
---
# Negative test: 16 is a power of 2, but its log2 (4) does not match the legal
# shift amount for this 8-byte load, so the G_MUL must NOT be folded — the mul
# stays as a MADDXrrr and the load's shift flag is 0.
name: mul_wrong_pow_2
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
bb.0:
; Show that we don't get a shifted load from a mul when we don't have
; the right power of 2. (The bit isn't set on the load.)
liveins: $x0, $x1, $d2
; CHECK-LABEL: name: mul_wrong_pow_2
; CHECK: liveins: $x0, $x1, $d2
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 16
; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[MOVi64imm]], [[COPY]], $xzr
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY1]], [[MADDXrrr]], 0, 0 :: (load 8 from %ir.addr)
; CHECK: $d2 = COPY [[LDRDroX]]
; CHECK: RET_ReallyLR implicit $d2
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 16
%2:gpr(s64) = G_MUL %1, %0(s64)
%3:gpr(p0) = COPY $x1
%4:gpr(p0) = G_GEP %3, %2
%5:fpr(s64) = G_LOAD %4(p0) :: (load 8 from %ir.addr)
$d2 = COPY %5(s64)
RET_ReallyLR implicit $d2
...
---
name: more_than_one_use_shl_1
Expand Down

0 comments on commit 6849911

Please sign in to comment.