[AArch64][GlobalISel] Select arith extended add/sub in manual selection code

The manual selection code for add/sub was not checking whether it was possible to
fold in shifts + extends (the *rx opcode variants).

As a result, we could never select things like

```
cmp x1, w0, uxtw #2
```

because we don't import any patterns for compares.
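For illustration, source along these lines produces that pattern; this is a hypothetical example, not taken from the commit:

```
// The zero-extend of idx and the <<2 scaling can fold into the compare
// itself on AArch64, e.g.: cmp x1, w0, uxtw #2
bool in_range(unsigned long limit, unsigned idx) {
  return limit > (static_cast<unsigned long>(idx) << 2);
}
```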

This adds support for the arithmetic extended register forms and updates tests
for instructions selected using `emitADD`, `emitADDS`, and `emitSUBS`.

This is a 0.1% geomean code size improvement on SPECINT2000 at -Os.

Differential Revision: https://reviews.llvm.org/D91207
Jessica Paquette committed Nov 11, 2020
1 parent 20de182 commit c42053f
Showing 5 changed files with 189 additions and 9 deletions.
28 changes: 19 additions & 9 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -188,7 +188,8 @@ class AArch64InstructionSelector : public InstructionSelector {
/// {{AArch64::ADDXri, AArch64::ADDWri},
/// {AArch64::ADDXrs, AArch64::ADDWrs},
/// {AArch64::ADDXrr, AArch64::ADDWrr},
/// {AArch64::SUBXri, AArch64::SUBWri}}};
/// {AArch64::SUBXri, AArch64::SUBWri},
/// {AArch64::ADDXrx, AArch64::ADDWrx}}};
/// \endcode
///
/// Each row in the table corresponds to a different addressing mode. Each
@@ -199,6 +200,7 @@ class AArch64InstructionSelector : public InstructionSelector {
/// - Row 1: The rs opcode variants
/// - Row 2: The rr opcode variants
/// - Row 3: The ri opcode variants for negative immediates
/// - Row 4: The rx opcode variants
///
/// \attention Columns must be structured as follows:
/// - Column 0: The 64-bit opcode variants
@@ -208,7 +210,7 @@
/// \p LHS is the left-hand operand of the binop to emit.
/// \p RHS is the right-hand operand of the binop to emit.
MachineInstr *emitAddSub(
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
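As a rough sketch of how such a table is consumed (stand-in enum values rather than the real AArch64 opcode constants, so only the shape and indexing mirror the code above):

```
#include <array>

// Stand-ins for the real opcode constants; the values are arbitrary.
enum Opcode : unsigned { ADDXri, ADDWri, ADDXrs, ADDWrs, ADDXrr, ADDWrr,
                         SUBXri, SUBWri, ADDXrx, ADDWrx };

constexpr std::array<std::array<unsigned, 2>, 5> OpcTable{
    {{ADDXri, ADDWri},    // Row 0: ri (immediate)
     {ADDXrs, ADDWrs},    // Row 1: rs (shifted register)
     {ADDXrr, ADDWrr},    // Row 2: rr (register-register)
     {SUBXri, SUBWri},    // Row 3: ri with a negated immediate
     {ADDXrx, ADDWrx}}};  // Row 4: rx (extended register), new in this commit

// Each addressing-mode check selects a row; the 32-bit flag selects the
// column, since column 0 holds the 64-bit variant and column 1 the 32-bit one.
constexpr unsigned pickOpcode(unsigned AddrMode, bool Is32Bit) {
  return OpcTable[AddrMode][Is32Bit];
}

static_assert(pickOpcode(4, /*Is32Bit=*/false) == ADDXrx,
              "row 4, column 0: the 64-bit extended-register add");
```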
@@ -3821,7 +3823,7 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
}

MachineInstr *AArch64InstructionSelector::emitAddSub(
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3842,6 +3844,11 @@ MachineInstr *AArch64InstructionSelector::emitAddSub(
return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
MIRBuilder, Fns);

// INSTRrx form.
if (auto Fns = selectArithExtendedRegister(RHS))
return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
MIRBuilder, Fns);

// INSTRrs form.
if (auto Fns = selectShiftedRegister(RHS))
return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
@@ -3854,35 +3861,38 @@ MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
const std::array<std::array<unsigned, 2>, 4> OpcTable{
const std::array<std::array<unsigned, 2>, 5> OpcTable{
{{AArch64::ADDXri, AArch64::ADDWri},
{AArch64::ADDXrs, AArch64::ADDWrs},
{AArch64::ADDXrr, AArch64::ADDWrr},
{AArch64::SUBXri, AArch64::SUBWri}}};
{AArch64::SUBXri, AArch64::SUBWri},
{AArch64::ADDXrx, AArch64::ADDWrx}}};
return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
const std::array<std::array<unsigned, 2>, 4> OpcTable{
const std::array<std::array<unsigned, 2>, 5> OpcTable{
{{AArch64::ADDSXri, AArch64::ADDSWri},
{AArch64::ADDSXrs, AArch64::ADDSWrs},
{AArch64::ADDSXrr, AArch64::ADDSWrr},
{AArch64::SUBSXri, AArch64::SUBSWri}}};
{AArch64::SUBSXri, AArch64::SUBSWri},
{AArch64::ADDSXrx, AArch64::ADDSWrx}}};
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
const std::array<std::array<unsigned, 2>, 4> OpcTable{
const std::array<std::array<unsigned, 2>, 5> OpcTable{
{{AArch64::SUBSXri, AArch64::SUBSWri},
{AArch64::SUBSXrs, AArch64::SUBSWrs},
{AArch64::SUBSXrr, AArch64::SUBSWrr},
{AArch64::ADDSXri, AArch64::ADDSWri}}};
{AArch64::ADDSXri, AArch64::ADDSWri},
{AArch64::SUBSXrx, AArch64::SUBSWrx}}};
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

33 changes: 33 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
@@ -603,3 +603,36 @@ body: |
%cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
...
---
name: cmn_arith_extended_shl
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $x0, $x1
; We should be able to fold away the extend + shift and select ADDSXrx.
; CHECK-LABEL: name: cmn_arith_extended_shl
; CHECK: liveins: $w0, $x0, $x1
; CHECK: %reg0:gpr64sp = COPY $x0
; CHECK: %reg1:gpr32 = COPY $w0
; CHECK: $xzr = ADDSXrx %reg0, %reg1, 50, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
%reg0:gpr(s64) = COPY $x0
%zero:gpr(s64) = G_CONSTANT i64 0
%sub:gpr(s64) = G_SUB %zero, %reg0
%reg1:gpr(s32) = COPY $w0
%ext:gpr(s64) = G_SEXT %reg1(s32)
%cst:gpr(s64) = G_CONSTANT i64 2
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
%cmp:gpr(s32) = G_ICMP intpred(ne), %sub(s64), %shift
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
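A note on the trailing immediate in the ADDSXrx line above (50 here; 18 and 10 in the tests below): in the rx forms it packs the extend kind together with the left-shift amount. The sketch below reproduces that packing as an assumption about the encoding (extend kind in the upper three bits, shift amount in the lower three), not a quote from the backend:

```
#include <cassert>

// Assumed encoding of the arithmetic-extend operand:
// imm = (extend kind << 3) | left-shift amount (0-4).
enum Extend : unsigned { UXTB = 0, UXTH = 1, UXTW = 2, UXTX = 3,
                         SXTB = 4, SXTH = 5, SXTW = 6, SXTX = 7 };

constexpr unsigned arithExtendImm(Extend Ext, unsigned ShiftAmt) {
  return (Ext << 3) | ShiftAmt;
}

int main() {
  assert(arithExtendImm(SXTW, 2) == 50); // G_SEXT + shl 2, as in the test above
  assert(arithExtendImm(UXTW, 2) == 18); // G_ZEXT + shl 2
  assert(arithExtendImm(UXTH, 2) == 10); // G_ZEXT from s16 + shl 2
}
```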
88 changes: 88 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -182,3 +182,91 @@ body: |
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
...
---
name: cmp_arith_extended_s64
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $x1
; CHECK-LABEL: name: cmp_arith_extended_s64
; CHECK: liveins: $w0, $x1
; CHECK: %reg0:gpr32 = COPY $w0
; CHECK: %reg1:gpr64sp = COPY $x1
; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%reg1:gpr(s64) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
%cst:gpr(s64) = G_CONSTANT i64 2
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
...
---
name: cmp_arith_extended_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $h0
; CHECK-LABEL: name: cmp_arith_extended_s32
; CHECK: liveins: $w0, $w1, $h0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: %reg1:gpr32sp = COPY $w1
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
%reg0:gpr(s16) = COPY $h0
%reg1:gpr(s32) = COPY $w1
%ext:gpr(s32) = G_ZEXT %reg0(s16)
%cst:gpr(s32) = G_CONSTANT i32 2
%shift:gpr(s32) = G_SHL %ext, %cst(s32)
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s32), %shift
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
...
---
name: cmp_arith_extended_shl_too_large
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $x1
; The constant on the G_SHL is > 4, so we won't select SUBSXrx.
; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
; CHECK: liveins: $w0, $x1
; CHECK: %reg0:gpr32 = COPY $w0
; CHECK: %reg1:gpr64 = COPY $x1
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
; CHECK: $w0 = COPY %cmp
; CHECK: RET_ReallyLR implicit $w0
%reg0:gpr(s32) = COPY $w0
%reg1:gpr(s64) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
%cst:gpr(s64) = G_CONSTANT i64 5
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
%cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
$w0 = COPY %cmp(s32)
RET_ReallyLR implicit $w0
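The rule this test exercises: extended-register add/sub only permits a left shift of 0 through 4 on the extended operand, so a G_SHL by 5 cannot fold into the rx form, and the selector instead materializes the extend (UBFMXri) and uses the shifted-register SUBSXrs. A minimal sketch of such a legality check, stated as an assumption rather than the commit's code:

```
// The optional left shift on an extended register is capped at 4 by the
// architecture; larger amounts cannot use the rx encodings.
constexpr bool isValidArithExtendShift(unsigned ShiftAmt) {
  return ShiftAmt <= 4;
}

static_assert(!isValidArithExtendShift(5),
              "a shift of 5, as in this test, falls back to SUBSXrs");
```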
...
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
@@ -89,3 +89,24 @@ body: |
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
$x0 = COPY %2(p0)
...
---
name: ptr_add_arith_extended
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $x0
; CHECK-LABEL: name: ptr_add_arith_extended
; CHECK: %reg0:gpr32 = COPY $w0
; CHECK: %ptr:gpr64 = COPY $x1
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %ptr
; CHECK: %ptr_add:gpr64sp = ADDXrx [[COPY]], %reg0, 18
; CHECK: $x0 = COPY %ptr_add
%reg0:gpr(s32) = COPY $w0
%ptr:gpr(p0) = COPY $x1
%ext:gpr(s64) = G_ZEXT %reg0(s32)
%cst:gpr(s64) = G_CONSTANT i64 2
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
%ptr_add:gpr(p0) = G_PTR_ADD %ptr, %shift(s64)
$x0 = COPY %ptr_add(p0)
...
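For context, array indexing with a 32-bit unsigned index is a typical source of this G_ZEXT + G_SHL + G_PTR_ADD chain; a hypothetical example, not taken from the commit:

```
// &base[idx] zero-extends the 32-bit index and scales it by sizeof(int),
// which the rx form folds into one instruction: add x0, x1, w0, uxtw #2.
int *elem_addr(int *base, unsigned idx) {
  return &base[idx];
}
```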
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -136,3 +136,31 @@ body: |
%add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
$w0 = COPY %add(s32)
RET_ReallyLR implicit $w0
...
---
name: uaddo_arith_extended
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $w0, $x0
; Check that we get ADDSXrx.
; CHECK-LABEL: name: uaddo_arith_extended
; CHECK: liveins: $w0, $x0
; CHECK: %reg0:gpr64sp = COPY $x0
; CHECK: %reg1:gpr32 = COPY $w0
; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
; CHECK: $x0 = COPY %add
; CHECK: RET_ReallyLR implicit $x0
%reg0:gpr(s64) = COPY $x0
%reg1:gpr(s32) = COPY $w0
%ext:gpr(s64) = G_ZEXT %reg1(s32)
%cst:gpr(s64) = G_CONSTANT i64 2
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
%add:gpr(s64), %flags:gpr(s1) = G_UADDO %reg0, %shift
$x0 = COPY %add(s64)
RET_ReallyLR implicit $x0
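One plausible way to reach this G_UADDO pattern from source is an overflow-checked add; a sketch assuming the usual lowering of __builtin_add_overflow, not verified against this test's origin:

```
// __builtin_add_overflow produces a G_UADDO; the zero-extended, shifted
// right-hand side can then fold into ADDSXrx as checked above.
bool add_scaled(unsigned long a, unsigned idx, unsigned long *out) {
  return __builtin_add_overflow(a, static_cast<unsigned long>(idx) << 2, out);
}
```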
