Skip to content
This repository has been archived by the owner on Apr 23, 2020. It is now read-only.

Commit

Permalink
Shifter ops are not always free. Do not fold them (especially to form
complex load / store addressing modes) when they have higher cost and
when they have more than one use.
Browse files Browse the repository at this point in the history


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117509 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Evan Cheng committed Oct 27, 2010
1 parent de5fa93 commit f40deed
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 25 deletions.
123 changes: 109 additions & 14 deletions lib/Target/ARM/ARMISelDAGToDAG.cpp
Expand Up @@ -78,8 +78,12 @@ class ARMDAGToDAGISel : public SelectionDAGISel {

SDNode *Select(SDNode *N);

bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
bool SelectShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

Expand Down Expand Up @@ -246,6 +250,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}


// isShifterOpProfitable - Decide whether the shift node Shift (with shift
// opcode ShOpcVal and shift amount ShAmt) should be folded into its user as a
// shifter operand. Returns true when folding is considered profitable:
//   - always, when the subtarget is not Cortex-A9;
//   - when the shift has a single use;
//   - when the shift is a logical shift left by exactly 2.
// Returns false otherwise, i.e. for a multi-use shift on Cortex-A9 that is
// not "lsl #2".
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  // The use count is only consulted on Cortex-A9; every other subtarget
  // folds unconditionally.
  const bool AlwaysFold = !Subtarget->isCortexA9() || Shift.hasOneUse();

  // R << 2 is free.
  const bool IsFreeShift = ShOpcVal == ARM_AM::lsl && ShAmt == 2;

  return AlwaysFold || IsFreeShift;
}

bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
Expand All @@ -261,6 +276,32 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,

BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShReg = CurDAG->getRegister(0, MVT::i32);
ShImmVal = RHS->getZExtValue() & 31;
} else {
ShReg = N.getOperand(1);
if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
return false;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}

bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);

// Don't match base register only case. That is matched to a separate
// lower complexity pattern with explicit register operand.
if (ShOpcVal == ARM_AM::no_shift) return false;

BaseReg = N.getOperand(0);
unsigned ShImmVal = 0;
// Do not check isShifterOpProfitable. This must return true.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShReg = CurDAG->getRegister(0, MVT::i32);
ShImmVal = RHS->getZExtValue() & 31;
Expand Down Expand Up @@ -321,7 +362,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL) {
if (N.getOpcode() == ISD::MUL &&
(!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
Expand Down Expand Up @@ -357,6 +399,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
}

if (Subtarget->isCortexA9() && !N.hasOneUse())
// Compute R +/- (R << N) and reuse it.
return false;

// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
Expand All @@ -371,23 +417,36 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(1).getOperand(0);
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
Offset = N.getOperand(1).getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}

// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
if (!Subtarget->isCortexA9() ||
(N.hasOneUse() &&
isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
Expand All @@ -408,7 +467,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
SDValue &Base,
SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL) {
if (N.getOpcode() == ISD::MUL &&
(!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
Expand Down Expand Up @@ -474,6 +534,16 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
}
}

if (Subtarget->isCortexA9() && !N.hasOneUse()) {
// Compute R +/- (R << N) and reuse it.
Base = N;
Offset = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
MVT::i32);
return AM2_BASE;
}

// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
Expand All @@ -488,23 +558,36 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(1).getOperand(0);
if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
Offset = N.getOperand(1).getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
}

// Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
!(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
// fold it.
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
if (!Subtarget->isCortexA9() ||
(N.hasOneUse() &&
isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
Offset = N.getOperand(0).getOperand(0);
Base = N.getOperand(1);
} else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
Expand Down Expand Up @@ -543,7 +626,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShAmt = Sh->getZExtValue();
Offset = N.getOperand(0);
if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
Offset = N.getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
Expand Down Expand Up @@ -959,6 +1047,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
return false;
}

if (Subtarget->isCortexA9() && !N.hasOneUse()) {
// Compute R + (R << [1,2,3]) and reuse it.
Base = N;
return false;
}

// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
Expand All @@ -977,11 +1071,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
ShAmt = Sh->getZExtValue();
if (ShAmt >= 4) {
if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
OffReg = OffReg.getOperand(0);
else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
} else
OffReg = OffReg.getOperand(0);
}
} else {
ShOpcVal = ARM_AM::no_shift;
}
Expand Down
12 changes: 10 additions & 2 deletions lib/Target/ARM/ARMInstrInfo.td
Expand Up @@ -325,6 +325,13 @@ def so_reg : Operand<i32>, // reg reg imm
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
// shift_so_reg - An i32 shifter-operand register operand whose selection is
// done by the SelectShiftShifterOperandReg complex pattern, rooted at shl,
// srl, sra and rotr nodes. It expands to three machine operands
// (GPR, GPR, i32imm), is encoded with getSORegOpValue and is printed with
// printSORegOperand.
def shift_so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
[shl,srl,sra,rotr]> {
string EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
}

// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
Expand Down Expand Up @@ -1715,9 +1722,10 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
let Inst{15-12} = Rd;
}

def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
"mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
UnaryDP {
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
Expand Down
72 changes: 63 additions & 9 deletions test/CodeGen/ARM/shifter_operand.ll
@@ -1,18 +1,72 @@
; RUN: llc < %s -march=arm | grep add | grep lsl
; RUN: llc < %s -march=arm | grep bic | grep asr
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
; rdar://8576755


define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
%shift.upgrd.1 = zext i8 %sh to i32 ; <i32> [#uses=1]
%A = shl i32 %Y, %shift.upgrd.1 ; <i32> [#uses=1]
%B = add i32 %X, %A ; <i32> [#uses=1]
; A8: test1:
; A8: add r0, r0, r1, lsl r2

; A9: test1:
; A9: add r0, r0, r1, lsl r2
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
ret i32 %B
}

define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
%shift.upgrd.2 = zext i8 %sh to i32 ; <i32> [#uses=1]
%A = ashr i32 %Y, %shift.upgrd.2 ; <i32> [#uses=1]
%B = xor i32 %A, -1 ; <i32> [#uses=1]
%C = and i32 %X, %B ; <i32> [#uses=1]
; A8: test2:
; A8: bic r0, r0, r1, asr r2

; A9: test2:
; A9: bic r0, r0, r1, asr r2
%shift.upgrd.2 = zext i8 %sh to i32
%A = ashr i32 %Y, %shift.upgrd.2
%B = xor i32 %A, -1
%C = and i32 %X, %B
ret i32 %C
}

; test3 - A single "shl %offset, 2" feeds two address computations, so the
; shift has more than one use. The checks below expect both the Cortex-A8 and
; the Cortex-A9 runs to still fold the shift into each ldr as "lsl #2".
define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
entry:
; A8: test3:
; A8: ldr r0, [r0, r2, lsl #2]
; A8: ldr r1, [r1, r2, lsl #2]

; lsl #2 is free
; A9: test3:
; A9: ldr r1, [r1, r2, lsl #2]
; A9: ldr r0, [r0, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
%tmp4 = add i32 %base2, %tmp1
%tmp5 = inttoptr i32 %tmp4 to i32*
%tmp6 = load i32* %tmp3
%tmp7 = load i32* %tmp5
%tmp8 = add i32 %tmp7, %tmp6
ret i32 %tmp8
}

declare i8* @malloc(...)

; test4 - The getelementptr result %3 is used as the address of both a load
; and a store. The checks below expect the Cortex-A8 run to fold the shifted
; index into both the ldr and the str, and the Cortex-A9 run to compute the
; address once with an add and then use it unshifted in the ldr and the str.
define fastcc void @test4() nounwind {
entry:
; A8: test4:
; A8: ldr r1, [r0, r0, lsl #2]
; A8: str r1, [r0, r0, lsl #2]

; A9: test4:
; A9: add r0, r0, r0, lsl #2
; A9: ldr r1, [r0]
; A9: str r1, [r0]
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 undef to i32
%3 = getelementptr inbounds i32* %1, i32 %2
%4 = load i32* %3, align 4
%5 = add nsw i32 %4, 1
store i32 %5, i32* %3, align 4
ret void
}
1 change: 1 addition & 0 deletions utils/TableGen/EDEmitter.cpp
Expand Up @@ -586,6 +586,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,

MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I
MISC("shift_so_reg", "kOperandTypeARMSoReg"); // R, R, I
MISC("t2_so_reg", "kOperandTypeThumb2SoReg"); // R, I
MISC("so_imm", "kOperandTypeARMSoImm"); // I
MISC("rot_imm", "kOperandTypeARMRotImm"); // I
Expand Down

0 comments on commit f40deed

Please sign in to comment.