Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 36 additions & 45 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16496,32 +16496,42 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}

static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
unsigned ShY) {
unsigned ShY, bool AddX) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShY, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
DAG.getTargetConstant(ShX, DL, VT), Mul359);
DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
}

static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
// 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
switch (MulAmt) {
case 5 * 3:
return getShlAddShlAdd(N, DAG, 2, 1);
return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
case 9 * 3:
return getShlAddShlAdd(N, DAG, 3, 1);
return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
case 5 * 5:
return getShlAddShlAdd(N, DAG, 2, 2);
return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
case 9 * 5:
return getShlAddShlAdd(N, DAG, 3, 2);
return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
case 9 * 9:
return getShlAddShlAdd(N, DAG, 3, 3);
return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
default:
return SDValue();
break;
}

// 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
int ShX;
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
if (ShX <= 3)
return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
}
return SDValue();
}

// Try to expand a scalar multiply to a faster sequence.
Expand Down Expand Up @@ -16581,41 +16591,30 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(Shift, DL, VT));
}

// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
return V;
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
// (2/4/8 * 3/5/9 + 1) * 2^N
Shift = llvm::countr_zero(MulAmt);
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
if (Shift == 0)
return V;
SDLoc DL(N);
return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
}

// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this is a sum of two powers of 2 because that's
// easy. Then count how many zeros are up to the first bit.
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
unsigned ScaleShift = llvm::countr_zero(MulAmt);
if (ScaleShift >= 1 && ScaleShift < 4) {
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
SDLoc DL(N);
SDValue Shift1 =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
}
if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
SDLoc DL(N);
SDValue Shift1 =
DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(Shift, DL, VT), Shift1);
}

// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
// This is the two instruction form, there are also three instruction
// variants we could implement. e.g.
// (2^(1,2,3) * 3,5,9 + 1) << C2
// 2^(C1>3) * 3,5,9 +/- 1
if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
assert(Shift != 0 && "MulAmt=4,6,10 handled before");
if (Shift <= 3) {
SDLoc DL(N);
SDValue Mul359 =
DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShXAmount, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
DAG.getTargetConstant(Shift, DL, VT), X);
}
}
// TODO: 2^(C1>3) * 3,5,9 +/- 1

// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
Expand Down Expand Up @@ -16647,14 +16646,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
}
}

// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
Shift = llvm::countr_zero(MulAmt);
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
SDLoc DL(N);
return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
}
}

if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
Expand Down
19 changes: 13 additions & 6 deletions llvm/test/CodeGen/RISCV/rv64xtheadba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) {
}

define i64 @addmul22(i64 %a, i64 %b) {
; CHECK-LABEL: addmul22:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 22
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addmul22:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 22
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addmul22:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1
; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1
; RV64XTHEADBA-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
Expand Down
209 changes: 203 additions & 6 deletions llvm/test/CodeGen/RISCV/rv64zba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) {
ret i64 %d
}

define i64 @addmul14(i64 %a, i64 %b) {
; RV64I-LABEL: addmul14:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a2, a0, 1
; RV64I-NEXT: slli a0, a0, 4
; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul14:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh1add a2, a0, a0
; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul14:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 14
%d = add i64 %c, %b
ret i64 %d
}

define i64 @addmul18(i64 %a, i64 %b) {
; RV64I-LABEL: addmul18:
; RV64I: # %bb.0:
Expand Down Expand Up @@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) {
}

define i64 @addmul22(i64 %a, i64 %b) {
; CHECK-LABEL: addmul22:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 22
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addmul22:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 22
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul22:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh2add a2, a0, a0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sequence is twice the size of the original code when Zca and Zcb are enabled. Do we have any opt for size limits on these transforms?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! But the very same problem already affects other multipliers, e.g. addmul100. I suggest we deal with it in a separate PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I see these transforms are disabled when optimizing for size:

// LI + MUL is usually smaller than the alternative sequence.

; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul22:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
Expand Down Expand Up @@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) {
ret i64 %d
}

define i64 @addmul26(i64 %a, i64 %b) {
; RV64I-LABEL: addmul26:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 26
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul26:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh1add a2, a0, a0
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul26:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 26
%d = add i64 %c, %b
ret i64 %d
}

define i64 @addmul36(i64 %a, i64 %b) {
; RV64I-LABEL: addmul36:
; RV64I: # %bb.0:
Expand Down Expand Up @@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) {
ret i64 %d
}

define i64 @addmul38(i64 %a, i64 %b) {
; RV64I-LABEL: addmul38:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 38
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul38:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh3add a2, a0, a0
; RV64ZBA-NEXT: sh1add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul38:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 38
%d = add i64 %c, %b
ret i64 %d
}

define i64 @addmul42(i64 %a, i64 %b) {
; RV64I-LABEL: addmul42:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 42
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul42:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh2add a2, a0, a0
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul42:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 42
%d = add i64 %c, %b
ret i64 %d
}

define i64 @addmul72(i64 %a, i64 %b) {
; RV64I-LABEL: addmul72:
; RV64I: # %bb.0:
Expand All @@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) {
ret i64 %d
}

define i64 @addmul74(i64 %a, i64 %b) {
; RV64I-LABEL: addmul74:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 74
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul74:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh3add a2, a0, a0
; RV64ZBA-NEXT: sh2add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul74:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 74
%d = add i64 %c, %b
ret i64 %d
}

define i64 @addmul82(i64 %a, i64 %b) {
; RV64I-LABEL: addmul82:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 82
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul82:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh2add a2, a0, a0
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul82:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 82
%d = add i64 %c, %b
ret i64 %d
}

define i64 @addmul146(i64 %a, i64 %b) {
; RV64I-LABEL: addmul146:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 146
; RV64I-NEXT: mul a0, a0, a2
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: addmul146:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: sh3add a2, a0, a0
; RV64ZBA-NEXT: sh3add a0, a2, a0
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: addmul146:
; RV64XANDESPERF: # %bb.0:
; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 146
%d = add i64 %c, %b
ret i64 %d
}

define i64 @mul50(i64 %a) {
; RV64I-LABEL: mul50:
; RV64I: # %bb.0:
Expand Down