From f38ee6f8016cacdddd5be7ed03776992c09fec72 Mon Sep 17 00:00:00 2001
From: Piotr Fusik
Date: Tue, 4 Nov 2025 14:49:11 +0100
Subject: [PATCH 1/6] [RISCV][test] Multiplication by `(2/4/8 * 3/5/9 + 1) <<
N` with SHL_ADD
---
llvm/test/CodeGen/RISCV/rv64zba.ll | 85 ++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 7fd76262d547a..ba21fcff0a496 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -585,6 +585,19 @@ define i64 @addmul12(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul14(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a2, a0, 1
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 14
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul18(i64 %a, i64 %b) {
; RV64I-LABEL: addmul18:
; RV64I: # %bb.0:
@@ -672,6 +685,18 @@ define i64 @addmul24(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul26(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul26:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 26
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 26
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul36(i64 %a, i64 %b) {
; RV64I-LABEL: addmul36:
; RV64I: # %bb.0:
@@ -722,6 +747,30 @@ define i64 @addmul40(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul38(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul38:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 38
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 38
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul42(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul42:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 42
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 42
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @addmul72(i64 %a, i64 %b) {
; RV64I-LABEL: addmul72:
; RV64I: # %bb.0:
@@ -747,6 +796,42 @@ define i64 @addmul72(i64 %a, i64 %b) {
ret i64 %d
}
+define i64 @addmul74(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul74:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 74
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 74
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul82(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul82:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 82
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 82
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
+define i64 @addmul146(i64 %a, i64 %b) {
+; CHECK-LABEL: addmul146:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 146
+; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ret
+ %c = mul i64 %a, 146
+ %d = add i64 %c, %b
+ ret i64 %d
+}
+
define i64 @mul50(i64 %a) {
; RV64I-LABEL: mul50:
; RV64I: # %bb.0:
From 9b34e3f9c5277280c69ee108aaf6a60fbab98aba Mon Sep 17 00:00:00 2001
From: Piotr Fusik
Date: Tue, 4 Nov 2025 14:49:28 +0100
Subject: [PATCH 2/6] [RISCV] Expand multiplication by `(2/4/8 * 3/5/9 + 1) <<
N` with SHL_ADD
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 53 ++---
llvm/test/CodeGen/RISCV/rv64xtheadba.ll | 19 +-
llvm/test/CodeGen/RISCV/rv64zba.ll | 210 +++++++++++++++-----
3 files changed, 203 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b8605629e2dfe..507237a9316fd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16496,29 +16496,47 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
}
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
- unsigned ShY) {
+ unsigned ShY, bool AddX) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue X = N->getOperand(0);
SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
DAG.getTargetConstant(ShY, DL, VT), X);
return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(ShX, DL, VT), Mul359);
+ DAG.getTargetConstant(ShX, DL, VT), AddX ? X : Mul359);
}
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
switch (MulAmt) {
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1);
+ return getShlAddShlAdd(N, DAG, 2, 1, false);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1);
+ return getShlAddShlAdd(N, DAG, 3, 1, false);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2);
+ return getShlAddShlAdd(N, DAG, 2, 2, false);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2);
+ return getShlAddShlAdd(N, DAG, 3, 2, false);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3);
+ return getShlAddShlAdd(N, DAG, 3, 3, false);
+ case 2 * 3 + 1:
+ return getShlAddShlAdd(N, DAG, 1, 1, true);
+ case 4 * 3 + 1:
+ return getShlAddShlAdd(N, DAG, 2, 1, true);
+ // case 8 * 3 + 1:
+ // Prefer 5 * 5 above because it doesn't require a register to hold X.
+ case 2 * 5 + 1:
+ return getShlAddShlAdd(N, DAG, 1, 2, true);
+ case 4 * 5 + 1:
+ return getShlAddShlAdd(N, DAG, 2, 2, true);
+ case 8 * 5 + 1:
+ return getShlAddShlAdd(N, DAG, 3, 2, true);
+ case 2 * 9 + 1:
+ return getShlAddShlAdd(N, DAG, 1, 3, true);
+ case 4 * 9 + 1:
+ return getShlAddShlAdd(N, DAG, 2, 3, true);
+ case 8 * 9 + 1:
+ return getShlAddShlAdd(N, DAG, 3, 3, true);
default:
return SDValue();
}
@@ -16581,7 +16599,8 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(Shift, DL, VT));
}
- // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
+ // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
return V;
@@ -16600,22 +16619,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
}
}
- // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
- // This is the two instruction form, there are also three instruction
- // variants we could implement. e.g.
- // (2^(1,2,3) * 3,5,9 + 1) << C2
- // 2^(C1>3) * 3,5,9 +/- 1
- if (int ShXAmount = isShifted359(MulAmt - 1, Shift)) {
- assert(Shift != 0 && "MulAmt=4,6,10 handled before");
- if (Shift <= 3) {
- SDLoc DL(N);
- SDValue Mul359 =
- DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ShXAmount, DL, VT), X);
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
- DAG.getTargetConstant(Shift, DL, VT), X);
- }
- }
+ // TODO: 2^(C1>3) * 3,5,9 +/- 1
// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
@@ -16650,6 +16654,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
// 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
// of 25 which happen to be quite common.
+ // (2/4/8 * 3/5/9 + 1) * 2^N
Shift = llvm::countr_zero(MulAmt);
if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
index 50bd22bf5fd69..f4964288e3541 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll
@@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) {
}
define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 22
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul22:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 22
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBA-LABEL: addmul22:
+; RV64XTHEADBA: # %bb.0:
+; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2
+; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1
+; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1
+; RV64XTHEADBA-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index ba21fcff0a496..d4b228828c04d 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -586,13 +586,27 @@ define i64 @addmul12(i64 %a, i64 %b) {
}
define i64 @addmul14(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul14:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a2, a0, 1
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul14:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a2, a0, 1
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul14:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul14:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 14
%d = add i64 %c, %b
ret i64 %d
@@ -649,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) {
}
define i64 @addmul22(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul22:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 22
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul22:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 22
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul22:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul22:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 22
%d = add i64 %c, %b
ret i64 %d
@@ -686,12 +714,26 @@ define i64 @addmul24(i64 %a, i64 %b) {
}
define i64 @addmul26(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul26:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 26
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul26:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 26
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul26:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh1add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul26:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 26
%d = add i64 %c, %b
ret i64 %d
@@ -748,24 +790,52 @@ define i64 @addmul40(i64 %a, i64 %b) {
}
define i64 @addmul38(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul38:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 38
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul38:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 38
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul38:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh1add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul38:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 38
%d = add i64 %c, %b
ret i64 %d
}
define i64 @addmul42(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul42:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 42
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul42:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 42
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul42:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul42:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 42
%d = add i64 %c, %b
ret i64 %d
@@ -797,36 +867,78 @@ define i64 @addmul72(i64 %a, i64 %b) {
}
define i64 @addmul74(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul74:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 74
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul74:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 74
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul74:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh2add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul74:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 74
%d = add i64 %c, %b
ret i64 %d
}
define i64 @addmul82(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul82:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 82
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul82:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 82
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul82:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh2add a2, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul82:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 82
%d = add i64 %c, %b
ret i64 %d
}
define i64 @addmul146(i64 %a, i64 %b) {
-; CHECK-LABEL: addmul146:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 146
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ret
+; RV64I-LABEL: addmul146:
+; RV64I: # %bb.0:
+; RV64I-NEXT: li a2, 146
+; RV64I-NEXT: mul a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: addmul146:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: sh3add a2, a0, a0
+; RV64ZBA-NEXT: sh3add a0, a2, a0
+; RV64ZBA-NEXT: sh1add a0, a0, a1
+; RV64ZBA-NEXT: ret
+;
+; RV64XANDESPERF-LABEL: addmul146:
+; RV64XANDESPERF: # %bb.0:
+; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0
+; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2
+; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0
+; RV64XANDESPERF-NEXT: ret
%c = mul i64 %a, 146
%d = add i64 %c, %b
ret i64 %d
From 665bfea69203004af9d27e145c4a89933fb1a30f Mon Sep 17 00:00:00 2001
From: Piotr Fusik
Date: Tue, 4 Nov 2025 19:35:39 +0100
Subject: [PATCH 3/6] [RISCV][NFC] Revert to the generic detection
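Replace the explicit `2/4/8 * 3/5/9 + 1` case list with the generic
`isShifted359` check, in preparation for future `ShX > 3` patterns.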
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 ++++++---------------
1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 507237a9316fd..96ab04d3914d9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16519,27 +16519,16 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
return getShlAddShlAdd(N, DAG, 3, 2, false);
case 9 * 9:
return getShlAddShlAdd(N, DAG, 3, 3, false);
- case 2 * 3 + 1:
- return getShlAddShlAdd(N, DAG, 1, 1, true);
- case 4 * 3 + 1:
- return getShlAddShlAdd(N, DAG, 2, 1, true);
- // case 8 * 3 + 1:
- // Prefer 5 * 5 above because it doesn't require a register to hold X.
- case 2 * 5 + 1:
- return getShlAddShlAdd(N, DAG, 1, 2, true);
- case 4 * 5 + 1:
- return getShlAddShlAdd(N, DAG, 2, 2, true);
- case 8 * 5 + 1:
- return getShlAddShlAdd(N, DAG, 3, 2, true);
- case 2 * 9 + 1:
- return getShlAddShlAdd(N, DAG, 1, 3, true);
- case 4 * 9 + 1:
- return getShlAddShlAdd(N, DAG, 2, 3, true);
- case 8 * 9 + 1:
- return getShlAddShlAdd(N, DAG, 3, 3, true);
default:
- return SDValue();
+ break;
}
+ int ShX;
+ if (int ShY = isShifted359(MulAmt - 1, ShX)) {
+ assert(ShX != 0 && "MulAmt=4,6,10 handled before");
+ if (ShX <= 3)
+ return getShlAddShlAdd(N, DAG, ShX, ShY, true);
+ }
+ return SDValue();
}
// Try to expand a scalar multiply to a faster sequence.
From dd0a9563f8afd6315e92534c3cebff7e8c4eb588 Mon Sep 17 00:00:00 2001
From: Piotr Fusik
Date: Tue, 4 Nov 2025 19:50:16 +0100
Subject: [PATCH 4/6] [RISCV][NFC] Call `expandMulToShlAddShlAdd` once
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 40 +++++++++------------
1 file changed, 17 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 96ab04d3914d9..1a328bdce4db6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16588,24 +16588,27 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(Shift, DL, VT));
}
- // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
- // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt))
- return V;
+ // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
+ // of 25 which happen to be quite common.
+ // (2/4/8 * 3/5/9 + 1) * 2^N
+ Shift = llvm::countr_zero(MulAmt);
+ if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
+ if (Shift == 0)
+ return V;
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
+ }
// If this is a power 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this a sum of two power of 2s because that's
// easy. Then count how many zeros are up to the first bit.
- if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
- unsigned ScaleShift = llvm::countr_zero(MulAmt);
- if (ScaleShift >= 1 && ScaleShift < 4) {
- unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
- SDLoc DL(N);
- SDValue Shift1 =
- DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
- return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
- DAG.getTargetConstant(ScaleShift, DL, VT), Shift1);
- }
+ if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+ unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
+ SDLoc DL(N);
+ SDValue Shift1 =
+ DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
+ return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
+ DAG.getTargetConstant(Shift, DL, VT), Shift1);
}
// TODO: 2^(C1>3) * 3,5,9 +/- 1
@@ -16640,15 +16643,6 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
}
}
-
- // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
- // of 25 which happen to be quite common.
- // (2/4/8 * 3/5/9 + 1) * 2^N
- Shift = llvm::countr_zero(MulAmt);
- if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift)) {
- SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Shift, DL, VT));
- }
}
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
From af464ba9bba4fc6f2cf7cd1cc20f8e8586973c5b Mon Sep 17 00:00:00 2001
From: Piotr Fusik
Date: Wed, 5 Nov 2025 07:14:34 +0100
Subject: [PATCH 5/6] [RISCV][NFC] Add AddX argument comments
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1a328bdce4db6..8e3f9d32401ad 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16510,15 +16510,15 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
switch (MulAmt) {
case 5 * 3:
- return getShlAddShlAdd(N, DAG, 2, 1, false);
+ return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
case 9 * 3:
- return getShlAddShlAdd(N, DAG, 3, 1, false);
+ return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false);
case 5 * 5:
- return getShlAddShlAdd(N, DAG, 2, 2, false);
+ return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false);
case 9 * 5:
- return getShlAddShlAdd(N, DAG, 3, 2, false);
+ return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false);
case 9 * 9:
- return getShlAddShlAdd(N, DAG, 3, 3, false);
+ return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false);
default:
break;
}
@@ -16526,7 +16526,7 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");
if (ShX <= 3)
- return getShlAddShlAdd(N, DAG, ShX, ShY, true);
+ return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true);
}
return SDValue();
}
From 6e58eb980af3cd44e0e6f817d7ced15371e2949c Mon Sep 17 00:00:00 2001
From: Piotr Fusik
Date: Wed, 5 Nov 2025 07:17:57 +0100
Subject: [PATCH 6/6] [RISCV][NFC] Comment the transforms in
expandMulToShlAddShlAdd
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8e3f9d32401ad..51cec7a66bee7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16508,6 +16508,7 @@ static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
+ // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
switch (MulAmt) {
case 5 * 3:
return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false);
@@ -16522,6 +16523,8 @@ static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
default:
break;
}
+
+ // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
int ShX;
if (int ShY = isShifted359(MulAmt - 1, ShX)) {
assert(ShX != 0 && "MulAmt=4,6,10 handled before");