From cc244af20d135db6ac7044a68439d00c981adb64 Mon Sep 17 00:00:00 2001 From: Iris Shi <0.0@owo.li> Date: Wed, 30 Apr 2025 19:12:22 +0800 Subject: [PATCH] [RISCV] Add 2^N + 2^M expanding pattern for mul --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 42 ++++-- llvm/test/CodeGen/RISCV/mul.ll | 86 ++++++----- llvm/test/CodeGen/RISCV/rv32xtheadba.ll | 55 ++++--- llvm/test/CodeGen/RISCV/rv32zba.ll | 55 ++++--- llvm/test/CodeGen/RISCV/rv64xtheadba.ll | 65 ++++---- llvm/test/CodeGen/RISCV/rv64zba.ll | 106 +++++++------ .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 99 ++++++------- .../fixed-vectors-strided-load-store-asm.ll | 140 ++++++++++-------- .../RISCV/rvv/vreductions-fp-sdnode.ll | 12 +- 9 files changed, 369 insertions(+), 291 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 8403b51483323..0ef8daf0421e9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15456,6 +15456,30 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget); } +// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2)) +static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, + uint64_t MulAmt) { + uint64_t MulAmtLowBit = MulAmt & (-MulAmt); + ISD::NodeType Op; + uint64_t ShiftAmt1; + if (isPowerOf2_64(MulAmt + MulAmtLowBit)) { + Op = ISD::SUB; + ShiftAmt1 = MulAmt + MulAmtLowBit; + } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) { + Op = ISD::ADD; + ShiftAmt1 = MulAmt - MulAmtLowBit; + } else { + return SDValue(); + } + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(ShiftAmt1), DL, VT)); + SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT)); + return DAG.getNode(Op, DL, VT, Shift1, Shift2); +} + // Try to expand a 
scalar multiply to a faster sequence. static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -15589,22 +15613,7 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359); } } - } - // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2)) - uint64_t MulAmtLowBit = MulAmt & (-MulAmt); - if (isPowerOf2_64(MulAmt + MulAmtLowBit)) { - uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit; - SDLoc DL(N); - SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(ShiftAmt1), DL, VT)); - SDValue Shift2 = - DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT)); - return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2); - } - - if (HasShlAdd) { for (uint64_t Divisor : {3, 5, 9}) { if (MulAmt % Divisor != 0) continue; @@ -15630,6 +15639,9 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, } } + if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt)) + return V; + return SDValue(); } diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 9447dcaf72373..a65ea088df50c 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -502,24 +502,23 @@ define i32 @muli32_p18(i32 %a) nounwind { ; ; RV32IM-LABEL: muli32_p18: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 18 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 1 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p18: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a1, 18 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p18: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 18 -; RV64IM-NEXT: mulw a0, a0, a1 +; 
RV64IM-NEXT: slli a1, a0, 1 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 18 ret i32 %1 @@ -593,24 +592,23 @@ define i32 @muli32_p34(i32 %a) nounwind { ; ; RV32IM-LABEL: muli32_p34: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 34 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 1 +; RV32IM-NEXT: slli a0, a0, 5 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p34: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a1, 34 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p34: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 34 -; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 1 +; RV64IM-NEXT: slli a0, a0, 5 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 34 ret i32 %1 @@ -624,24 +622,23 @@ define i32 @muli32_p36(i32 %a) nounwind { ; ; RV32IM-LABEL: muli32_p36: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a1, 36 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 2 +; RV32IM-NEXT: slli a0, a0, 5 +; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p36: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a1, 36 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p36: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 36 -; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 2 +; RV64IM-NEXT: slli a0, a0, 5 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 36 ret i32 %1 @@ -886,10 +883,14 @@ define i64 
@muli64_p72(i64 %a) nounwind { ; RV32IM-LABEL: muli64_p72: ; RV32IM: # %bb.0: ; RV32IM-NEXT: li a2, 72 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: slli a3, a1, 3 +; RV32IM-NEXT: slli a1, a1, 6 +; RV32IM-NEXT: add a1, a1, a3 +; RV32IM-NEXT: slli a3, a0, 3 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: slli a0, a0, 6 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: add a0, a0, a3 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_p72: @@ -899,8 +900,9 @@ define i64 @muli64_p72(i64 %a) nounwind { ; ; RV64IM-LABEL: muli64_p72: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a1, 72 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 3 +; RV64IM-NEXT: slli a0, a0, 6 +; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i64 %a, 72 ret i64 %1 @@ -1263,12 +1265,16 @@ define i64 @muli64_p4352(i64 %a) nounwind { ; ; RV32IM-LABEL: muli64_p4352: ; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a2, a1, 8 +; RV32IM-NEXT: slli a1, a1, 12 +; RV32IM-NEXT: add a1, a1, a2 ; RV32IM-NEXT: li a2, 17 ; RV32IM-NEXT: slli a2, a2, 8 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: slli a2, a0, 8 +; RV32IM-NEXT: slli a0, a0, 12 +; RV32IM-NEXT: add a0, a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_p4352: diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll index 44ab0e1fef6c1..0fc0adbfa83d9 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadba.ll @@ -116,8 +116,9 @@ define i32 @addmul6(i32 %a, i32 %b) { define i32 @addmul10(i32 %a, i32 %b) { ; RV32I-LABEL: addmul10: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 10 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: 
ret ; @@ -153,8 +154,9 @@ define i32 @addmul12(i32 %a, i32 %b) { define i32 @addmul18(i32 %a, i32 %b) { ; RV32I-LABEL: addmul18: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 18 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -171,8 +173,9 @@ define i32 @addmul18(i32 %a, i32 %b) { define i32 @addmul20(i32 %a, i32 %b) { ; RV32I-LABEL: addmul20: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 20 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -208,8 +211,9 @@ define i32 @addmul24(i32 %a, i32 %b) { define i32 @addmul36(i32 %a, i32 %b) { ; RV32I-LABEL: addmul36: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 36 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -226,8 +230,9 @@ define i32 @addmul36(i32 %a, i32 %b) { define i32 @addmul40(i32 %a, i32 %b) { ; RV32I-LABEL: addmul40: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 40 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 3 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -244,8 +249,9 @@ define i32 @addmul40(i32 %a, i32 %b) { define i32 @addmul72(i32 %a, i32 %b) { ; RV32I-LABEL: addmul72: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 72 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 3 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -279,8 +285,9 @@ define i32 @mul96(i32 %a) { define i32 @mul160(i32 %a) { ; RV32I-LABEL: mul160: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 160 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 5 +; RV32I-NEXT: slli a0, a0, 7 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32XTHEADBA-LABEL: mul160: @@ 
-312,8 +319,9 @@ define i32 @mul200(i32 %a) { define i32 @mul288(i32 %a) { ; RV32I-LABEL: mul288: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 288 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 5 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32XTHEADBA-LABEL: mul288: @@ -328,8 +336,9 @@ define i32 @mul288(i32 %a) { define i32 @mul258(i32 %a) { ; RV32I-LABEL: mul258: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 258 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32XTHEADBA-LABEL: mul258: @@ -344,8 +353,9 @@ define i32 @mul258(i32 %a) { define i32 @mul260(i32 %a) { ; RV32I-LABEL: mul260: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 260 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32XTHEADBA-LABEL: mul260: @@ -360,8 +370,9 @@ define i32 @mul260(i32 %a) { define i32 @mul264(i32 %a) { ; RV32I-LABEL: mul264: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 264 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32XTHEADBA-LABEL: mul264: diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll index fec156ac2be27..f8ca41782c6e1 100644 --- a/llvm/test/CodeGen/RISCV/rv32zba.ll +++ b/llvm/test/CodeGen/RISCV/rv32zba.ll @@ -82,8 +82,9 @@ define i32 @addmul6(i32 %a, i32 %b) { define i32 @addmul10(i32 %a, i32 %b) { ; RV32I-LABEL: addmul10: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 10 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -119,8 +120,9 @@ define i32 @addmul12(i32 %a, i32 %b) { define i32 @addmul18(i32 %a, i32 %b) { ; RV32I-LABEL: addmul18: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 18 -; RV32I-NEXT: mul a0, a0, a2 +; 
RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -137,8 +139,9 @@ define i32 @addmul18(i32 %a, i32 %b) { define i32 @addmul20(i32 %a, i32 %b) { ; RV32I-LABEL: addmul20: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 20 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -174,8 +177,9 @@ define i32 @addmul24(i32 %a, i32 %b) { define i32 @addmul36(i32 %a, i32 %b) { ; RV32I-LABEL: addmul36: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 36 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -192,8 +196,9 @@ define i32 @addmul36(i32 %a, i32 %b) { define i32 @addmul40(i32 %a, i32 %b) { ; RV32I-LABEL: addmul40: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 40 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 3 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -210,8 +215,9 @@ define i32 @addmul40(i32 %a, i32 %b) { define i32 @addmul72(i32 %a, i32 %b) { ; RV32I-LABEL: addmul72: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 72 -; RV32I-NEXT: mul a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 3 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; @@ -245,8 +251,9 @@ define i32 @mul96(i32 %a) { define i32 @mul160(i32 %a) { ; RV32I-LABEL: mul160: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 160 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 5 +; RV32I-NEXT: slli a0, a0, 7 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul160: @@ -261,8 +268,9 @@ define i32 @mul160(i32 %a) { define i32 @mul288(i32 %a) { ; RV32I-LABEL: mul288: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 288 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 5 +; 
RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul288: @@ -277,8 +285,9 @@ define i32 @mul288(i32 %a) { define i32 @mul258(i32 %a) { ; RV32I-LABEL: mul258: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 258 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul258: @@ -293,8 +302,9 @@ define i32 @mul258(i32 %a) { define i32 @mul260(i32 %a) { ; RV32I-LABEL: mul260: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 260 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul260: @@ -309,8 +319,9 @@ define i32 @mul260(i32 %a) { define i32 @mul264(i32 %a) { ; RV32I-LABEL: mul264: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 264 -; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBA-LABEL: mul264: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index 2272c17bcef03..05396e3355ff6 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -131,8 +131,9 @@ define i64 @disjointormul6(i64 %a, i64 %b) { define i64 @addmul10(i64 %a, i64 %b) { ; RV64I-LABEL: addmul10: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 10 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -168,8 +169,9 @@ define i64 @addmul12(i64 %a, i64 %b) { define i64 @addmul18(i64 %a, i64 %b) { ; RV64I-LABEL: addmul18: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 18 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -186,8 +188,9 @@ define i64 @addmul18(i64 %a, i64 %b) { 
define i64 @addmul20(i64 %a, i64 %b) { ; RV64I-LABEL: addmul20: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 20 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -235,8 +238,9 @@ define i64 @addmul24(i64 %a, i64 %b) { define i64 @addmul36(i64 %a, i64 %b) { ; RV64I-LABEL: addmul36: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 36 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -253,8 +257,9 @@ define i64 @addmul36(i64 %a, i64 %b) { define i64 @addmul40(i64 %a, i64 %b) { ; RV64I-LABEL: addmul40: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 40 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 3 +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -271,8 +276,9 @@ define i64 @addmul40(i64 %a, i64 %b) { define i64 @addmul72(i64 %a, i64 %b) { ; RV64I-LABEL: addmul72: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 72 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 3 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -632,8 +638,9 @@ define i64 @mul137(i64 %a) { define i64 @mul160(i64 %a) { ; RV64I-LABEL: mul160: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 160 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 5 +; RV64I-NEXT: slli a0, a0, 7 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mul160: @@ -648,8 +655,9 @@ define i64 @mul160(i64 %a) { define i64 @mul288(i64 %a) { ; RV64I-LABEL: mul288: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 288 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 5 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mul288: @@ -697,8 +705,9 @@ define i64 @sh3add_imm(i64 %0) { define i64 @mul258(i64 %a) { ; 
RV64I-LABEL: mul258: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 258 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mul258: @@ -713,8 +722,9 @@ define i64 @mul258(i64 %a) { define i64 @mul260(i64 %a) { ; RV64I-LABEL: mul260: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 260 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mul260: @@ -729,8 +739,9 @@ define i64 @mul260(i64 %a) { define i64 @mul264(i64 %a) { ; RV64I-LABEL: mul264: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 264 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mul264: @@ -988,8 +999,9 @@ define signext i32 @mulw192(i32 signext %a) { define signext i32 @mulw320(i32 signext %a) { ; RV64I-LABEL: mulw320: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 320 -; RV64I-NEXT: mulw a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mulw320: @@ -1004,8 +1016,9 @@ define signext i32 @mulw320(i32 signext %a) { define signext i32 @mulw576(i32 signext %a) { ; RV64I-LABEL: mulw576: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 576 -; RV64I-NEXT: mulw a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a0, a0, 9 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBA-LABEL: mulw576: diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 9760821832b37..e362e5ebd8192 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -414,8 +414,9 @@ define i64 @disjointormul6(i64 %a, i64 %b) { define i64 @addmul10(i64 %a, i64 %b) { ; RV64I-LABEL: addmul10: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 10 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: 
slli a2, a0, 1 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -451,8 +452,9 @@ define i64 @addmul12(i64 %a, i64 %b) { define i64 @addmul18(i64 %a, i64 %b) { ; RV64I-LABEL: addmul18: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 18 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -469,8 +471,9 @@ define i64 @addmul18(i64 %a, i64 %b) { define i64 @addmul20(i64 %a, i64 %b) { ; RV64I-LABEL: addmul20: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 20 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -518,8 +521,9 @@ define i64 @addmul24(i64 %a, i64 %b) { define i64 @addmul36(i64 %a, i64 %b) { ; RV64I-LABEL: addmul36: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 36 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -536,8 +540,9 @@ define i64 @addmul36(i64 %a, i64 %b) { define i64 @addmul40(i64 %a, i64 %b) { ; RV64I-LABEL: addmul40: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 40 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 3 +; RV64I-NEXT: slli a0, a0, 5 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -554,8 +559,9 @@ define i64 @addmul40(i64 %a, i64 %b) { define i64 @addmul72(i64 %a, i64 %b) { ; RV64I-LABEL: addmul72: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 72 -; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: slli a2, a0, 3 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -915,8 +921,9 @@ define i64 @mul137(i64 %a) { define i64 @mul160(i64 %a) { ; RV64I-LABEL: mul160: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 160 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, 
a0, 5 +; RV64I-NEXT: slli a0, a0, 7 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul160: @@ -931,8 +938,9 @@ define i64 @mul160(i64 %a) { define i64 @mul288(i64 %a) { ; RV64I-LABEL: mul288: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 288 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 5 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul288: @@ -947,10 +955,10 @@ define i64 @mul288(i64 %a) { define i64 @zext_mul68(i32 signext %a) { ; RV64I-LABEL: zext_mul68: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 17 -; RV64I-NEXT: slli a1, a1, 34 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mulhu a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 30 +; RV64I-NEXT: srli a0, a0, 26 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul68: @@ -985,10 +993,10 @@ define i64 @zext_mul96(i32 signext %a) { define i64 @zext_mul160(i32 signext %a) { ; RV64I-LABEL: zext_mul160: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 5 -; RV64I-NEXT: slli a1, a1, 37 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mulhu a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 27 +; RV64I-NEXT: srli a0, a0, 25 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul160: @@ -1004,10 +1012,10 @@ define i64 @zext_mul160(i32 signext %a) { define i64 @zext_mul288(i32 signext %a) { ; RV64I-LABEL: zext_mul288: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 9 -; RV64I-NEXT: slli a1, a1, 37 ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: mulhu a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 27 +; RV64I-NEXT: srli a0, a0, 24 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul288: @@ -1043,9 +1051,9 @@ define i64 @zext_mul12884901888(i32 signext %a) { define i64 @zext_mul21474836480(i32 signext %a) { ; RV64I-LABEL: zext_mul21474836480: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 5 -; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: slli a0, a0, 34 +; RV64I-NEXT: add a0, a0, a1 ; 
RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul21474836480: @@ -1062,9 +1070,9 @@ define i64 @zext_mul21474836480(i32 signext %a) { define i64 @zext_mul38654705664(i32 signext %a) { ; RV64I-LABEL: zext_mul38654705664: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 9 -; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: slli a0, a0, 35 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: zext_mul38654705664: @@ -1188,8 +1196,9 @@ define i64 @adduw_imm(i32 signext %0) nounwind { define i64 @mul258(i64 %a) { ; RV64I-LABEL: mul258: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 258 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul258: @@ -1204,8 +1213,9 @@ define i64 @mul258(i64 %a) { define i64 @mul260(i64 %a) { ; RV64I-LABEL: mul260: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 260 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul260: @@ -1220,8 +1230,9 @@ define i64 @mul260(i64 %a) { define i64 @mul264(i64 %a) { ; RV64I-LABEL: mul264: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 264 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mul264: @@ -1496,8 +1507,9 @@ define signext i32 @mulw192(i32 signext %a) { define signext i32 @mulw320(i32 signext %a) { ; RV64I-LABEL: mulw320: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 320 -; RV64I-NEXT: mulw a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mulw320: @@ -1512,8 +1524,9 @@ define signext i32 @mulw320(i32 signext %a) { define signext i32 @mulw576(i32 signext %a) { ; RV64I-LABEL: mulw576: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 576 -; RV64I-NEXT: mulw a0, a0, a1 +; 
RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a0, a0, 9 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBA-LABEL: mulw576: @@ -2977,8 +2990,9 @@ define i64 @bext_mul132(i32 %1, i32 %2) { ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: srlw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: li a1, 132 -; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a0, a0, 7 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBANOZBB-LABEL: bext_mul132: @@ -3015,10 +3029,10 @@ define ptr @gep_lshr_i32(ptr %0, i64 %1) { ; RV64I-LABEL: gep_lshr_i32: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: srli a1, a1, 2 -; RV64I-NEXT: li a2, 5 -; RV64I-NEXT: slli a2, a2, 36 ; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: mulhu a1, a1, a2 +; RV64I-NEXT: srli a2, a1, 28 +; RV64I-NEXT: srli a1, a1, 26 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index 530f9bf19fce7..bd912193c4fed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -105,87 +105,86 @@ define fastcc @ret_split_nxv128i32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: vl8re32.v v8, (a1) -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # vscale x 64-byte Folded Spill -; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: slli a5, a2, 5 -; CHECK-NEXT: slli a6, a2, 4 -; CHECK-NEXT: slli a7, a2, 6 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: sub a3, a5, a4 -; CHECK-NEXT: sub t0, a7, a6 -; CHECK-NEXT: sub a7, a7, a4 -; CHECK-NEXT: add t1, a1, a4 -; CHECK-NEXT: add t2, a1, a6 -; CHECK-NEXT: add t3, a1, 
a5 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 5 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: slli a4, a2, 5 +; CHECK-NEXT: slli a5, a2, 4 +; CHECK-NEXT: slli a2, a2, 6 +; CHECK-NEXT: sub a6, a4, a3 +; CHECK-NEXT: add a7, a4, a3 +; CHECK-NEXT: sub t0, a2, a5 +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: add t1, a1, a3 +; CHECK-NEXT: add t2, a1, a5 +; CHECK-NEXT: add t3, a1, a4 ; CHECK-NEXT: vl8re32.v v8, (t1) ; CHECK-NEXT: csrr t1, vlenb -; CHECK-NEXT: li t4, 24 -; CHECK-NEXT: mul t1, t1, t4 +; CHECK-NEXT: slli t1, t1, 4 ; CHECK-NEXT: add t1, sp, t1 ; CHECK-NEXT: addi t1, t1, 16 ; CHECK-NEXT: vs8r.v v8, (t1) # vscale x 64-byte Folded Spill -; CHECK-NEXT: add t1, a1, a2 +; CHECK-NEXT: add t1, a1, a6 ; CHECK-NEXT: vl8re32.v v8, (t2) -; CHECK-NEXT: csrr t2, vlenb -; CHECK-NEXT: slli t2, t2, 3 -; CHECK-NEXT: add t2, sp, t2 -; CHECK-NEXT: addi t2, t2, 16 +; CHECK-NEXT: addi t2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (t2) # vscale x 64-byte Folded Spill -; CHECK-NEXT: add t2, a1, a3 +; CHECK-NEXT: add t2, a1, a7 ; CHECK-NEXT: vl8re32.v v16, (t3) ; CHECK-NEXT: add t3, a1, t0 -; CHECK-NEXT: add a1, a1, a7 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: vl8re32.v v8, (t1) -; CHECK-NEXT: vl8re32.v v24, (t2) ; CHECK-NEXT: csrr t1, vlenb -; CHECK-NEXT: slli t1, t1, 4 +; CHECK-NEXT: li t4, 24 +; CHECK-NEXT: mul t1, t1, t4 ; CHECK-NEXT: add t1, sp, t1 ; CHECK-NEXT: addi t1, t1, 16 -; CHECK-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vs8r.v v8, (t1) # vscale x 64-byte Folded Spill +; CHECK-NEXT: vl8re32.v v8, (t2) +; CHECK-NEXT: csrr t1, vlenb +; CHECK-NEXT: slli t1, t1, 3 +; CHECK-NEXT: add t1, sp, t1 +; CHECK-NEXT: addi t1, t1, 16 +; CHECK-NEXT: vs8r.v v8, (t1) # vscale x 64-byte Folded Spill ; CHECK-NEXT: vl8re32.v v24, (t3) -; CHECK-NEXT: addi t1, sp, 16 -; CHECK-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill -; 
CHECK-NEXT: vl8re32.v v24, (a1) +; CHECK-NEXT: vl8re32.v v8, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vs8r.v v0, (a0) -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs8r.v v8, (a2) +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vs8r.v v16, (a4) ; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vs8r.v v16, (a5) -; CHECK-NEXT: add a6, a0, a6 +; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vs8r.v v8, (a6) -; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload +; CHECK-NEXT: vs8r.v v16, (a3) +; CHECK-NEXT: add a2, a0, a2 +; CHECK-NEXT: vs8r.v v8, (a2) +; CHECK-NEXT: add t0, a0, t0 +; CHECK-NEXT: vs8r.v v24, (t0) +; CHECK-NEXT: add a7, a0, a7 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vs8r.v v8, (a4) -; CHECK-NEXT: add a7, a0, a7 -; CHECK-NEXT: vs8r.v v24, (a7) -; CHECK-NEXT: add t0, a0, t0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload -; CHECK-NEXT: vs8r.v v8, (t0) -; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: vs8r.v v8, (a7) +; CHECK-NEXT: add a0, a0, a6 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 29d9a8a9b060c..07aa05f609c40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -653,28 +653,31 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; ZVE32F-LABEL: gather_of_pointers: ; ZVE32F: # %bb.0: # %bb ; ZVE32F-NEXT: li a2, 0 -; ZVE32F-NEXT: lui a4, 2 -; ZVE32F-NEXT: li a3, 1 -; ZVE32F-NEXT: add a4, a0, a4 -; ZVE32F-NEXT: li a5, 40 +; ZVE32F-NEXT: lui a3, 2 +; ZVE32F-NEXT: add a3, a0, a3 +; ZVE32F-NEXT: li a4, 1 ; ZVE32F-NEXT: .LBB12_1: # %bb2 ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 -; ZVE32F-NEXT: mul a6, a3, a5 -; ZVE32F-NEXT: mul a7, a2, a5 +; ZVE32F-NEXT: slli a5, a4, 3 +; ZVE32F-NEXT: slli a6, a4, 5 +; ZVE32F-NEXT: slli a7, a2, 3 +; ZVE32F-NEXT: slli t0, a2, 5 ; ZVE32F-NEXT: addi a2, a2, 4 -; ZVE32F-NEXT: add a6, a1, a6 +; ZVE32F-NEXT: add a5, a6, a5 +; ZVE32F-NEXT: add a7, t0, a7 +; ZVE32F-NEXT: add a5, a1, a5 ; ZVE32F-NEXT: add a7, a1, a7 -; ZVE32F-NEXT: ld t0, 0(a7) -; ZVE32F-NEXT: ld t1, 0(a6) +; ZVE32F-NEXT: ld a6, 0(a7) +; ZVE32F-NEXT: ld t0, 0(a5) ; ZVE32F-NEXT: ld a7, 80(a7) -; ZVE32F-NEXT: ld a6, 80(a6) -; ZVE32F-NEXT: sd t0, 0(a0) -; ZVE32F-NEXT: sd t1, 8(a0) +; ZVE32F-NEXT: ld a5, 80(a5) +; ZVE32F-NEXT: sd a6, 0(a0) +; ZVE32F-NEXT: sd t0, 8(a0) ; ZVE32F-NEXT: sd a7, 16(a0) -; ZVE32F-NEXT: sd a6, 24(a0) +; ZVE32F-NEXT: sd a5, 24(a0) ; ZVE32F-NEXT: addi a0, a0, 32 -; ZVE32F-NEXT: addi a3, a3, 4 -; ZVE32F-NEXT: bne a0, a4, .LBB12_1 +; ZVE32F-NEXT: addi a4, a4, 4 +; ZVE32F-NEXT: bne a0, a3, .LBB12_1 ; ZVE32F-NEXT: # %bb.2: # %bb18 ; ZVE32F-NEXT: ret ; @@ -701,28 +704,31 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; OPTV-LABEL: gather_of_pointers: ; OPTV: # %bb.0: # %bb ; OPTV-NEXT: li a2, 0 -; OPTV-NEXT: lui a4, 2 -; 
OPTV-NEXT: li a3, 1 -; OPTV-NEXT: add a4, a0, a4 -; OPTV-NEXT: li a5, 40 +; OPTV-NEXT: lui a3, 2 +; OPTV-NEXT: add a3, a0, a3 +; OPTV-NEXT: li a4, 1 ; OPTV-NEXT: .LBB12_1: # %bb2 ; OPTV-NEXT: # =>This Inner Loop Header: Depth=1 -; OPTV-NEXT: mul a6, a3, a5 -; OPTV-NEXT: mul a7, a2, a5 +; OPTV-NEXT: slli a5, a4, 3 +; OPTV-NEXT: slli a6, a4, 5 +; OPTV-NEXT: slli a7, a2, 3 +; OPTV-NEXT: slli t0, a2, 5 ; OPTV-NEXT: addi a2, a2, 4 -; OPTV-NEXT: add a6, a1, a6 +; OPTV-NEXT: add a5, a6, a5 +; OPTV-NEXT: add a7, t0, a7 +; OPTV-NEXT: add a5, a1, a5 ; OPTV-NEXT: add a7, a1, a7 -; OPTV-NEXT: ld t0, 0(a7) -; OPTV-NEXT: ld t1, 0(a6) +; OPTV-NEXT: ld a6, 0(a7) +; OPTV-NEXT: ld t0, 0(a5) ; OPTV-NEXT: ld a7, 80(a7) -; OPTV-NEXT: ld a6, 80(a6) -; OPTV-NEXT: sd t0, 0(a0) -; OPTV-NEXT: sd t1, 8(a0) +; OPTV-NEXT: ld a5, 80(a5) +; OPTV-NEXT: sd a6, 0(a0) +; OPTV-NEXT: sd t0, 8(a0) ; OPTV-NEXT: sd a7, 16(a0) -; OPTV-NEXT: sd a6, 24(a0) +; OPTV-NEXT: sd a5, 24(a0) ; OPTV-NEXT: addi a0, a0, 32 -; OPTV-NEXT: addi a3, a3, 4 -; OPTV-NEXT: bne a0, a4, .LBB12_1 +; OPTV-NEXT: addi a4, a4, 4 +; OPTV-NEXT: bne a0, a3, .LBB12_1 ; OPTV-NEXT: # %bb.2: # %bb18 ; OPTV-NEXT: ret bb: @@ -778,28 +784,31 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; ZVE32F-LABEL: scatter_of_pointers: ; ZVE32F: # %bb.0: # %bb ; ZVE32F-NEXT: li a2, 0 -; ZVE32F-NEXT: lui a4, 2 -; ZVE32F-NEXT: li a3, 1 -; ZVE32F-NEXT: add a4, a1, a4 -; ZVE32F-NEXT: li a5, 40 +; ZVE32F-NEXT: lui a3, 2 +; ZVE32F-NEXT: add a3, a1, a3 +; ZVE32F-NEXT: li a4, 1 ; ZVE32F-NEXT: .LBB13_1: # %bb2 ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 -; ZVE32F-NEXT: ld a6, 0(a1) -; ZVE32F-NEXT: ld a7, 8(a1) -; ZVE32F-NEXT: ld t0, 16(a1) -; ZVE32F-NEXT: ld t1, 24(a1) -; ZVE32F-NEXT: mul t2, a3, a5 -; ZVE32F-NEXT: mul t3, a2, a5 +; ZVE32F-NEXT: ld a5, 0(a1) +; ZVE32F-NEXT: ld a6, 8(a1) +; ZVE32F-NEXT: ld a7, 16(a1) +; ZVE32F-NEXT: ld t0, 24(a1) +; ZVE32F-NEXT: slli t1, a4, 3 +; ZVE32F-NEXT: slli t2, a4, 5 +; 
ZVE32F-NEXT: slli t3, a2, 3 +; ZVE32F-NEXT: add t1, t2, t1 +; ZVE32F-NEXT: slli t2, a2, 5 ; ZVE32F-NEXT: addi a2, a2, 4 ; ZVE32F-NEXT: addi a1, a1, 32 +; ZVE32F-NEXT: add t2, t2, t3 +; ZVE32F-NEXT: add t1, a0, t1 ; ZVE32F-NEXT: add t2, a0, t2 -; ZVE32F-NEXT: add t3, a0, t3 -; ZVE32F-NEXT: sd a6, 0(t3) -; ZVE32F-NEXT: sd a7, 0(t2) -; ZVE32F-NEXT: sd t0, 80(t3) -; ZVE32F-NEXT: sd t1, 80(t2) -; ZVE32F-NEXT: addi a3, a3, 4 -; ZVE32F-NEXT: bne a1, a4, .LBB13_1 +; ZVE32F-NEXT: sd a5, 0(t2) +; ZVE32F-NEXT: sd a6, 0(t1) +; ZVE32F-NEXT: sd a7, 80(t2) +; ZVE32F-NEXT: sd t0, 80(t1) +; ZVE32F-NEXT: addi a4, a4, 4 +; ZVE32F-NEXT: bne a1, a3, .LBB13_1 ; ZVE32F-NEXT: # %bb.2: # %bb18 ; ZVE32F-NEXT: ret ; @@ -826,28 +835,31 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; OPTV-LABEL: scatter_of_pointers: ; OPTV: # %bb.0: # %bb ; OPTV-NEXT: li a2, 0 -; OPTV-NEXT: lui a4, 2 -; OPTV-NEXT: li a3, 1 -; OPTV-NEXT: add a4, a1, a4 -; OPTV-NEXT: li a5, 40 +; OPTV-NEXT: lui a3, 2 +; OPTV-NEXT: add a3, a1, a3 +; OPTV-NEXT: li a4, 1 ; OPTV-NEXT: .LBB13_1: # %bb2 ; OPTV-NEXT: # =>This Inner Loop Header: Depth=1 -; OPTV-NEXT: ld a6, 0(a1) -; OPTV-NEXT: ld a7, 8(a1) -; OPTV-NEXT: ld t0, 16(a1) -; OPTV-NEXT: ld t1, 24(a1) -; OPTV-NEXT: mul t2, a3, a5 -; OPTV-NEXT: mul t3, a2, a5 +; OPTV-NEXT: ld a5, 0(a1) +; OPTV-NEXT: ld a6, 8(a1) +; OPTV-NEXT: ld a7, 16(a1) +; OPTV-NEXT: ld t0, 24(a1) +; OPTV-NEXT: slli t1, a4, 3 +; OPTV-NEXT: slli t2, a4, 5 +; OPTV-NEXT: slli t3, a2, 3 +; OPTV-NEXT: add t1, t2, t1 +; OPTV-NEXT: slli t2, a2, 5 ; OPTV-NEXT: addi a2, a2, 4 ; OPTV-NEXT: addi a1, a1, 32 +; OPTV-NEXT: add t2, t2, t3 +; OPTV-NEXT: add t1, a0, t1 ; OPTV-NEXT: add t2, a0, t2 -; OPTV-NEXT: add t3, a0, t3 -; OPTV-NEXT: sd a6, 0(t3) -; OPTV-NEXT: sd a7, 0(t2) -; OPTV-NEXT: sd t0, 80(t3) -; OPTV-NEXT: sd t1, 80(t2) -; OPTV-NEXT: addi a3, a3, 4 -; OPTV-NEXT: bne a1, a4, .LBB13_1 +; OPTV-NEXT: sd a5, 0(t2) +; OPTV-NEXT: sd a6, 0(t1) +; OPTV-NEXT: sd a7, 80(t2) +; 
OPTV-NEXT: sd t0, 80(t1) +; OPTV-NEXT: addi a4, a4, 4 +; OPTV-NEXT: bne a1, a3, .LBB13_1 ; OPTV-NEXT: # %bb.2: # %bb18 ; OPTV-NEXT: ret bb: diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll index a78130e8f102f..3da04eb7e6abe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -925,9 +925,9 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) { ; CHECK-LABEL: vreduce_ord_fadd_nxv10f16: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma @@ -1007,9 +1007,9 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) { ; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v12, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v12