diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ca301dcca4dab..9866567ac1eef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -504,6 +504,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     break;
   }
+  case ISD::AND: {
+    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (!N1C)
+      break;
+
+    SDValue N0 = Node->getOperand(0);
+
+    bool LeftShift = N0.getOpcode() == ISD::SHL;
+    if (!LeftShift && N0.getOpcode() != ISD::SRL)
+      break;
+
+    auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!C)
+      break;
+    uint64_t C2 = C->getZExtValue();
+    unsigned XLen = Subtarget->getXLen();
+    if (!C2 || C2 >= XLen)
+      break;
+
+    uint64_t C1 = N1C->getZExtValue();
+
+    // Keep track of whether this is a andi, zext.h, or zext.w.
+    bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
+    if (C1 == UINT64_C(0xFFFF) &&
+        (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
+      ZExtOrANDI = true;
+    if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
+      ZExtOrANDI = true;
+
+    // Clear irrelevant bits in the mask.
+    if (LeftShift)
+      C1 &= maskTrailingZeros<uint64_t>(C2);
+    else
+      C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+    // Some transforms should only be done if the shift has a single use or
+    // the AND would become (srli (slli X, 32), 32)
+    bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
+
+    SDValue X = N0.getOperand(0);
+
+    // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
+    // with c3 leading zeros.
+    if (!LeftShift && isMask_64(C1)) {
+      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+      if (C2 < C3) {
+        // If the number of leading zeros is C2+32 this can be SRLIW.
+        if (C2 + 32 == C3) {
+          SDNode *SRLIW =
+              CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
+                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
+          ReplaceNode(Node, SRLIW);
+          return;
+        }
+
+        // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
+        // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
+        //
+        // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
+        // legalized and goes through DAG combine.
+        SDValue Y;
+        if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
+            selectSExti32(X, Y)) {
+          SDNode *SRAIW =
+              CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
+                                     CurDAG->getTargetConstant(31, DL, XLenVT));
+          SDNode *SRLIW = CurDAG->getMachineNode(
+              RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
+              CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
+          ReplaceNode(Node, SRLIW);
+          return;
+        }
+
+        // (srli (slli x, c3-c2), c3).
+        if (OneUseOrZExtW && !ZExtOrANDI) {
+          SDNode *SLLI = CurDAG->getMachineNode(
+              RISCV::SLLI, DL, XLenVT, X,
+              CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+          SDNode *SRLI =
+              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
+                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
+          ReplaceNode(Node, SRLI);
+          return;
+        }
+      }
+    }
+
+    // Turn (and (shl x, c2) c1) -> (srli (slli c2+c3), c3) if c1 is a mask
+    // shifted by c2 bits with c3 leading zeros.
+    if (LeftShift && isShiftedMask_64(C1)) {
+      uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+
+      if (C2 + C3 < XLen &&
+          C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
+        // Use slli.uw when possible.
+        if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
+          SDNode *SLLIUW =
+              CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X,
+                                     CurDAG->getTargetConstant(C2, DL, XLenVT));
+          ReplaceNode(Node, SLLIUW);
+          return;
+        }
+
+        // (srli (slli c2+c3), c3)
+        if (OneUseOrZExtW && !ZExtOrANDI) {
+          SDNode *SLLI = CurDAG->getMachineNode(
+              RISCV::SLLI, DL, XLenVT, X,
+              CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+          SDNode *SRLI =
+              CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
+                                     CurDAG->getTargetConstant(C3, DL, XLenVT));
+          ReplaceNode(Node, SRLI);
+          return;
+        }
+      }
+    }
+
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     unsigned IntNo = Node->getConstantOperandVal(0);
     switch (IntNo) {
@@ -1377,41 +1496,6 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
   return false;
 }
 
-// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
-// on RV64).
-// SLLIUW is the same as SLLI except for the fact that it clears the bits
-// XLEN-1:32 of the input RS1 before shifting.
-// A PatFrag has already checked that it has the right structure:
-//
-//  (AND (SHL RS1, VC2), VC1)
-//
-// We check that VC2, the shamt is less than 32, otherwise the pattern is
-// exactly the same as SLLI and we give priority to that.
-// Eventually we check that VC1, the mask used to clear the upper 32 bits
-// of RS1, is correct:
-//
-//  VC1 == (0xFFFFFFFF << VC2)
-//
-bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
-  assert(N->getOpcode() == ISD::AND);
-  assert(N->getOperand(0).getOpcode() == ISD::SHL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  // The IsRV64 predicate is checked after PatFrag predicates so we can get
-  // here even on RV32.
-  if (!Subtarget->is64Bit())
-    return false;
-
-  SDValue Shl = N->getOperand(0);
-  uint64_t VC1 = N->getConstantOperandVal(1);
-  uint64_t VC2 = Shl.getConstantOperandVal(1);
-
-  // Immediate range should be enforced by uimm5 predicate.
-  assert(VC2 < 32 && "Unexpected immediate");
-  return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
-}
-
 // Select VL as a 5 bit immediate or a value that will become a register. This
 // allows us to choose betwen VSETIVLI or VSETVLI later.
 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index e8b79b008fa45..56d0722063168 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -58,8 +58,6 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   bool selectSExti32(SDValue N, SDValue &Val);
   bool selectZExti32(SDValue N, SDValue &Val);
 
-  bool MatchSLLIUW(SDNode *N) const;
-
   bool selectVLOp(SDValue N, SDValue &VL);
 
   bool selectVSplat(SDValue N, SDValue &SplatVal);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 65767ae1e6828..949fff25e9e0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -885,13 +885,6 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
 }]>;
 def zexti32 : ComplexPattern<i64, 1, "selectZExti32">;
 
-// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
-// on RV64). Also used to optimize the same sequence without SLLIUW.
-def SLLIUWPat : PatFrag<(ops node:$A, node:$B),
-                        (and (shl node:$A, node:$B), imm), [{
-  return MatchSLLIUW(N);
-}]>;
-
 def add_oneuse : PatFrag<(ops node:$A, node:$B), (add node:$A, node:$B), [{
   return N->hasOneUse();
 }]>;
@@ -1236,14 +1229,6 @@ def : Pat<(i64 (and GPR:$rs1, 0xffffffff)), (SRLI (SLLI GPR:$rs1, 32), 32)>;
 // shifts instead of 3. This can occur when unsigned is used to index an array.
 def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
           (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
-// shl/and can appear in the other order too.
-def : Pat<(i64 (SLLIUWPat GPR:$rs1, uimm5:$shamt)),
-          (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
-
-// If we're shifting a value left by 0-31 bits, and then masking to 32-bits,
-// use 2 shifts instead of 3.
-def : Pat<(i64 (and (shl GPR:$rs1, uimm5:$shamt), 0xffffffff)),
-          (SRLI (SLLI GPR:$rs1, (ImmPlus32 uimm5:$shamt)), 32)>;
 }
 
 let Predicates = [IsRV64] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 278a83194552e..7d8d053cc584b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -998,8 +998,6 @@ def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
 } // Predicates = [HasStdExtZba]
 
 let Predicates = [HasStdExtZba, IsRV64] in {
-def : Pat<(i64 (SLLIUWPat GPR:$rs1, uimm5:$shamt)),
-          (SLLIUW GPR:$rs1, uimm5:$shamt)>;
 def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
           (SLLIUW GPR:$rs1, uimm5:$shamt)>;
 def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
@@ -1013,11 +1011,11 @@ def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
           (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 1)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
           (SH1ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 2)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
           (SH2ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 3)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll
index dd9e6fa1cc90e..991c91cc70ea0 100644
--- a/llvm/test/CodeGen/RISCV/div.ll
+++ b/llvm/test/CodeGen/RISCV/div.ll
@@ -711,18 +711,16 @@ define i32 @sdiv_pow2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: sdiv_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 60
-; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 3
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: sdiv_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 60
-; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    sraiw a0, a0, 3
 ; RV64IM-NEXT:    ret
@@ -749,22 +747,16 @@ define i32 @sdiv_pow2_2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: sdiv_pow2_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 47
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -1
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    sraiw a0, a0, 16
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: sdiv_pow2_2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 47
-; RV64IM-NEXT:    lui a2, 16
-; RV64IM-NEXT:    addiw a2, a2, -1
-; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
 ; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    sraiw a0, a0, 16
 ; RV64IM-NEXT:    ret
@@ -1280,10 +1272,9 @@ define i16 @sdiv16_constant(i16 %a) nounwind {
 ; RV64IM-NEXT:    lui a1, 6
 ; RV64IM-NEXT:    addiw a1, a1, 1639
 ; RV64IM-NEXT:    mul a0, a0, a1
-; RV64IM-NEXT:    srai a1, a0, 17
-; RV64IM-NEXT:    srli a0, a0, 31
-; RV64IM-NEXT:    andi a0, a0, 1
-; RV64IM-NEXT:    add a0, a1, a0
+; RV64IM-NEXT:    srliw a1, a0, 31
+; RV64IM-NEXT:    srai a0, a0, 17
+; RV64IM-NEXT:    add a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = sdiv i16 %a, 5
   ret i16 %1
diff --git a/llvm/test/CodeGen/RISCV/rem.ll b/llvm/test/CodeGen/RISCV/rem.ll
index 8712f532d5aed..cb7098cea6113 100644
--- a/llvm/test/CodeGen/RISCV/rem.ll
+++ b/llvm/test/CodeGen/RISCV/rem.ll
@@ -138,9 +138,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: srem_pow2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 60
-; RV64I-NEXT:    andi a1, a1, 7
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 29
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    andi a1, a1, -8
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -148,9 +147,8 @@ define i32 @srem_pow2(i32 %a) nounwind {
 ;
 ; RV64IM-LABEL: srem_pow2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 60
-; RV64IM-NEXT:    andi a1, a1, 7
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 29
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    andi a1, a1, -8
 ; RV64IM-NEXT:    subw a0, a0, a1
@@ -182,11 +180,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: srem_pow2_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 47
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -1
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 16
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    lui a2, 1048560
 ; RV64I-NEXT:    and a1, a1, a2
@@ -195,11 +190,8 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
 ;
 ; RV64IM-LABEL: srem_pow2_2:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 47
-; RV64IM-NEXT:    lui a2, 16
-; RV64IM-NEXT:    addiw a2, a2, -1
-; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 16
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    lui a2, 1048560
 ; RV64IM-NEXT:    and a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 6b32f993251bd..6774f0da66ef2 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -3348,10 +3348,8 @@ define i32 @packh_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: packh_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    srli a1, a1, 16
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    ret
 ;
@@ -3375,10 +3373,8 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: packh_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    slli a1, a2, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    slli a1, a2, 24
+; RV32I-NEXT:    srli a1, a1, 16
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    mv a1, zero
 ; RV32I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index 8c6c5d79de81c..df0520aee11e5 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -1652,8 +1652,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind {
 ; RV64I-LABEL: zext_sraiw_aext:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    srli a0, a0, 7
-; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    slli a0, a0, 25
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %1 = ashr i32 %a, 7
@@ -1663,8 +1662,7 @@ define zeroext i32 @zext_sraiw_sext(i32 signext %a) nounwind {
 ; RV64I-LABEL: zext_sraiw_sext:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a0, a0, 8
-; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    slli a0, a0, 24
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %1 = ashr i32 %a, 8
@@ -1678,8 +1676,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind {
 ; RV64I-LABEL: zext_sraiw_zext:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    srli a0, a0, 9
-; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    slli a0, a0, 23
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    ret
   %1 = ashr i32 %a, 9
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 00d3278a3283d..171ee6557271f 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -3882,10 +3882,8 @@ define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: packh_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    srli a1, a1, 48
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
@@ -3909,10 +3907,8 @@ define i64 @packh_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-LABEL: packh_i64:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    srli a1, a1, 48
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index a2b7f16f0d017..8efffbabc74c6 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -330,9 +330,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: dont_fold_srem_power_of_two:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 57
-; RV64I-NEXT:    andi a1, a1, 63
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 26
 ; RV64I-NEXT:    add a1, a0, a1
 ; RV64I-NEXT:    andi a1, a1, -64
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -340,9 +339,8 @@ define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
 ;
 ; RV64IM-LABEL: dont_fold_srem_power_of_two:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 57
-; RV64IM-NEXT:    andi a1, a1, 63
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 26
 ; RV64IM-NEXT:    add a1, a0, a1
 ; RV64IM-NEXT:    andi a1, a1, -64
 ; RV64IM-NEXT:    subw a0, a0, a1
@@ -385,24 +383,20 @@ define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
 ;
 ; RV64I-LABEL: dont_fold_srem_i32_smax:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    lui a2, 524288
-; RV64I-NEXT:    addiw a3, a2, -1
-; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 1
 ; RV64I-NEXT:    add a1, a0, a1
+; RV64I-NEXT:    lui a2, 524288
 ; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: dont_fold_srem_i32_smax:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    sext.w a1, a0
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    lui a2, 524288
-; RV64IM-NEXT:    addiw a3, a2, -1
-; RV64IM-NEXT:    and a1, a1, a3
+; RV64IM-NEXT:    sraiw a1, a0, 31
+; RV64IM-NEXT:    srliw a1, a1, 1
 ; RV64IM-NEXT:    add a1, a0, a1
+; RV64IM-NEXT:    lui a2, 524288
 ; RV64IM-NEXT:    and a1, a1, a2
 ; RV64IM-NEXT:    addw a0, a0, a1
 ; RV64IM-NEXT:    ret
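
Reviewer note, not part of the patch: the new ISD::AND selection rests on two shift-pair identities. An AND with a mask that has c3 leading zeros applied to (x srl c2), with c2 < c3, equals slli by c3-c2 followed by srli by c3; an AND with a shifted mask (c3 leading zeros, c2 trailing zeros) applied to (x shl c2) equals slli by c2+c3 followed by srli by c3. The standalone C++ sketch below only illustrates that arithmetic for XLEN=64; the sample values and loop bounds are illustrative and the snippet is not code from the tree.

// check_shift_identities.cpp: brute-force check of the two identities used above.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Samples[] = {0x0123456789abcdefULL, 0xffffffffffffffffULL,
                              0x8000000000000001ULL, 42};

  for (uint64_t X : Samples) {
    // Identity 1: (and (srl x, c2), c1) == (srli (slli x, c3-c2), c3)
    // when c1 is a mask with c3 leading zeros and c2 < c3.
    for (unsigned C3 = 1; C3 < 64; ++C3) {
      uint64_t C1 = ~0ULL >> C3; // mask with C3 leading zeros
      for (unsigned C2 = 0; C2 < C3; ++C2)
        assert(((X >> C2) & C1) == ((X << (C3 - C2)) >> C3));
    }

    // Identity 2: (and (shl x, c2), c1) == (srli (slli x, c2+c3), c3)
    // when c1 is a shifted mask with c3 leading and c2 trailing zeros.
    for (unsigned C2 = 1; C2 < 63; ++C2) {
      for (unsigned C3 = 1; C2 + C3 < 64; ++C3) {
        uint64_t C1 = (~0ULL >> (C2 + C3)) << C2;
        assert(((X << C2) & C1) == ((X << (C2 + C3)) >> C3));
      }
    }
  }
  return 0;
}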