diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index dd64676222055..dd0412460f4e1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2605,9 +2605,22 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
 
 SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
   SDLoc dl(N);
-  SDValue Op = GetPromotedInteger(N->getOperand(0));
-  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
-  return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+  SDValue Src = N->getOperand(0);
+  SDValue Op = GetPromotedInteger(Src);
+  EVT VT = N->getValueType(0);
+
+  // If this zext has the nneg flag and the target prefers sext, see if the
+  // promoted input is already sign extended.
+  // TODO: Should we have some way to set nneg on ISD::AND instead?
+  if (N->getFlags().hasNonNeg() && Op.getValueType() == VT &&
+      TLI.isSExtCheaperThanZExt(Src.getValueType(), VT)) {
+    unsigned OpEffectiveBits = DAG.ComputeMaxSignificantBits(Op);
+    if (OpEffectiveBits <= Src.getScalarValueSizeInBits())
+      return Op;
+  }
+
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op);
+  return DAG.getZeroExtendInReg(Op, dl, Src.getValueType());
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 32a037918a5a7..7ca1ee1cba2f8 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -779,3 +779,78 @@ define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
   %1 = shl i128 %a, %zext
   ret i128 %1
 }
+
+; Do some arithmetic on the i32 shift amount before the zext nneg. This
+; arithmetic will be promoted using a W instruction on RV64. Make sure we can
+; use this to avoid an unnecessary zext of the shift amount.
+define i128 @shl128_shamt32_arith(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl128_shamt32_arith:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    lw a3, 0(a1)
+; RV32I-NEXT:    lw a4, 4(a1)
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    addi a2, a2, 1
+; RV32I-NEXT:    sw zero, 0(sp)
+; RV32I-NEXT:    sw zero, 4(sp)
+; RV32I-NEXT:    sw zero, 8(sp)
+; RV32I-NEXT:    sw zero, 12(sp)
+; RV32I-NEXT:    addi a6, sp, 16
+; RV32I-NEXT:    srli a7, a2, 3
+; RV32I-NEXT:    andi t0, a2, 31
+; RV32I-NEXT:    andi a7, a7, 12
+; RV32I-NEXT:    sub a6, a6, a7
+; RV32I-NEXT:    sw a3, 16(sp)
+; RV32I-NEXT:    sw a4, 20(sp)
+; RV32I-NEXT:    sw a5, 24(sp)
+; RV32I-NEXT:    sw a1, 28(sp)
+; RV32I-NEXT:    lw a1, 0(a6)
+; RV32I-NEXT:    lw a3, 4(a6)
+; RV32I-NEXT:    lw a4, 8(a6)
+; RV32I-NEXT:    lw a5, 12(a6)
+; RV32I-NEXT:    xori a6, t0, 31
+; RV32I-NEXT:    sll a7, a3, a2
+; RV32I-NEXT:    srli t0, a1, 1
+; RV32I-NEXT:    sll a5, a5, a2
+; RV32I-NEXT:    sll a1, a1, a2
+; RV32I-NEXT:    sll a2, a4, a2
+; RV32I-NEXT:    srli a3, a3, 1
+; RV32I-NEXT:    srli a4, a4, 1
+; RV32I-NEXT:    srl t0, t0, a6
+; RV32I-NEXT:    srl a3, a3, a6
+; RV32I-NEXT:    srl a4, a4, a6
+; RV32I-NEXT:    or a6, a7, t0
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    or a4, a5, a4
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    sw a6, 4(a0)
+; RV32I-NEXT:    sw a2, 8(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: shl128_shamt32_arith:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addiw a4, a2, 1
+; RV64I-NEXT:    addi a3, a4, -64
+; RV64I-NEXT:    sll a2, a0, a4
+; RV64I-NEXT:    bltz a3, .LBB17_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:    j .LBB17_3
+; RV64I-NEXT:  .LBB17_2:
+; RV64I-NEXT:    sll a1, a1, a4
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    not a4, a4
+; RV64I-NEXT:    srl a0, a0, a4
+; RV64I-NEXT:    or a1, a1, a0
+; RV64I-NEXT:  .LBB17_3:
+; RV64I-NEXT:    srai a0, a3, 63
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    ret
+  %c = add i32 %b, 1
+  %zext = zext nneg i32 %c to i128
+  %1 = shl i128 %a, %zext
+  ret i128 %1
+}
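
For context on the LegalizeIntegerTypes change: the fast path rests on one observation. After promotion, if ComputeMaxSignificantBits of the promoted operand is no larger than the original source width, the wide value is already a sign extension of the narrow value; combined with the nneg flag (the narrow input is non-negative, so its sign bit is clear), sign and zero extension produce identical bits, and the promoted value can be returned as-is instead of going through ANY_EXTEND plus getZeroExtendInReg masking. Below is a minimal standalone C++ sketch of that bit-level reasoning, not the SelectionDAG API: the helpers maxSignificantBits and canReusePromotedValue are hypothetical names, and it assumes an i32 source promoted to i64 on a target like RV64 where sext is cheaper than zext.

```cpp
// Standalone sketch (not LLVM API): models the reasoning behind the new fast
// path in PromoteIntOp_ZERO_EXTEND for a hypothetical i32 -> i64 promotion.
#include <cassert>
#include <cstdint>

// Rough stand-in for DAG.ComputeMaxSignificantBits(): the number of bits
// needed to hold the value as a signed integer, i.e. 64 minus the number of
// redundant copies of the sign bit.
static unsigned maxSignificantBits(int64_t V) {
  uint64_t U = static_cast<uint64_t>(V);
  uint64_t Sign = (U >> 63) & 1;
  unsigned SignBits = 1; // the sign bit itself
  for (int Bit = 62; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++SignBits;
  return 64 - SignBits + 1;
}

// If the promoted value's significant bits fit in the original 32-bit type,
// the wide value is already the sign extension of the narrow value; with the
// nneg flag the narrow value is non-negative, so sign and zero extension
// agree and the promoted value can be reused without masking.
static bool canReusePromotedValue(int64_t Promoted, unsigned SrcBits = 32) {
  return maxSignificantBits(Promoted) <= SrcBits;
}

int main() {
  // A W-form add (addiw) sign-extends its 32-bit result to 64 bits, so the
  // promoted `add i32 %b, 1` from the test always passes the check.
  int64_t PromotedAdd = static_cast<int32_t>(41 + 1);
  assert(canReusePromotedValue(PromotedAdd));
  assert(canReusePromotedValue(static_cast<int32_t>(0x7fffffff)));

  // A value with stray bits above bit 31 would not pass, and would still go
  // through the ANY_EXTEND + getZeroExtendInReg path.
  int64_t Dirty = (int64_t{1} << 40) | 42;
  assert(!canReusePromotedValue(Dirty));
  return 0;
}
```

This is also why the RV64I check lines above need no masking of the shift amount: addiw always sign-extends its 32-bit result, so the significant-bit check succeeds and the promoted add feeds the sll directly.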