diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 520f593341190..3497dd24fd5c7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1737,6 +1737,22 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, ShAmt = N.getOperand(0); return true; } + } else if (N.getOpcode() == ISD::SUB && + isa<ConstantSDNode>(N.getOperand(0))) { + uint64_t Imm = N.getConstantOperandVal(0); + // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to + // generate a NEG instead of a SUB of a constant. + if (Imm != 0 && Imm % ShiftWidth == 0) { + SDLoc DL(N); + EVT VT = N.getValueType(); + SDValue Zero = + CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); + unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; + MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, + N.getOperand(1)); + ShAmt = SDValue(Neg, 0); + return true; + } } ShAmt = N; diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index 75943edf4187e..aace4ac34216d 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -17,8 +17,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: rotl_32: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 32 -; RV32I-NEXT: sub a2, a2, a1 +; RV32I-NEXT: neg a2, a1 ; RV32I-NEXT: sll a1, a0, a1 ; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a0, a1, a0 @@ -26,8 +25,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind { ; ; RV64I-LABEL: rotl_32: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 32 -; RV64I-NEXT: subw a2, a2, a1 +; RV64I-NEXT: negw a2, a1 ; RV64I-NEXT: sllw a1, a0, a1 ; RV64I-NEXT: srlw a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 @@ -52,8 +50,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind { define i32 @rotr_32(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: rotr_32: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 32 -; RV32I-NEXT: sub a2, a2, a1 +; 
RV32I-NEXT: neg a2, a1 ; RV32I-NEXT: srl a1, a0, a1 ; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: or a0, a1, a0 @@ -61,8 +58,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind { ; ; RV64I-LABEL: rotr_32: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 32 -; RV64I-NEXT: subw a2, a2, a1 +; RV64I-NEXT: negw a2, a1 ; RV64I-NEXT: srlw a1, a0, a1 ; RV64I-NEXT: sllw a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 @@ -89,47 +85,48 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: li a4, 31 +; RV32I-NEXT: li a6, 31 ; RV32I-NEXT: bltz a5, .LBB2_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sll a1, a0, a5 ; RV32I-NEXT: j .LBB2_3 ; RV32I-NEXT: .LBB2_2: ; RV32I-NEXT: sll a1, a3, a2 -; RV32I-NEXT: sub a6, a4, a2 +; RV32I-NEXT: sub a4, a6, a2 ; RV32I-NEXT: srli a7, a0, 1 -; RV32I-NEXT: srl a6, a7, a6 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: srl a4, a7, a4 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: .LBB2_3: -; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub a6, a6, a2 -; RV32I-NEXT: bltz a6, .LBB2_5 +; RV32I-NEXT: neg a7, a2 +; RV32I-NEXT: li a4, 32 +; RV32I-NEXT: sub t0, a4, a2 +; RV32I-NEXT: srl a4, a3, a7 +; RV32I-NEXT: bltz t0, .LBB2_6 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a3, a6 -; RV32I-NEXT: bltz a5, .LBB2_6 -; RV32I-NEXT: j .LBB2_7 +; RV32I-NEXT: bltz a5, .LBB2_7 ; RV32I-NEXT: .LBB2_5: -; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: sub a6, a6, a2 -; RV32I-NEXT: srl a7, a0, a6 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: slli t0, a3, 1 -; RV32I-NEXT: sll a4, t0, a4 -; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: srl a3, a3, a6 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: bgez a5, .LBB2_7 +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: ret ; RV32I-NEXT: .LBB2_6: +; RV32I-NEXT: srl a7, a0, a7 +; RV32I-NEXT: li t0, 64 +; RV32I-NEXT: sub t0, t0, a2 +; RV32I-NEXT: sub a6, a6, t0 +; RV32I-NEXT: slli a3, a3, 1 +; RV32I-NEXT: sll a3, a3, a6 +; RV32I-NEXT: or a3, a7, a3 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: mv a4, a3 +; RV32I-NEXT: 
bgez a5, .LBB2_5 +; RV32I-NEXT: .LBB2_7: ; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: or a4, a4, a0 -; RV32I-NEXT: .LBB2_7: ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 64 -; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: negw a2, a1 ; RV64I-NEXT: sll a1, a0, a1 ; RV64I-NEXT: srl a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 @@ -139,40 +136,42 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: addi a5, a2, -32 -; RV32ZBB-NEXT: li a4, 31 +; RV32ZBB-NEXT: li a6, 31 ; RV32ZBB-NEXT: bltz a5, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: sll a1, a0, a5 ; RV32ZBB-NEXT: j .LBB2_3 ; RV32ZBB-NEXT: .LBB2_2: ; RV32ZBB-NEXT: sll a1, a3, a2 -; RV32ZBB-NEXT: sub a6, a4, a2 +; RV32ZBB-NEXT: sub a4, a6, a2 ; RV32ZBB-NEXT: srli a7, a0, 1 -; RV32ZBB-NEXT: srl a6, a7, a6 -; RV32ZBB-NEXT: or a1, a1, a6 +; RV32ZBB-NEXT: srl a4, a7, a4 +; RV32ZBB-NEXT: or a1, a1, a4 ; RV32ZBB-NEXT: .LBB2_3: -; RV32ZBB-NEXT: li a6, 32 -; RV32ZBB-NEXT: sub a6, a6, a2 -; RV32ZBB-NEXT: bltz a6, .LBB2_5 +; RV32ZBB-NEXT: neg a7, a2 +; RV32ZBB-NEXT: li a4, 32 +; RV32ZBB-NEXT: sub t0, a4, a2 +; RV32ZBB-NEXT: srl a4, a3, a7 +; RV32ZBB-NEXT: bltz t0, .LBB2_6 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a4, a3, a6 -; RV32ZBB-NEXT: bltz a5, .LBB2_6 -; RV32ZBB-NEXT: j .LBB2_7 +; RV32ZBB-NEXT: bltz a5, .LBB2_7 ; RV32ZBB-NEXT: .LBB2_5: -; RV32ZBB-NEXT: li a6, 64 -; RV32ZBB-NEXT: sub a6, a6, a2 -; RV32ZBB-NEXT: srl a7, a0, a6 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: slli t0, a3, 1 -; RV32ZBB-NEXT: sll a4, t0, a4 -; RV32ZBB-NEXT: or a4, a7, a4 -; RV32ZBB-NEXT: srl a3, a3, a6 -; RV32ZBB-NEXT: or a1, a1, a3 -; RV32ZBB-NEXT: bgez a5, .LBB2_7 +; RV32ZBB-NEXT: mv a0, a4 +; RV32ZBB-NEXT: ret ; RV32ZBB-NEXT: .LBB2_6: +; RV32ZBB-NEXT: srl a7, a0, a7 +; RV32ZBB-NEXT: li t0, 64 +; RV32ZBB-NEXT: sub t0, t0, a2 +; RV32ZBB-NEXT: sub a6, a6, t0 +; RV32ZBB-NEXT: slli a3, a3, 1 +; RV32ZBB-NEXT: sll a3, a3, a6 +; 
RV32ZBB-NEXT: or a3, a7, a3 +; RV32ZBB-NEXT: or a1, a1, a4 +; RV32ZBB-NEXT: mv a4, a3 +; RV32ZBB-NEXT: bgez a5, .LBB2_5 +; RV32ZBB-NEXT: .LBB2_7: ; RV32ZBB-NEXT: sll a0, a0, a2 ; RV32ZBB-NEXT: or a4, a4, a0 -; RV32ZBB-NEXT: .LBB2_7: ; RV32ZBB-NEXT: mv a0, a4 ; RV32ZBB-NEXT: ret ; @@ -190,49 +189,50 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: addi a5, a2, -32 -; RV32I-NEXT: li a4, 31 +; RV32I-NEXT: li a6, 31 ; RV32I-NEXT: bltz a5, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a0, a1, a5 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srl a0, a3, a2 -; RV32I-NEXT: sub a6, a4, a2 +; RV32I-NEXT: srl a0, a4, a2 +; RV32I-NEXT: sub a3, a6, a2 ; RV32I-NEXT: slli a7, a1, 1 -; RV32I-NEXT: sll a6, a7, a6 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: sll a3, a7, a3 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub a6, a6, a2 -; RV32I-NEXT: bltz a6, .LBB3_5 +; RV32I-NEXT: neg a7, a2 +; RV32I-NEXT: li a3, 32 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: sll a3, a4, a7 +; RV32I-NEXT: bltz t0, .LBB3_6 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a4, a3, a6 -; RV32I-NEXT: bltz a5, .LBB3_6 -; RV32I-NEXT: j .LBB3_7 +; RV32I-NEXT: bltz a5, .LBB3_7 ; RV32I-NEXT: .LBB3_5: -; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: sub a6, a6, a2 -; RV32I-NEXT: sll a7, a1, a6 -; RV32I-NEXT: sub a4, a4, a6 -; RV32I-NEXT: srli t0, a3, 1 -; RV32I-NEXT: srl a4, t0, a4 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB3_6: +; RV32I-NEXT: sll a7, a1, a7 +; RV32I-NEXT: li t0, 64 +; RV32I-NEXT: sub t0, t0, a2 +; RV32I-NEXT: sub a6, a6, t0 +; RV32I-NEXT: srli a4, a4, 1 +; RV32I-NEXT: srl a4, a4, a6 ; RV32I-NEXT: or a4, a7, a4 -; RV32I-NEXT: sll a3, a3, a6 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: bgez a5, .LBB3_7 -; RV32I-NEXT: .LBB3_6: -; RV32I-NEXT: srl a1, a1, a2 -; RV32I-NEXT: or a4, 
a4, a1 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: bgez a5, .LBB3_5 ; RV32I-NEXT: .LBB3_7: -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: or a3, a3, a1 +; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a2, 64 -; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: negw a2, a1 ; RV64I-NEXT: srl a1, a0, a1 ; RV64I-NEXT: sll a0, a0, a2 ; RV64I-NEXT: or a0, a1, a0 @@ -240,43 +240,45 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotr_64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a4, a0 ; RV32ZBB-NEXT: addi a5, a2, -32 -; RV32ZBB-NEXT: li a4, 31 +; RV32ZBB-NEXT: li a6, 31 ; RV32ZBB-NEXT: bltz a5, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: srl a0, a1, a5 ; RV32ZBB-NEXT: j .LBB3_3 ; RV32ZBB-NEXT: .LBB3_2: -; RV32ZBB-NEXT: srl a0, a3, a2 -; RV32ZBB-NEXT: sub a6, a4, a2 +; RV32ZBB-NEXT: srl a0, a4, a2 +; RV32ZBB-NEXT: sub a3, a6, a2 ; RV32ZBB-NEXT: slli a7, a1, 1 -; RV32ZBB-NEXT: sll a6, a7, a6 -; RV32ZBB-NEXT: or a0, a0, a6 +; RV32ZBB-NEXT: sll a3, a7, a3 +; RV32ZBB-NEXT: or a0, a0, a3 ; RV32ZBB-NEXT: .LBB3_3: -; RV32ZBB-NEXT: li a6, 32 -; RV32ZBB-NEXT: sub a6, a6, a2 -; RV32ZBB-NEXT: bltz a6, .LBB3_5 +; RV32ZBB-NEXT: neg a7, a2 +; RV32ZBB-NEXT: li a3, 32 +; RV32ZBB-NEXT: sub t0, a3, a2 +; RV32ZBB-NEXT: sll a3, a4, a7 +; RV32ZBB-NEXT: bltz t0, .LBB3_6 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a4, a3, a6 -; RV32ZBB-NEXT: bltz a5, .LBB3_6 -; RV32ZBB-NEXT: j .LBB3_7 +; RV32ZBB-NEXT: bltz a5, .LBB3_7 ; RV32ZBB-NEXT: .LBB3_5: -; RV32ZBB-NEXT: li a6, 64 -; RV32ZBB-NEXT: sub a6, a6, a2 -; RV32ZBB-NEXT: sll a7, a1, a6 -; RV32ZBB-NEXT: sub a4, a4, a6 -; RV32ZBB-NEXT: srli t0, a3, 1 -; RV32ZBB-NEXT: srl a4, t0, a4 +; RV32ZBB-NEXT: mv a1, a3 +; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: .LBB3_6: +; RV32ZBB-NEXT: sll a7, a1, a7 +; RV32ZBB-NEXT: li t0, 64 +; RV32ZBB-NEXT: sub t0, t0, a2 +; RV32ZBB-NEXT: sub a6, a6, t0 +; RV32ZBB-NEXT: srli a4, a4, 1 +; RV32ZBB-NEXT: srl a4, 
a4, a6 ; RV32ZBB-NEXT: or a4, a7, a4 -; RV32ZBB-NEXT: sll a3, a3, a6 ; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: bgez a5, .LBB3_7 -; RV32ZBB-NEXT: .LBB3_6: -; RV32ZBB-NEXT: srl a1, a1, a2 -; RV32ZBB-NEXT: or a4, a4, a1 +; RV32ZBB-NEXT: mv a3, a4 +; RV32ZBB-NEXT: bgez a5, .LBB3_5 ; RV32ZBB-NEXT: .LBB3_7: -; RV32ZBB-NEXT: mv a1, a4 +; RV32ZBB-NEXT: srl a1, a1, a2 +; RV32ZBB-NEXT: or a3, a3, a1 +; RV32ZBB-NEXT: mv a1, a3 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotr_64: diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll index 1353f2cd8c638..7e63aa91262a2 100644 --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -151,18 +151,20 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: sub t0, a3, a2 -; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub t1, a6, a2 ; RV32I-NEXT: li t2, 31 +; RV32I-NEXT: li a7, 32 +; RV32I-NEXT: sub t1, a7, a2 +; RV32I-NEXT: sll t0, a5, a6 ; RV32I-NEXT: bltz t1, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll t6, a5, t1 +; RV32I-NEXT: mv t6, t0 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sll a6, a4, t0 -; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: sll a6, a4, a6 +; RV32I-NEXT: sub a7, a3, a2 +; RV32I-NEXT: sub a7, t2, a7 ; RV32I-NEXT: srli t3, a5, 1 ; RV32I-NEXT: srl a7, t3, a7 ; RV32I-NEXT: or t6, a6, a7 @@ -206,7 +208,6 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: bgez t1, .LBB6_15 ; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: sll t0, a5, t0 ; RV32I-NEXT: or t5, t5, t0 ; RV32I-NEXT: .LBB6_15: ; RV32I-NEXT: slli t0, a4, 1 @@ -289,18 +290,20 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a5, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: sub t1, a3, a2 -; RV32I-NEXT: li a6, 32 -; 
RV32I-NEXT: sub t2, a6, a2 -; RV32I-NEXT: li t4, 31 +; RV32I-NEXT: li t3, 31 +; RV32I-NEXT: li a7, 32 +; RV32I-NEXT: sub t2, a7, a2 +; RV32I-NEXT: sll t1, a5, a6 ; RV32I-NEXT: bltz t2, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll s0, a5, t2 +; RV32I-NEXT: mv s0, t1 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a6, a4, t1 -; RV32I-NEXT: sub a7, t4, t1 +; RV32I-NEXT: sll a6, a4, a6 +; RV32I-NEXT: sub a7, a3, a2 +; RV32I-NEXT: sub a7, t3, a7 ; RV32I-NEXT: srli t0, a5, 1 ; RV32I-NEXT: srl a7, t0, a7 ; RV32I-NEXT: or s0, a6, a7 @@ -312,7 +315,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: srl a7, t6, a2 ; RV32I-NEXT: or s0, s0, a7 ; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: addi t3, a2, -64 +; RV32I-NEXT: addi t4, a2, -64 ; RV32I-NEXT: addi t5, a2, -96 ; RV32I-NEXT: srai a7, a4, 31 ; RV32I-NEXT: bltz t5, .LBB7_7 @@ -321,7 +324,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bgeu a2, a3, .LBB7_8 ; RV32I-NEXT: j .LBB7_9 ; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: sra t0, a4, t3 +; RV32I-NEXT: sra t0, a4, t4 ; RV32I-NEXT: bltu a2, a3, .LBB7_9 ; RV32I-NEXT: .LBB7_8: ; RV32I-NEXT: mv s0, t0 @@ -332,7 +335,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: mv t0, s0 ; RV32I-NEXT: .LBB7_11: ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: sub t4, t4, a2 +; RV32I-NEXT: sub t3, t3, a2 ; RV32I-NEXT: bltz a6, .LBB7_13 ; RV32I-NEXT: # %bb.12: ; RV32I-NEXT: srl t6, t6, a6 @@ -341,11 +344,10 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB7_13: ; RV32I-NEXT: srl s0, a1, a2 ; RV32I-NEXT: slli t6, t6, 1 -; RV32I-NEXT: sll t6, t6, t4 +; RV32I-NEXT: sll t6, t6, t3 ; RV32I-NEXT: or t6, s0, t6 ; RV32I-NEXT: bgez t2, .LBB7_15 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: sll t1, a5, t1 ; RV32I-NEXT: or t6, t6, t1 ; RV32I-NEXT: .LBB7_15: ; RV32I-NEXT: slli t1, a4, 1 @@ -358,8 +360,8 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: li t2, 95 ; RV32I-NEXT: sub t2, t2, a2 ; RV32I-NEXT: sll t2, 
t1, t2 -; RV32I-NEXT: srl t3, a5, t3 -; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: srl t4, a5, t4 +; RV32I-NEXT: or t2, t4, t2 ; RV32I-NEXT: bltu a2, a3, .LBB7_19 ; RV32I-NEXT: .LBB7_18: ; RV32I-NEXT: mv t6, t2 @@ -376,7 +378,7 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bgez a6, .LBB7_21 ; RV32I-NEXT: .LBB7_23: ; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: sll t1, t1, t4 +; RV32I-NEXT: sll t1, t1, t3 ; RV32I-NEXT: or a5, a5, t1 ; RV32I-NEXT: bltu a2, a3, .LBB7_25 ; RV32I-NEXT: .LBB7_24: @@ -428,18 +430,20 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: sub t0, a3, a2 -; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub t1, a6, a2 ; RV32I-NEXT: li t2, 31 +; RV32I-NEXT: li a7, 32 +; RV32I-NEXT: sub t1, a7, a2 +; RV32I-NEXT: srl t0, a5, a6 ; RV32I-NEXT: bltz t1, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl t6, a5, t1 +; RV32I-NEXT: mv t6, t0 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a6, a4, t0 -; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: srl a6, a4, a6 +; RV32I-NEXT: sub a7, a3, a2 +; RV32I-NEXT: sub a7, t2, a7 ; RV32I-NEXT: slli t3, a5, 1 ; RV32I-NEXT: sll a7, t3, a7 ; RV32I-NEXT: or t6, a6, a7 @@ -483,7 +487,6 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: bgez t1, .LBB8_15 ; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: srl t0, a5, t0 ; RV32I-NEXT: or t5, t5, t0 ; RV32I-NEXT: .LBB8_15: ; RV32I-NEXT: srli t0, a4, 1