diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 51f730491207b..209dc7af4dd27 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11319,11 +11319,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNOT(DL, NewShift, VT); } - // fold (sra (shl X, m), (sub result_size, n)) - // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for - // result_size - n != m. - // If truncate is free for the target sext(shl) is likely to result in better - // code. + // fold (sra (shl X, m), n) when m < n: + // - (sext (trunc (srl X, (n - m)))) if truncate is free, or + // - (sext_inreg (srl X, (n - m)), TruncVT) if SIGN_EXTEND_INREG is legal on + // TruncVT (e.g. ARM and RISC-V vendor extensions select this in ISel). if (N0.getOpcode() == ISD::SHL && N1C) { // Get the two constants of the shifts, CN0 = m, CN = n. const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); @@ -11352,6 +11351,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), Trunc); } + + // (sra (shl X, HalfSize), Size-1) splats the sign bit of X's low half (a + // sign-test idiom), not an arbitrary bitfield extract. 
+ unsigned LeftShAmt = N01C->getZExtValue(); + unsigned RightShAmt = N1C->getZExtValue(); + bool IsCanonicalSignExtension = + LeftShAmt * 2 == OpSizeInBits && RightShAmt == OpSizeInBits - 1; + + if (ShiftAmt > 0 && !IsCanonicalSignExtension && + TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, TruncVT) == + TargetLowering::Legal) { + SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Amt); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Shift, + DAG.getValueType(TruncVT)); + } } } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 992b03f012ef7..ea80a2b50eb73 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -3942,6 +3942,14 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), (UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), (UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; + +// Standalone (srl; and #0xff) is equivalent to uxtb with ROR — avoids a +// separate logical shift when extracting bytes from a word. +def : ARMV6Pat<(and (srl GPRnopc:$Rm, rot_imm:$rot), 0xFF), + (UXTB GPRnopc:$Rm, rot_imm:$rot)>; +// 16-bit zero-extract: (srl; and #0xffff) -> uxth, ROR #8/#16 only (no wrap). +def : ARMV6Pat<(and (srl GPRnopc:$Rm, imm8_or_16:$rot), 0xFFFF), + (UXTH GPRnopc:$Rm, rot_imm:$rot)>; } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. 
@@ -6320,6 +6328,10 @@ def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)), def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>; def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>; +def : ARMV6Pat<(sext_inreg (srl GPRnopc:$Rm, rot_imm:$rot), i8), + (SXTB GPRnopc:$Rm, rot_imm:$rot)>; +def : ARMV6Pat<(sext_inreg (srl GPRnopc:$Rm, imm8_or_16:$rot), i16), + (SXTH GPRnopc:$Rm, rot_imm:$rot)>; def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)), (SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 39bf77ba9c585..034619d5a49e0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <optional> using namespace llvm; @@ -634,17 +635,76 @@ bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { return true; } -bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { - unsigned Opc; +static std::optional<unsigned> +getSignedBitfieldExtractOpc(const RISCVSubtarget &Subtarget) { + if (Subtarget.hasVendorXTHeadBb()) + return RISCV::TH_EXT; + if (Subtarget.hasVendorXAndesPerf()) + return RISCV::NDS_BFOS; + if (Subtarget.hasVendorXqcibm()) + return RISCV::QC_EXT; + return std::nullopt; +} - if (Subtarget->hasVendorXTHeadBb()) - Opc = RISCV::TH_EXT; - else if (Subtarget->hasVendorXAndesPerf()) - Opc = RISCV::NDS_BFOS; - else if (Subtarget->hasVendorXqcibm()) - Opc = RISCV::QC_EXT; - else - // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment. 
+SDNode *RISCVDAGToDAGISel::selectSignedBitfieldExtract(SDValue Src, + unsigned Msb, + unsigned Lsb, + const SDLoc &DL, MVT VT, + unsigned Opc) { + if (Opc == RISCV::QC_EXT) { + // QC.EXT X, width, shamt + // shamt is the same as Lsb + // width is the number of bits to extract from the Lsb + Msb = Msb - Lsb + 1; + } + return CurDAG->getMachineNode(Opc, DL, VT, Src, + CurDAG->getTargetConstant(Msb, DL, VT), + CurDAG->getTargetConstant(Lsb, DL, VT)); +} + +bool RISCVDAGToDAGISel::trySignedBitfieldExtractFromSExtInReg(SDNode *Node) { + std::optional<unsigned> Opc = getSignedBitfieldExtractOpc(*Subtarget); + if (!Opc) + return false; + + if (Node->getOpcode() != ISD::SIGN_EXTEND_INREG) + return false; + + SDValue N0 = Node->getOperand(0); + if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse()) + return false; + + auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!ShAmtC) + return false; + + SDLoc DL(Node); + MVT VT = Node->getSimpleValueType(0); + unsigned ExtSize = + cast<VTSDNode>(Node->getOperand(1))->getVT().getSizeInBits(); + // A width covering the whole register is not a bitfield extract. + if (ExtSize >= VT.getScalarSizeInBits()) + return false; + + unsigned Lsb = ShAmtC->getZExtValue(); + unsigned Msb = Lsb + ExtSize - 1; + if (Msb >= Subtarget->getXLen()) + return false; + + // Single-bit extract at the sign bit is a sign-test idiom (srliw/sraiw), not + // a vendor bitfield op. 
+ if (ExtSize == 1 && Lsb + 1 == VT.getScalarSizeInBits()) + return false; + + SDNode *Sbe = + selectSignedBitfieldExtract(N0.getOperand(0), Msb, Lsb, DL, VT, *Opc); + ReplaceNode(Node, Sbe); + return true; +} + +bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { + std::optional<unsigned> Opc = getSignedBitfieldExtractOpc(*Subtarget); + if (!Opc) return false; auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); @@ -655,19 +715,6 @@ bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { if (!N0.hasOneUse()) return false; - auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, - const SDLoc &DL, MVT VT) { - if (Opc == RISCV::QC_EXT) { - // QC.EXT X, width, shamt - // shamt is the same as Lsb - // width is the number of bits to extract from the Lsb - Msb = Msb - Lsb + 1; - } - return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0), - CurDAG->getTargetConstant(Msb, DL, VT), - CurDAG->getTargetConstant(Lsb, DL, VT)); - }; - SDLoc DL(Node); MVT VT = Node->getSimpleValueType(0); const unsigned RightShAmt = N1C->getZExtValue(); @@ -685,12 +732,18 @@ bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { if (LeftShAmt > RightShAmt) return false; + // (sra (shl X, HalfSize), Size-1) is in-register sign extension, not a + // vendor bitfield extract (see DAGCombiner::visitSRA). + if (LeftShAmt * 2 == VT.getSizeInBits() && + RightShAmt == VT.getSizeInBits() - 1) + return false; + const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; const unsigned Msb = MsbPlusOne - 1; const unsigned Lsb = RightShAmt - LeftShAmt; - SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT); - ReplaceNode(Node, Sbe); + ReplaceNode(Node, selectSignedBitfieldExtract(N0.getOperand(0), Msb, Lsb, + DL, VT, *Opc)); return true; } @@ -700,8 +753,9 @@ bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { unsigned ExtSize = cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); - // ExtSize of 32 should use sraiw via tablegen pattern. 
- if (ExtSize == 32) + // ExtSize of 32 should use sraiw via tablegen pattern; full-register + // in-reg extensions are not bitfield extracts either. + if (ExtSize == 32 || ExtSize >= VT.getScalarSizeInBits()) return false; const unsigned Msb = ExtSize - 1; @@ -709,8 +763,8 @@ bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { // the X[Msb] bit and sign-extend it. const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt; - SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT); - ReplaceNode(Node, Sbe); + ReplaceNode(Node, selectSignedBitfieldExtract(N0.getOperand(0), Msb, Lsb, + DL, VT, *Opc)); return true; } @@ -1445,7 +1499,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned ShAmt = N1C->getZExtValue(); unsigned ExtSize = cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); - // ExtSize of 32 should use sraiw via tablegen pattern. + // i32 (or wider) in-reg widths should use sraiw / shift lowering. if (ExtSize >= 32 || ShAmt >= ExtSize) break; unsigned LShAmt = Subtarget->getXLen() - ExtSize; @@ -1459,6 +1513,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { return; } case ISD::SIGN_EXTEND_INREG: { + // Match (sext_inreg (srl X, C), iN) to vendor bitfield extract ops. This + // is the form produced by the generic (sra (shl X, C1), C2) combine. 
+ if (trySignedBitfieldExtractFromSExtInReg(Node)) + return; + // Optimize (sext_inreg (srl X, C), i8/i16) -> // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize) // This is a bitfield extract pattern where we're extracting a signed diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 85bbf31425030..6c622e90c2ce5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -75,6 +75,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool tryShrinkShlLogicImm(SDNode *Node); bool trySignedBitfieldExtract(SDNode *Node); + bool trySignedBitfieldExtractFromSExtInReg(SDNode *Node); bool trySignedBitfieldInsertInSign(SDNode *Node); bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb); @@ -200,6 +201,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { #include "RISCVGenDAGISel.inc" private: + SDNode *selectSignedBitfieldExtract(SDValue Src, unsigned Msb, unsigned Lsb, + const SDLoc &DL, MVT VT, unsigned Opc); bool doPeepholeSExtW(SDNode *Node); bool doPeepholeMaskedRVV(MachineSDNode *Node); bool doPeepholeNoRegPassThru(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index e00d64c792ad8..313af41161194 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1626,6 +1626,10 @@ def : Pat<(qc_insb GPRNoX0:$rd, GPR:$rs1, uimm5_plus1:$width, uimm5:$shamt), let Predicates = [HasVendorXqcibm, NoStdExtZbb, IsRV32] in { def : Pat<(sext_inreg (i32 GPR:$rs1), i16), (QC_EXT GPR:$rs1, 16, 0)>; def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (QC_EXT GPR:$rs1, 8, 0)>; +def : Pat<(sext_inreg (srl GPRNoX0:$rs1, uimm5:$shamt), i16), + (QC_EXT GPRNoX0:$rs1, 16, uimm5:$shamt)>; +def : Pat<(sext_inreg (srl GPRNoX0:$rs1, uimm5:$shamt), i8), + (QC_EXT GPRNoX0:$rs1, 8, uimm5:$shamt)>; } // Predicates = [HasVendorXqcibm, NoStdExtZbb, 
IsRV32] let Predicates = [HasVendorXqcibm, HasStdExtZbb, IsRV32] in { diff --git a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll index 487b726253cc7..a744c30ca7a90 100644 --- a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll +++ b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll @@ -18,8 +18,9 @@ define @sext_inreg( %a) { define @ashr_shl( %a) { ; CHECK-LABEL: ashr_shl: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl z0.s, z0.s, #8 -; CHECK-NEXT: asr z0.s, z0.s, #16 +; CHECK-NEXT: lsr z0.s, z0.s, #8 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth z0.s, p0/m, z0.s ; CHECK-NEXT: ret %shl = shl %a, splat(i32 8) %r = ashr %shl, splat(i32 16) diff --git a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll index d66b9029dbf99..5236d8299eb5e 100644 --- a/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll +++ b/llvm/test/CodeGen/AMDGPU/s_cmp_0.ll @@ -472,7 +472,7 @@ define amdgpu_ps i32 @orn264(i64 inreg %val0, i64 inreg %val1) { define amdgpu_ps i32 @bfe_i32(i32 inreg %val0) { ; CHECK-LABEL: bfe_i32: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_bfe_i32 s0, s0, 0x80010 +; CHECK-NEXT: s_bfe_u32 s0, s0, 0x80010 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 diff --git a/llvm/test/CodeGen/ARM/extract-ext-armv6.ll b/llvm/test/CodeGen/ARM/extract-ext-armv6.ll new file mode 100644 index 0000000000000..09dfbc5f04bc5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/extract-ext-armv6.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=armv6-none-eabi < %s | FileCheck %s + +define i32 @zext_u8_from_shift8(i32 %0) { +; CHECK-LABEL: zext_u8_from_shift8: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r0, r0, ror #8 +; CHECK-NEXT: bx lr + %2 = lshr i32 %0, 8 + %3 = and i32 %2, 255 + ret i32 %3 +} + +define i32 @zext_u8_from_shift16(i32 %0) { +; CHECK-LABEL: zext_u8_from_shift16: +; CHECK: @ %bb.0: 
+; CHECK-NEXT: uxtb r0, r0, ror #16 +; CHECK-NEXT: bx lr + %2 = lshr i32 %0, 16 + %3 = and i32 %2, 255 + ret i32 %3 +} + +define i32 @sext_i8_from_bits8(i32 %0) { +; CHECK-LABEL: sext_i8_from_bits8: +; CHECK: @ %bb.0: +; CHECK-NEXT: sxtb r0, r0, ror #8 +; CHECK-NEXT: bx lr + %2 = shl i32 %0, 16 + %3 = ashr i32 %2, 24 + ret i32 %3 +} + +define i32 @sext_i8_from_bits16(i32 %0) { +; CHECK-LABEL: sext_i8_from_bits16: +; CHECK: @ %bb.0: +; CHECK-NEXT: sxtb r0, r0, ror #16 +; CHECK-NEXT: bx lr + %2 = shl i32 %0, 8 + %3 = ashr i32 %2, 24 + ret i32 %3 +} + +define i32 @zext_u16_from_shift8(i32 %0) { +; CHECK-LABEL: zext_u16_from_shift8: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxth r0, r0, ror #8 +; CHECK-NEXT: bx lr + %2 = lshr i32 %0, 8 + %3 = and i32 %2, 65535 + ret i32 %3 +} + +define i32 @sext_i16_from_shift8(i32 %0) { +; CHECK-LABEL: sext_i16_from_shift8: +; CHECK: @ %bb.0: +; CHECK-NEXT: sxth r0, r0, ror #8 +; CHECK-NEXT: bx lr + %2 = shl i32 %0, 8 + %3 = ashr i32 %2, 16 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/ARM/sxt_rot.ll b/llvm/test/CodeGen/ARM/sxt_rot.ll index 775e45201105c..412b408782d15 100644 --- a/llvm/test/CodeGen/ARM/sxt_rot.ll +++ b/llvm/test/CodeGen/ARM/sxt_rot.ll @@ -14,8 +14,7 @@ define i32 @test0(i8 %A) { define signext i8 @test1(i32 %A) { ; CHECK-V6-LABEL: test1: ; CHECK-V6: @ %bb.0: -; CHECK-V6-NEXT: lsr r0, r0, #8 -; CHECK-V6-NEXT: sxtb r0, r0 +; CHECK-V6-NEXT: sxtb r0, r0, ror #8 ; CHECK-V6-NEXT: bx lr ; ; CHECK-V7-LABEL: test1: diff --git a/llvm/test/CodeGen/ARM/uxt_rot.ll b/llvm/test/CodeGen/ARM/uxt_rot.ll index 15ea34a3d2241..83a40809e8fec 100644 --- a/llvm/test/CodeGen/ARM/uxt_rot.ll +++ b/llvm/test/CodeGen/ARM/uxt_rot.ll @@ -25,8 +25,7 @@ define zeroext i32 @test2(i32 %A.u, i32 %B.u) { define zeroext i32 @test3(i32 %A.u) { ; CHECK-V6-LABEL: test3: ; CHECK-V6: @ %bb.0: -; CHECK-V6-NEXT: lsr r0, r0, #8 -; CHECK-V6-NEXT: uxth r0, r0 +; CHECK-V6-NEXT: uxth r0, r0, ror #8 ; CHECK-V6-NEXT: bx lr ; ; CHECK-V7-LABEL: test3: @@ -44,8 +43,7 @@ define 
zeroext i32 @test3(i32 %A.u) { define zeroext i32 @test4(i32 %A.u) { ; CHECK-V6-LABEL: test4: ; CHECK-V6: @ %bb.0: -; CHECK-V6-NEXT: lsr r0, r0, #8 -; CHECK-V6-NEXT: uxtb r0, r0 +; CHECK-V6-NEXT: uxtb r0, r0, ror #8 ; CHECK-V6-NEXT: bx lr ; ; CHECK-V7-LABEL: test4: diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index ce7741d31d93f..5b1e65b32e20c 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -483,12 +483,19 @@ define i64 @sexth_i64(i64 %a) nounwind { } define i64 @no_sexth_i64(i64 %a) nounwind { -; CHECK-LABEL: no_sexth_i64: -; CHECK: # %bb.0: -; CHECK-NEXT: slli a1, a0, 17 -; CHECK-NEXT: srai a0, a1, 16 -; CHECK-NEXT: srai a1, a1, 31 -; CHECK-NEXT: ret +; RV32I-LABEL: no_sexth_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 17 +; RV32I-NEXT: srai a0, a1, 16 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; +; RV32XTHEADBB-LABEL: no_sexth_i64: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: slli a2, a0, 17 +; RV32XTHEADBB-NEXT: th.ext a1, a0, 14, 14 +; RV32XTHEADBB-NEXT: srai a0, a2, 16 +; RV32XTHEADBB-NEXT: ret %shl = shl i64 %a, 49 %shr = ashr exact i64 %shl, 48 ret i64 %shr diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index 4331d7ca79123..6577f0b0c3891 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -1116,8 +1116,8 @@ define void @sextw_removal_ext(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: .LBB36_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call foo -; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: srai a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 16 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: bnez a0, .LBB36_1 ; RV64I-NEXT: # %bb.2: # %bb7 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll index cbcbf1f392ce8..d2e4c79b4e153 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -203,13 +203,12 @@ define dso_local i32 @b(ptr %c, i32 %d, i32 %e, ptr %n) "frame-pointer"="all" { ; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: add.w r2, r2, #-2147483648 ; CHECK-NEXT: asrl r12, r5, r2 -; CHECK-NEXT: smull r2, r5, r4, r12 -; CHECK-NEXT: lsll r2, r5, #30 ; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: asr.w r11, r5, #31 -; CHECK-NEXT: mov r12, r5 -; CHECK-NEXT: lsll r12, r11, r4 +; CHECK-NEXT: smull r12, r5, r4, r12 ; CHECK-NEXT: mul r2, r2, r9 +; CHECK-NEXT: lsrl r12, r5, #2 +; CHECK-NEXT: asr.w r11, r12, #31 +; CHECK-NEXT: lsll r12, r11, r4 ; CHECK-NEXT: lsrl r12, r11, #2 ; CHECK-NEXT: adds r2, #2 ; CHECK-NEXT: lsll r12, r11, r2