diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 078a936b061a3..6429947958ee9 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -205,6 +205,7 @@ enum NodeType {
   /// CopyFromReg - This node indicates that the input value is a virtual or
   /// physical register that is defined outside of the scope of this
   /// SelectionDAG. The register is available from the RegisterSDNode object.
+  /// Note that CopyFromReg is considered to also freeze the value.
   CopyFromReg,
 
   /// UNDEF - An undefined node.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa746f1c7b7b3..f984d4b395964 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15452,6 +15452,26 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
       N0.getOpcode() == ISD::BUILD_PAIR ||
       N0.getOpcode() == ISD::CONCAT_VECTORS;
 
+  // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
+  // ones" or "constant" into something that depends on FrozenUndef. We can
+  // instead pick defined values for the undef elements to keep those
+  // properties, while at the same time folding away the freeze.
+  // If we implement a more general solution for folding away freeze(undef) in
+  // the future, then this special handling can be removed.
+  if (N0.getOpcode() == ISD::BUILD_VECTOR) {
+    SDLoc DL(N0);
+    MVT VT = N0.getSimpleValueType();
+    if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
+      return DAG.getAllOnesConstant(DL, VT);
+    if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+      SmallVector<SDValue> NewVecC;
+      for (const SDValue &Op : N0->op_values())
+        NewVecC.push_back(
+            Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
+      return DAG.getBuildVector(VT, DL, NewVecC);
+    }
+  }
+
   SmallSetVector<SDValue, 8> MaybePoisonOperands;
   for (SDValue Op : N0->ops()) {
     if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 224c0c5ee9706..dde10fd4b8c8a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5063,6 +5063,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
   case ISD::VALUETYPE:
   case ISD::FrameIndex:
   case ISD::TargetFrameIndex:
+  case ISD::CopyFromReg:
     return true;
 
   case ISD::UNDEF:
diff --git a/llvm/test/CodeGen/AArch64/combine-mul.ll b/llvm/test/CodeGen/AArch64/combine-mul.ll
index a2b0425308093..c49e5ae6620a9 100644
--- a/llvm/test/CodeGen/AArch64/combine-mul.ll
+++ b/llvm/test/CodeGen/AArch64/combine-mul.ll
@@ -44,8 +44,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
 define i64 @combine_mul_self_demandedbits(i64 %x) {
 ; CHECK-LABEL: combine_mul_self_demandedbits:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mul x8, x0, x0
-; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
+; CHECK-NEXT: mul x0, x0, x0
 ; CHECK-NEXT: ret
 %1 = mul i64 %x, %x
 %2 = and i64 %1, -3
@@ -77,7 +76,7 @@ define i8 @one_demanded_bit(i8 %x) {
 define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
 ; CHECK-LABEL: one_demanded_bit_splat:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32
+; CHECK-NEXT: mov w8, #32 // =0x20
 ; CHECK-NEXT: shl v0.2d, v0.2d, #5
 ; CHECK-NEXT: dup v1.2d, x8
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -131,7 +130,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
 define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
 ; CHECK-LABEL: 
squared_demanded_2_low_bits_splat: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-2 +; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll index 6629d34405492..25106b456d2f7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll @@ -123,10 +123,9 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 4, 0 -; CHECK-NEXT: st.b $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0 +; CHECK-NEXT: st.b $a2, $a0, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -150,10 +149,9 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 4, 1 -; CHECK-NEXT: st.h $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1 +; CHECK-NEXT: st.h $a2, $a0, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -177,10 +175,9 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 4, 2 -; CHECK-NEXT: st.w $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2 +; CHECK-NEXT: st.w $a2, $a0, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -204,10 +201,9 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvst $xr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 4, 3 -; CHECK-NEXT: st.d $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3 +; CHECK-NEXT: st.d $a2, $a0, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -231,10 +227,9 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr1, $a0, 0 ; CHECK-NEXT: xvst $xr1, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: addi.d $a2, $sp, 0 -; CHECK-NEXT: bstrins.d $a2, $a0, 4, 2 -; CHECK-NEXT: fst.s $fa0, $a2, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 +; CHECK-NEXT: fst.s $fa0, $a0, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 @@ -258,10 +253,9 @@ define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounw ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr1, $a0, 0 ; CHECK-NEXT: xvst $xr1, $sp, 0 
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: addi.d $a2, $sp, 0 -; CHECK-NEXT: bstrins.d $a2, $a0, 4, 3 -; CHECK-NEXT: fst.d $fa0, $a2, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 +; CHECK-NEXT: fst.d $fa0, $a0, 0 ; CHECK-NEXT: xvld $xr0, $sp, 0 ; CHECK-NEXT: xvst $xr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $fp, -64 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll index 19171b7d8ed78..7f232073ae129 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll @@ -87,10 +87,9 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 3, 0 -; CHECK-NEXT: st.b $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 +; CHECK-NEXT: st.b $a2, $a0, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -107,10 +106,9 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 3, 1 -; CHECK-NEXT: st.h $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 +; CHECK-NEXT: st.h $a2, $a0, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -127,10 +125,9 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 3, 2 -; CHECK-NEXT: st.w $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 +; CHECK-NEXT: st.w $a2, $a0, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -147,10 +144,9 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vst $vr0, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0 -; CHECK-NEXT: addi.d $a3, $sp, 0 -; CHECK-NEXT: bstrins.d $a3, $a0, 3, 3 -; CHECK-NEXT: st.d $a2, $a3, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 +; CHECK-NEXT: st.d $a2, $a0, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -167,10 +163,9 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi ; CHECK-NEXT: addi.d $sp, $sp, -16 ; CHECK-NEXT: vld $vr1, $a0, 0 ; CHECK-NEXT: vst $vr1, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: addi.d $a2, $sp, 0 -; CHECK-NEXT: bstrins.d $a2, $a0, 3, 2 -; CHECK-NEXT: fst.s $fa0, $a2, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 +; CHECK-NEXT: fst.s $fa0, $a0, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 @@ -187,10 +182,9 @@ define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) noun ; CHECK-NEXT: addi.d 
$sp, $sp, -16 ; CHECK-NEXT: vld $vr1, $a0, 0 ; CHECK-NEXT: vst $vr1, $sp, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: addi.d $a2, $sp, 0 -; CHECK-NEXT: bstrins.d $a2, $a0, 3, 3 -; CHECK-NEXT: fst.d $fa0, $a2, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 +; CHECK-NEXT: fst.d $fa0, $a0, 0 ; CHECK-NEXT: vld $vr0, $sp, 0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: addi.d $sp, $sp, 16 diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index d2ee80e6aa951..f032756e007b6 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -57,8 +57,8 @@ define i64 @sltiu(i64 %a) nounwind { ; ; RV32I-LABEL: sltiu: ; RV32I: # %bb.0: -; RV32I-NEXT: seqz a1, a1 ; RV32I-NEXT: sltiu a0, a0, 3 +; RV32I-NEXT: seqz a1, a1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index f96e1bad2e389..a5a2ae79966c3 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV32IA-NEXT: # =>This Loop Header: Depth=1 ; RV32IA-NEXT: # Child Loop BB2_3 Depth 2 ; RV32IA-NEXT: mv a3, a2 -; RV32IA-NEXT: addi a4, a2, 1 -; RV32IA-NEXT: sltu a2, a2, a1 -; RV32IA-NEXT: neg a2, a2 -; RV32IA-NEXT: and a4, a2, a4 +; RV32IA-NEXT: addi a2, a2, 1 +; RV32IA-NEXT: sltu a4, a3, a1 +; RV32IA-NEXT: neg a4, a4 +; RV32IA-NEXT: and a4, a4, a2 ; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start ; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 ; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB3_3 Depth 2 ; RV64IA-NEXT: mv a3, a2 -; RV64IA-NEXT: addi a4, a2, 1 -; RV64IA-NEXT: sltu a2, a2, a1 -; RV64IA-NEXT: neg a2, a2 -; RV64IA-NEXT: and a4, a2, a4 +; RV64IA-NEXT: addi a2, a2, 1 +; RV64IA-NEXT: sltu a4, a3, a1 +; RV64IA-NEXT: neg a4, a4 +; RV64IA-NEXT: and a4, a4, a2 ; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index 9e2b0b5c3cbb4..770dcccee882b 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -456,92 +456,80 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind { define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind { ; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat: ; RV32IZFBFMIN: # %bb.0: # %start -; RV32IZFBFMIN-NEXT: addi sp, sp, -32 -; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill -; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0) -; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) +; RV32IZFBFMIN-NEXT: addi sp, sp, -16 +; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 -; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0 -; RV32IZFBFMIN-NEXT: neg s1, s0 ; 
RV32IZFBFMIN-NEXT: lui a0, 913408 ; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0 -; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0 -; RV32IZFBFMIN-NEXT: neg s3, s2 +; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0 ; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFBFMIN-NEXT: call __fixsfdi -; RV32IZFBFMIN-NEXT: and a0, s3, a0 -; RV32IZFBFMIN-NEXT: or a0, s1, a0 -; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFBFMIN-NEXT: neg a2, a2 ; RV32IZFBFMIN-NEXT: lui a4, 524288 -; RV32IZFBFMIN-NEXT: lui a3, 524288 -; RV32IZFBFMIN-NEXT: beqz s2, .LBB10_2 +; RV32IZFBFMIN-NEXT: lui a2, 524288 +; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2 ; RV32IZFBFMIN-NEXT: # %bb.1: # %start -; RV32IZFBFMIN-NEXT: mv a3, a1 +; RV32IZFBFMIN-NEXT: mv a2, a1 ; RV32IZFBFMIN-NEXT: .LBB10_2: # %start -; RV32IZFBFMIN-NEXT: and a0, a2, a0 -; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4 +; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) +; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4 ; RV32IZFBFMIN-NEXT: # %bb.3: -; RV32IZFBFMIN-NEXT: addi a3, a4, -1 +; RV32IZFBFMIN-NEXT: addi a2, a4, -1 ; RV32IZFBFMIN-NEXT: .LBB10_4: # %start -; RV32IZFBFMIN-NEXT: and a1, a2, a3 -; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload -; RV32IZFBFMIN-NEXT: addi sp, sp, 32 +; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFBFMIN-NEXT: neg a4, a1 +; RV32IZFBFMIN-NEXT: and a1, a4, a2 +; RV32IZFBFMIN-NEXT: neg a2, a3 +; RV32IZFBFMIN-NEXT: neg a3, s0 +; RV32IZFBFMIN-NEXT: and a0, a3, a0 +; RV32IZFBFMIN-NEXT: or a0, a2, a0 +; RV32IZFBFMIN-NEXT: and a0, a4, a0 +; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFBFMIN-NEXT: addi sp, sp, 16 ; RV32IZFBFMIN-NEXT: ret ; ; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat: ; R32IDZFBFMIN: # %bb.0: # %start -; R32IDZFBFMIN-NEXT: addi sp, sp, -32 -; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: addi sp, sp, -16 +; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0) -; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0 -; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0 -; R32IDZFBFMIN-NEXT: neg s1, s0 ; R32IDZFBFMIN-NEXT: lui a0, 913408 ; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0 -; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0 -; R32IDZFBFMIN-NEXT: neg s3, s2 +; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0 ; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0 ; R32IDZFBFMIN-NEXT: call __fixsfdi -; R32IDZFBFMIN-NEXT: and a0, s3, a0 -; R32IDZFBFMIN-NEXT: or a0, s1, a0 -; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0 -; R32IDZFBFMIN-NEXT: neg a2, a2 ; R32IDZFBFMIN-NEXT: lui a4, 524288 -; R32IDZFBFMIN-NEXT: lui a3, 524288 -; R32IDZFBFMIN-NEXT: beqz s2, .LBB10_2 +; R32IDZFBFMIN-NEXT: lui a2, 524288 +; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2 ; R32IDZFBFMIN-NEXT: # %bb.1: # 
%start -; R32IDZFBFMIN-NEXT: mv a3, a1 +; R32IDZFBFMIN-NEXT: mv a2, a1 ; R32IDZFBFMIN-NEXT: .LBB10_2: # %start -; R32IDZFBFMIN-NEXT: and a0, a2, a0 -; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4 +; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0) +; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0 +; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4 ; R32IDZFBFMIN-NEXT: # %bb.3: -; R32IDZFBFMIN-NEXT: addi a3, a4, -1 +; R32IDZFBFMIN-NEXT: addi a2, a4, -1 ; R32IDZFBFMIN-NEXT: .LBB10_4: # %start -; R32IDZFBFMIN-NEXT: and a1, a2, a3 -; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0 +; R32IDZFBFMIN-NEXT: neg a4, a1 +; R32IDZFBFMIN-NEXT: and a1, a4, a2 +; R32IDZFBFMIN-NEXT: neg a2, a3 +; R32IDZFBFMIN-NEXT: neg a3, s0 +; R32IDZFBFMIN-NEXT: and a0, a3, a0 +; R32IDZFBFMIN-NEXT: or a0, a2, a0 +; R32IDZFBFMIN-NEXT: and a0, a4, a0 +; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; R32IDZFBFMIN-NEXT: addi sp, sp, 32 +; R32IDZFBFMIN-NEXT: addi sp, sp, 16 ; R32IDZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_l_bf16_sat: diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index c147d6ec6d9b1..6024a29da33d2 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -692,28 +692,27 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB12_2 +; RV32IFD-NEXT: beqz s0, .LBB12_2 ; RV32IFD-NEXT: # %bb.1: # %start ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB12_2: # %start ; RV32IFD-NEXT: lui a1, %hi(.LCPI12_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB12_4 +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB12_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB12_4: # %start ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -790,33 +789,32 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lui a3, 278016 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: li a2, -1 +; RV32I-NEXT: call __gtdf2 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 802304 +; 
RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfdi -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv s4, a1 -; RV32I-NEXT: lui s6, 524288 -; RV32I-NEXT: bgez s2, .LBB12_2 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: lui a0, 524288 +; RV32I-NEXT: bgez s3, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: lui s4, 524288 +; RV32I-NEXT: lui s5, 524288 ; RV32I-NEXT: .LBB12_2: # %start -; RV32I-NEXT: lui a3, 278016 -; RV32I-NEXT: addi a3, a3, -1 -; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: call __gtdf2 -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: blez a0, .LBB12_4 +; RV32I-NEXT: blez s2, .LBB12_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: addi s4, s6, -1 +; RV32I-NEXT: addi s5, a0, -1 ; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 @@ -825,11 +823,11 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: call __unorddf2 ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: slti a2, s2, 0 +; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: slti a2, s3, 0 ; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a2, a2, s3 -; RV32I-NEXT: sgtz a3, s5 +; RV32I-NEXT: and a2, a2, s4 +; RV32I-NEXT: sgtz a3, s2 ; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: or a2, a3, a2 ; RV32I-NEXT: and a0, a0, a2 @@ -840,7 +838,6 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -949,22 +946,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: lui a0, %hi(.LCPI14_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a0) -; RV32IFD-NEXT: flt.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: fle.d a0, fa5, fa0 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: neg s0, a0 ; RV32IFD-NEXT: call __fixunsdfdi -; RV32IFD-NEXT: and a0, s1, a0 -; RV32IFD-NEXT: or a0, s0, a0 -; RV32IFD-NEXT: and a1, s1, a1 -; RV32IFD-NEXT: or a1, s0, a1 +; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a2) +; RV32IFD-NEXT: and a0, s0, a0 +; RV32IFD-NEXT: flt.d a2, fa5, fs0 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a1, s0, a1 +; RV32IFD-NEXT: or a1, a2, a1 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -983,27 +981,24 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: 
mv s0, a0 -; RV32IZFINXZDINX-NEXT: fle.d a0, a2, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI14_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI14_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI14_0)(a2) -; RV32IZFINXZDINX-NEXT: and a0, s2, a0 -; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4) +; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 ; RV32IZFINXZDINX-NEXT: neg a2, a2 -; RV32IZFINXZDINX-NEXT: or a0, a2, a0 -; RV32IZFINXZDINX-NEXT: and a1, s2, a1 -; RV32IZFINXZDINX-NEXT: or a1, a2, a1 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a3, a3 +; RV32IZFINXZDINX-NEXT: or a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a1 +; RV32IZFINXZDINX-NEXT: or a1, a3, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index f1c56b320b76c..927eee2e9e545 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -53,28 +53,27 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB1_2 +; RV32IFD-NEXT: beqz s0, .LBB1_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB1_2: ; RV32IFD-NEXT: lui a1, %hi(.LCPI1_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB1_4 +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB1_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB1_4: ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -299,28 +298,27 @@ define i64 @test_ceil_si64(double %x) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB5_2 +; RV32IFD-NEXT: beqz s0, .LBB5_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB5_2: ; RV32IFD-NEXT: lui a1, %hi(.LCPI5_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB5_4 +; 
RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB5_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB5_4: ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -545,28 +543,27 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB9_2 +; RV32IFD-NEXT: beqz s0, .LBB9_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB9_2: ; RV32IFD-NEXT: lui a1, %hi(.LCPI9_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB9_4 +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB9_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB9_4: ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -791,28 +788,27 @@ define i64 @test_round_si64(double %x) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB13_2 +; RV32IFD-NEXT: beqz s0, .LBB13_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB13_2: ; RV32IFD-NEXT: lui a1, %hi(.LCPI13_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB13_4 +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB13_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB13_4: ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -1037,28 +1033,27 @@ define i64 @test_roundeven_si64(double %x) 
nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB17_2 +; RV32IFD-NEXT: beqz s0, .LBB17_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB17_2: ; RV32IFD-NEXT: lui a1, %hi(.LCPI17_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB17_4 +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB17_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB17_4: ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload @@ -1283,28 +1278,27 @@ define i64 @test_rint_si64(double %x) nounwind { ; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi -; RV32IFD-NEXT: lui a3, 524288 -; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: lui a4, 524288 ; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: bne s0, a4, .LBB21_2 +; RV32IFD-NEXT: beqz s0, .LBB21_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: mv a2, a1 ; RV32IFD-NEXT: .LBB21_2: ; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1) -; RV32IFD-NEXT: flt.d a4, fa5, fs0 -; RV32IFD-NEXT: beqz a4, .LBB21_4 +; RV32IFD-NEXT: flt.d a3, fa5, fs0 +; RV32IFD-NEXT: beqz a3, .LBB21_4 ; RV32IFD-NEXT: # %bb.3: -; RV32IFD-NEXT: addi a2, a3, -1 +; RV32IFD-NEXT: addi a2, a4, -1 ; RV32IFD-NEXT: .LBB21_4: ; RV32IFD-NEXT: feq.d a1, fs0, fs0 -; RV32IFD-NEXT: neg a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 -; RV32IFD-NEXT: neg a2, a4 -; RV32IFD-NEXT: neg a4, s0 -; RV32IFD-NEXT: and a0, a4, a0 -; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: neg a4, a1 +; RV32IFD-NEXT: and a1, a4, a2 +; RV32IFD-NEXT: neg a2, a3 +; RV32IFD-NEXT: neg a3, s0 ; RV32IFD-NEXT: and a0, a3, a0 +; RV32IFD-NEXT: or a0, a2, a0 +; RV32IFD-NEXT: and a0, a4, a0 ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index 653b64ec73049..7eabd3f5f2273 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -275,26 +275,24 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 -; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: addi s1, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call 
__fixunssfsi -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and s1, s1, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a0, a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -618,38 +616,36 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fa0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI12_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB12_2 ; RV32IF-NEXT: # %bb.1: # %start -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB12_2: # %start -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI12_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB12_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB12_4: # %start -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -867,22 +863,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: lui a0, %hi(.LCPI14_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0) -; RV32IF-NEXT: flt.s a0, fa5, fa0 -; RV32IF-NEXT: neg s0, a0 +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: fmv.w.x fa5, zero ; RV32IF-NEXT: fle.s a0, fa5, fa0 -; RV32IF-NEXT: neg s1, a0 +; RV32IF-NEXT: neg s0, a0 ; RV32IF-NEXT: call __fixunssfdi -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: or a0, s0, a0 -; RV32IF-NEXT: and a1, s1, a1 -; RV32IF-NEXT: or a1, s0, a1 +; RV32IF-NEXT: lui a2, %hi(.LCPI14_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2) +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: flt.s a2, fa5, fs0 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a1, s0, a1 +; RV32IF-NEXT: or a1, a2, a1 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -901,17 +898,19 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0) -; RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1) -; RV32IZFINX-NEXT: flt.s a1, a1, a0 -; RV32IZFINX-NEXT: neg s0, a1 -; RV32IZFINX-NEXT: fle.s a1, zero, a0 -; RV32IZFINX-NEXT: neg s1, a1 +; RV32IZFINX-NEXT: mv s0, a0 +; RV32IZFINX-NEXT: fle.s a0, zero, a0 +; RV32IZFINX-NEXT: neg s1, a0 +; RV32IZFINX-NEXT: mv a0, s0 ; RV32IZFINX-NEXT: call __fixunssfdi +; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0) +; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2) ; RV32IZFINX-NEXT: and a0, s1, a0 -; RV32IZFINX-NEXT: or a0, s0, a0 +; RV32IZFINX-NEXT: flt.s a2, a2, s0 +; RV32IZFINX-NEXT: neg a2, a2 +; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: and a1, s1, a1 -; RV32IZFINX-NEXT: or a1, s0, a1 +; RV32IZFINX-NEXT: or a1, a2, a1 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -929,33 +928,36 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; ; RV32I-LABEL: fcvt_lu_s_sat: ; RV32I: # %bb.0: # %start -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a1, 391168 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 -; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 ; RV32I-NEXT: addi s2, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: and a1, s2, a1 -; RV32I-NEXT: or a1, s1, a1 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, s2, a0 +; RV32I-NEXT: lui a1, 391168 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: or a0, a1, s3 +; RV32I-NEXT: and a2, s2, s1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fcvt_lu_s_sat: @@ -2089,26 +2091,24 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte 
Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: call __gtsf2 -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 -; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2 ; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: addi s1, a0, -1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and s1, s1, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2 +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a0, a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll index 4f747c278da03..5e99c7eb90562 100644 --- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll @@ -37,8 +37,7 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -53,33 +52,32 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI1_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB1_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB1_4: -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI1_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB1_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB1_6: -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -117,23 +115,23 @@ define i64 @test_floor_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 
-; RV32IZFINX-NEXT: flt.s a3, a2, s0 -; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: lui a3, 524288 ; RV32IZFINX-NEXT: beqz s1, .LBB1_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a4, a1 +; RV32IZFINX-NEXT: mv a3, a1 ; RV32IZFINX-NEXT: .LBB1_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a3, .LBB1_6 +; RV32IZFINX-NEXT: beqz a4, .LBB1_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a4, a5, -1 +; RV32IZFINX-NEXT: addi a3, a5, -1 ; RV32IZFINX-NEXT: .LBB1_6: -; RV32IZFINX-NEXT: and a1, a2, a4 +; RV32IZFINX-NEXT: and a1, a2, a3 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -299,8 +297,7 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -315,33 +312,32 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI5_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB5_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB5_4: -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI5_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB5_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB5_6: -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -379,23 +375,23 @@ define i64 @test_ceil_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a3, a2, s0 -; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: lui a3, 
524288 ; RV32IZFINX-NEXT: beqz s1, .LBB5_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a4, a1 +; RV32IZFINX-NEXT: mv a3, a1 ; RV32IZFINX-NEXT: .LBB5_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a3, .LBB5_6 +; RV32IZFINX-NEXT: beqz a4, .LBB5_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a4, a5, -1 +; RV32IZFINX-NEXT: addi a3, a5, -1 ; RV32IZFINX-NEXT: .LBB5_6: -; RV32IZFINX-NEXT: and a1, a2, a4 +; RV32IZFINX-NEXT: and a1, a2, a3 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -561,8 +557,7 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -577,33 +572,32 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI9_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB9_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB9_4: -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB9_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB9_6: -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -641,23 +635,23 @@ define i64 @test_trunc_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a3, a2, s0 -; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: lui a3, 524288 ; RV32IZFINX-NEXT: beqz s1, .LBB9_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a4, a1 +; RV32IZFINX-NEXT: mv a3, a1 ; RV32IZFINX-NEXT: .LBB9_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a3, .LBB9_6 +; RV32IZFINX-NEXT: beqz a4, .LBB9_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a4, a5, -1 
+; RV32IZFINX-NEXT: addi a3, a5, -1 ; RV32IZFINX-NEXT: .LBB9_6: -; RV32IZFINX-NEXT: and a1, a2, a4 +; RV32IZFINX-NEXT: and a1, a2, a3 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -823,8 +817,7 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -839,33 +832,32 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI13_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB13_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB13_4: -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI13_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB13_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB13_6: -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -903,23 +895,23 @@ define i64 @test_round_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a3, a2, s0 -; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: lui a3, 524288 ; RV32IZFINX-NEXT: beqz s1, .LBB13_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a4, a1 +; RV32IZFINX-NEXT: mv a3, a1 ; RV32IZFINX-NEXT: .LBB13_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a3, .LBB13_6 +; RV32IZFINX-NEXT: beqz a4, .LBB13_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a4, a5, -1 +; RV32IZFINX-NEXT: addi a3, a5, -1 ; RV32IZFINX-NEXT: .LBB13_6: -; RV32IZFINX-NEXT: and a1, a2, a4 +; RV32IZFINX-NEXT: and a1, a2, a3 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1085,8 
+1077,7 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1101,33 +1092,32 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI17_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB17_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB17_4: -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI17_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB17_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB17_6: -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1165,23 +1155,23 @@ define i64 @test_roundeven_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a3, a2, s0 -; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: lui a3, 524288 ; RV32IZFINX-NEXT: beqz s1, .LBB17_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a4, a1 +; RV32IZFINX-NEXT: mv a3, a1 ; RV32IZFINX-NEXT: .LBB17_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a3, .LBB17_6 +; RV32IZFINX-NEXT: beqz a4, .LBB17_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a4, a5, -1 +; RV32IZFINX-NEXT: addi a3, a5, -1 ; RV32IZFINX-NEXT: .LBB17_6: -; RV32IZFINX-NEXT: and a1, a2, a4 +; RV32IZFINX-NEXT: and a1, a2, a3 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1347,8 +1337,7 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; 
RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa0 ; RV32IF-NEXT: lui a0, 307200 ; RV32IF-NEXT: fmv.w.x fa5, a0 @@ -1363,33 +1352,32 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IF-NEXT: lui a0, 913408 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fle.s s0, fa5, fs0 -; RV32IF-NEXT: neg s1, s0 ; RV32IF-NEXT: fmv.s fa0, fs0 ; RV32IF-NEXT: call __fixsfdi -; RV32IF-NEXT: lui a2, %hi(.LCPI21_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a2) -; RV32IF-NEXT: and a0, s1, a0 -; RV32IF-NEXT: flt.s a3, fa5, fs0 -; RV32IF-NEXT: neg a2, a3 -; RV32IF-NEXT: or a0, a2, a0 -; RV32IF-NEXT: feq.s a2, fs0, fs0 -; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: lui a5, 524288 ; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: lui a2, 524288 ; RV32IF-NEXT: beqz s0, .LBB21_4 ; RV32IF-NEXT: # %bb.3: -; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: mv a2, a1 ; RV32IF-NEXT: .LBB21_4: -; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: lui a1, %hi(.LCPI21_0) +; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1) +; RV32IF-NEXT: flt.s a3, fa5, fs0 ; RV32IF-NEXT: beqz a3, .LBB21_6 ; RV32IF-NEXT: # %bb.5: -; RV32IF-NEXT: addi a4, a5, -1 +; RV32IF-NEXT: addi a2, a4, -1 ; RV32IF-NEXT: .LBB21_6: -; RV32IF-NEXT: and a1, a2, a4 +; RV32IF-NEXT: feq.s a1, fs0, fs0 +; RV32IF-NEXT: neg a4, a1 +; RV32IF-NEXT: and a1, a4, a2 +; RV32IF-NEXT: neg a2, s0 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: neg a2, a3 +; RV32IF-NEXT: or a0, a2, a0 +; RV32IF-NEXT: and a0, a4, a0 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; @@ -1427,23 +1415,23 @@ define i64 @test_rint_si64(float %x) nounwind { ; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZFINX-NEXT: lw a2, %lo(.LCPI21_0)(a2) ; RV32IZFINX-NEXT: and a0, s2, a0 -; RV32IZFINX-NEXT: flt.s a3, a2, s0 -; RV32IZFINX-NEXT: neg a2, a3 +; RV32IZFINX-NEXT: flt.s a4, a2, s0 +; RV32IZFINX-NEXT: neg a2, a4 ; RV32IZFINX-NEXT: or a0, a2, a0 ; RV32IZFINX-NEXT: feq.s a2, s0, s0 ; RV32IZFINX-NEXT: neg a2, a2 ; RV32IZFINX-NEXT: lui a5, 524288 -; RV32IZFINX-NEXT: lui a4, 524288 +; RV32IZFINX-NEXT: lui a3, 524288 ; RV32IZFINX-NEXT: beqz s1, .LBB21_4 ; RV32IZFINX-NEXT: # %bb.3: -; RV32IZFINX-NEXT: mv a4, a1 +; RV32IZFINX-NEXT: mv a3, a1 ; RV32IZFINX-NEXT: .LBB21_4: ; RV32IZFINX-NEXT: and a0, a2, a0 -; RV32IZFINX-NEXT: beqz a3, .LBB21_6 +; RV32IZFINX-NEXT: beqz a4, .LBB21_6 ; RV32IZFINX-NEXT: # %bb.5: -; RV32IZFINX-NEXT: addi a4, a5, -1 +; RV32IZFINX-NEXT: addi a3, a5, -1 ; RV32IZFINX-NEXT: .LBB21_6: -; RV32IZFINX-NEXT: and a1, a2, a4 +; RV32IZFINX-NEXT: and a1, a2, a3 ; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index c303690aadfff..f6a53a9d76dd3 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -3567,8 +3567,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: neg a3, a0 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 @@ -3659,8 +3659,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind 
{ ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 ; RV32-NEXT: neg a3, a0 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 06ab813faf025..deb5a6d4013d4 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -114,8 +114,8 @@ define i32 @utest_f64i32(double %x) { ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: .cfi_offset ra, -4 ; RV32IF-NEXT: call __fixunsdfdi -; RV32IF-NEXT: seqz a1, a1 ; RV32IF-NEXT: sltiu a2, a0, -1 +; RV32IF-NEXT: seqz a1, a1 ; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: addi a1, a1, -1 ; RV32IF-NEXT: or a0, a1, a0 @@ -429,8 +429,8 @@ define i32 @utesth_f16i32(half %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: call __fixunssfdi -; RV32-NEXT: seqz a1, a1 ; RV32-NEXT: sltiu a2, a0, -1 +; RV32-NEXT: seqz a1, a1 ; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 277749c75bbbf..31fb6e2ee9c84 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -2145,47 +2145,41 @@ define i64 @fcvt_l_h(half %a) nounwind { define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_l_h_sat: ; RV32IZFH: # %bb.0: # %start -; RV32IZFH-NEXT: addi sp, sp, -32 -; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: lui a0, %hi(.LCPI10_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0) +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IZFH-NEXT: flt.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 -; RV32IZFH-NEXT: fle.s s2, fa5, fs0 -; RV32IZFH-NEXT: neg s3, s2 +; RV32IZFH-NEXT: fle.s s0, fa5, fs0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: and a0, s3, a0 -; RV32IZFH-NEXT: or a0, s1, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 ; RV32IZFH-NEXT: lui a4, 524288 -; RV32IZFH-NEXT: lui a3, 524288 -; RV32IZFH-NEXT: beqz s2, .LBB10_2 +; RV32IZFH-NEXT: lui a2, 524288 +; RV32IZFH-NEXT: beqz s0, .LBB10_2 ; RV32IZFH-NEXT: # %bb.1: # %start -; RV32IZFH-NEXT: mv a3, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB10_2: # %start -; RV32IZFH-NEXT: and a0, a2, a0 -; RV32IZFH-NEXT: beqz s0, .LBB10_4 +; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB10_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: addi a3, a4, -1 +; RV32IZFH-NEXT: addi a2, a4, -1 ; RV32IZFH-NEXT: .LBB10_4: # %start -; RV32IZFH-NEXT: and a1, a2, a3 -; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s2, 16(sp) # 4-byte 
Folded Reload -; RV32IZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 32 +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: neg a3, s0 +; RV32IZFH-NEXT: and a0, a3, a0 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_l_h_sat: @@ -2199,47 +2193,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IDZFH-LABEL: fcvt_l_h_sat: ; RV32IDZFH: # %bb.0: # %start -; RV32IDZFH-NEXT: addi sp, sp, -32 -; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI10_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0 -; RV32IDZFH-NEXT: flt.s s0, fa5, fs0 -; RV32IDZFH-NEXT: neg s1, s0 ; RV32IDZFH-NEXT: lui a0, 913408 ; RV32IDZFH-NEXT: fmv.w.x fa5, a0 -; RV32IDZFH-NEXT: fle.s s2, fa5, fs0 -; RV32IDZFH-NEXT: neg s3, s2 +; RV32IDZFH-NEXT: fle.s s0, fa5, fs0 ; RV32IDZFH-NEXT: fmv.s fa0, fs0 ; RV32IDZFH-NEXT: call __fixsfdi -; RV32IDZFH-NEXT: and a0, s3, a0 -; RV32IDZFH-NEXT: or a0, s1, a0 -; RV32IDZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IDZFH-NEXT: neg a2, a2 ; RV32IDZFH-NEXT: lui a4, 524288 -; RV32IDZFH-NEXT: lui a3, 524288 -; RV32IDZFH-NEXT: beqz s2, .LBB10_2 +; RV32IDZFH-NEXT: lui a2, 524288 +; RV32IDZFH-NEXT: beqz s0, .LBB10_2 ; RV32IDZFH-NEXT: # %bb.1: # %start -; RV32IDZFH-NEXT: mv a3, a1 +; RV32IDZFH-NEXT: mv a2, a1 ; RV32IDZFH-NEXT: .LBB10_2: # %start -; RV32IDZFH-NEXT: and a0, a2, a0 -; RV32IDZFH-NEXT: beqz s0, .LBB10_4 +; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0) +; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IDZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IDZFH-NEXT: beqz a3, .LBB10_4 ; RV32IDZFH-NEXT: # %bb.3: -; RV32IDZFH-NEXT: addi a3, a4, -1 +; RV32IDZFH-NEXT: addi a2, a4, -1 ; RV32IDZFH-NEXT: .LBB10_4: # %start -; RV32IDZFH-NEXT: and a1, a2, a3 -; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32IDZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IDZFH-NEXT: neg a4, a1 +; RV32IDZFH-NEXT: and a1, a4, a2 +; RV32IDZFH-NEXT: neg a2, a3 +; RV32IDZFH-NEXT: neg a3, s0 +; RV32IDZFH-NEXT: and a0, a3, a0 +; RV32IDZFH-NEXT: or a0, a2, a0 +; RV32IDZFH-NEXT: and a0, a4, a0 +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IDZFH-NEXT: addi sp, sp, 32 +; RV32IDZFH-NEXT: addi sp, sp, 16 ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_l_h_sat: @@ -2515,47 +2503,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32ID-LABEL: fcvt_l_h_sat: ; RV32ID: # %bb.0: # 
%start -; RV32ID-NEXT: addi sp, sp, -32 -; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32ID-NEXT: addi sp, sp, -16 +; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 -; RV32ID-NEXT: lui a0, %hi(.LCPI10_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32ID-NEXT: fmv.s fs0, fa0 -; RV32ID-NEXT: flt.s s0, fa5, fa0 -; RV32ID-NEXT: neg s1, s0 ; RV32ID-NEXT: lui a0, 913408 ; RV32ID-NEXT: fmv.w.x fa5, a0 -; RV32ID-NEXT: fle.s s2, fa5, fa0 -; RV32ID-NEXT: neg s3, s2 +; RV32ID-NEXT: fle.s s0, fa5, fa0 ; RV32ID-NEXT: call __fixsfdi -; RV32ID-NEXT: and a0, s3, a0 -; RV32ID-NEXT: or a0, s1, a0 -; RV32ID-NEXT: feq.s a2, fs0, fs0 -; RV32ID-NEXT: neg a2, a2 ; RV32ID-NEXT: lui a4, 524288 -; RV32ID-NEXT: lui a3, 524288 -; RV32ID-NEXT: beqz s2, .LBB10_2 +; RV32ID-NEXT: lui a2, 524288 +; RV32ID-NEXT: beqz s0, .LBB10_2 ; RV32ID-NEXT: # %bb.1: # %start -; RV32ID-NEXT: mv a3, a1 +; RV32ID-NEXT: mv a2, a1 ; RV32ID-NEXT: .LBB10_2: # %start -; RV32ID-NEXT: and a0, a2, a0 -; RV32ID-NEXT: beqz s0, .LBB10_4 +; RV32ID-NEXT: lui a1, %hi(.LCPI10_0) +; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32ID-NEXT: flt.s a3, fa5, fs0 +; RV32ID-NEXT: beqz a3, .LBB10_4 ; RV32ID-NEXT: # %bb.3: -; RV32ID-NEXT: addi a3, a4, -1 +; RV32ID-NEXT: addi a2, a4, -1 ; RV32ID-NEXT: .LBB10_4: # %start -; RV32ID-NEXT: and a1, a2, a3 -; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32ID-NEXT: feq.s a1, fs0, fs0 +; RV32ID-NEXT: neg a4, a1 +; RV32ID-NEXT: and a1, a4, a2 +; RV32ID-NEXT: neg a2, s0 +; RV32ID-NEXT: and a0, a2, a0 +; RV32ID-NEXT: neg a2, a3 +; RV32ID-NEXT: or a0, a2, a0 +; RV32ID-NEXT: and a0, a4, a0 +; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32ID-NEXT: addi sp, sp, 32 +; RV32ID-NEXT: addi sp, sp, 16 ; RV32ID-NEXT: ret ; ; RV64ID-LABEL: fcvt_l_h_sat: @@ -2574,47 +2556,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IFZFHMIN-LABEL: fcvt_l_h_sat: ; RV32IFZFHMIN: # %bb.0: # %start -; RV32IFZFHMIN-NEXT: addi sp, sp, -32 -; RV32IFZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill -; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) -; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) +; RV32IFZFHMIN-NEXT: addi sp, sp, -16 +; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0 -; RV32IFZFHMIN-NEXT: flt.s s0, fa5, fs0 -; RV32IFZFHMIN-NEXT: neg s1, s0 ; RV32IFZFHMIN-NEXT: lui a0, 913408 ; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a0 -; RV32IFZFHMIN-NEXT: fle.s s2, fa5, fs0 -; RV32IFZFHMIN-NEXT: neg s3, s2 +; RV32IFZFHMIN-NEXT: 
fle.s s0, fa5, fs0 ; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IFZFHMIN-NEXT: call __fixsfdi -; RV32IFZFHMIN-NEXT: and a0, s3, a0 -; RV32IFZFHMIN-NEXT: or a0, s1, a0 -; RV32IFZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IFZFHMIN-NEXT: neg a2, a2 ; RV32IFZFHMIN-NEXT: lui a4, 524288 -; RV32IFZFHMIN-NEXT: lui a3, 524288 -; RV32IFZFHMIN-NEXT: beqz s2, .LBB10_2 +; RV32IFZFHMIN-NEXT: lui a2, 524288 +; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_2 ; RV32IFZFHMIN-NEXT: # %bb.1: # %start -; RV32IFZFHMIN-NEXT: mv a3, a1 +; RV32IFZFHMIN-NEXT: mv a2, a1 ; RV32IFZFHMIN-NEXT: .LBB10_2: # %start -; RV32IFZFHMIN-NEXT: and a0, a2, a0 -; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_4 +; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) +; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IFZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IFZFHMIN-NEXT: beqz a3, .LBB10_4 ; RV32IFZFHMIN-NEXT: # %bb.3: -; RV32IFZFHMIN-NEXT: addi a3, a4, -1 +; RV32IFZFHMIN-NEXT: addi a2, a4, -1 ; RV32IFZFHMIN-NEXT: .LBB10_4: # %start -; RV32IFZFHMIN-NEXT: and a1, a2, a3 -; RV32IFZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload -; RV32IFZFHMIN-NEXT: addi sp, sp, 32 +; RV32IFZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IFZFHMIN-NEXT: neg a4, a1 +; RV32IFZFHMIN-NEXT: and a1, a4, a2 +; RV32IFZFHMIN-NEXT: neg a2, a3 +; RV32IFZFHMIN-NEXT: neg a3, s0 +; RV32IFZFHMIN-NEXT: and a0, a3, a0 +; RV32IFZFHMIN-NEXT: or a0, a2, a0 +; RV32IFZFHMIN-NEXT: and a0, a4, a0 +; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; RV32IFZFHMIN-NEXT: addi sp, sp, 16 ; RV32IFZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat: @@ -2629,47 +2605,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; ; RV32IDZFHMIN-LABEL: fcvt_l_h_sat: ; RV32IDZFHMIN: # %bb.0: # %start -; RV32IDZFHMIN-NEXT: addi sp, sp, -32 -; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IDZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IDZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IDZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IDZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: addi sp, sp, -16 +; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0) -; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0) ; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0 -; RV32IDZFHMIN-NEXT: flt.s s0, fa5, fs0 -; RV32IDZFHMIN-NEXT: neg s1, s0 ; RV32IDZFHMIN-NEXT: lui a0, 913408 ; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a0 -; RV32IDZFHMIN-NEXT: fle.s s2, fa5, fs0 -; RV32IDZFHMIN-NEXT: neg s3, s2 +; RV32IDZFHMIN-NEXT: fle.s s0, fa5, fs0 ; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IDZFHMIN-NEXT: call __fixsfdi -; RV32IDZFHMIN-NEXT: and a0, s3, a0 -; RV32IDZFHMIN-NEXT: or a0, s1, a0 -; RV32IDZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IDZFHMIN-NEXT: neg a2, a2 ; RV32IDZFHMIN-NEXT: lui a4, 524288 -; RV32IDZFHMIN-NEXT: lui a3, 524288 -; RV32IDZFHMIN-NEXT: beqz s2, .LBB10_2 +; RV32IDZFHMIN-NEXT: lui a2, 524288 +; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_2 ; RV32IDZFHMIN-NEXT: # %bb.1: # %start -; RV32IDZFHMIN-NEXT: mv a3, a1 +; RV32IDZFHMIN-NEXT: mv a2, 
a1 ; RV32IDZFHMIN-NEXT: .LBB10_2: # %start -; RV32IDZFHMIN-NEXT: and a0, a2, a0 -; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_4 +; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0) +; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1) +; RV32IDZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IDZFHMIN-NEXT: beqz a3, .LBB10_4 ; RV32IDZFHMIN-NEXT: # %bb.3: -; RV32IDZFHMIN-NEXT: addi a3, a4, -1 +; RV32IDZFHMIN-NEXT: addi a2, a4, -1 ; RV32IDZFHMIN-NEXT: .LBB10_4: # %start -; RV32IDZFHMIN-NEXT: and a1, a2, a3 -; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IDZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IDZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IDZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32IDZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IDZFHMIN-NEXT: neg a4, a1 +; RV32IDZFHMIN-NEXT: and a1, a4, a2 +; RV32IDZFHMIN-NEXT: neg a2, a3 +; RV32IDZFHMIN-NEXT: neg a3, s0 +; RV32IDZFHMIN-NEXT: and a0, a3, a0 +; RV32IDZFHMIN-NEXT: or a0, a2, a0 +; RV32IDZFHMIN-NEXT: and a0, a4, a0 +; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IDZFHMIN-NEXT: addi sp, sp, 32 +; RV32IDZFHMIN-NEXT: addi sp, sp, 16 ; RV32IDZFHMIN-NEXT: ret ; ; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat: diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll index 9c95210bfa7c0..04a8a66f44598 100644 --- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll @@ -108,40 +108,38 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a2) -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: lui a5, 524288 ; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 ; RV32IZFH-NEXT: beqz s0, .LBB1_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a4, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB1_4: +; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB1_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: .LBB1_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi 
sp, sp, 16 -; RV32IZFH-NEXT: beqz a3, .LBB1_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a4, a5, -1 -; RV32IZFH-NEXT: .LBB1_6: -; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_floor_si64: @@ -179,16 +177,16 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI1_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI1_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a3, a2, s0 -; RV32IZHINX-NEXT: neg a2, a3 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 ; RV32IZHINX-NEXT: beqz s1, .LBB1_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a4, a1 +; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB1_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -196,11 +194,11 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a3, .LBB1_6 +; RV32IZHINX-NEXT: beqz a4, .LBB1_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a4, a5, -1 +; RV32IZHINX-NEXT: addi a3, a5, -1 ; RV32IZHINX-NEXT: .LBB1_6: -; RV32IZHINX-NEXT: and a1, a2, a4 +; RV32IZHINX-NEXT: and a1, a2, a3 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_floor_si64: @@ -238,41 +236,39 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI1_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s1, a0 -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: lui a5, 524288 ; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 ; RV32IZFHMIN-NEXT: beqz s0, .LBB1_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a4, a1 +; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB1_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB1_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte 
Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a4, a5, -1 -; RV32IZFHMIN-NEXT: .LBB1_6: -; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_floor_si64: @@ -324,16 +320,16 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a3 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 ; RV32IZHINXMIN-NEXT: beqz s1, .LBB1_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a4, a1 +; RV32IZHINXMIN-NEXT: mv a3, a1 ; RV32IZHINXMIN-NEXT: .LBB1_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -341,11 +337,11 @@ define i64 @test_floor_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a3, .LBB1_6 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB1_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a4, a5, -1 +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB1_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a4 +; RV32IZHINXMIN-NEXT: and a1, a2, a3 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_floor_si64: @@ -824,40 +820,38 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a2) -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: lui a5, 524288 ; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 ; RV32IZFH-NEXT: beqz s0, .LBB5_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a4, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB5_4: +; RV32IZFH-NEXT: lui a1, %hi(.LCPI5_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB5_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: .LBB5_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; 
RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: beqz a3, .LBB5_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a4, a5, -1 -; RV32IZFH-NEXT: .LBB5_6: -; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_ceil_si64: @@ -895,16 +889,16 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI5_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI5_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a3, a2, s0 -; RV32IZHINX-NEXT: neg a2, a3 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 ; RV32IZHINX-NEXT: beqz s1, .LBB5_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a4, a1 +; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB5_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -912,11 +906,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a3, .LBB5_6 +; RV32IZHINX-NEXT: beqz a4, .LBB5_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a4, a5, -1 +; RV32IZHINX-NEXT: addi a3, a5, -1 ; RV32IZHINX-NEXT: .LBB5_6: -; RV32IZHINX-NEXT: and a1, a2, a4 +; RV32IZHINX-NEXT: and a1, a2, a3 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_ceil_si64: @@ -954,41 +948,39 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI5_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s1, a0 -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: lui a5, 524288 ; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 ; RV32IZFHMIN-NEXT: beqz s0, .LBB5_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a4, a1 +; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB5_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI5_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB5_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 
4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a4, a5, -1 -; RV32IZFHMIN-NEXT: .LBB5_6: -; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_ceil_si64: @@ -1040,16 +1032,16 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI5_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI5_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a3 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 ; RV32IZHINXMIN-NEXT: beqz s1, .LBB5_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a4, a1 +; RV32IZHINXMIN-NEXT: mv a3, a1 ; RV32IZHINXMIN-NEXT: .LBB5_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1057,11 +1049,11 @@ define i64 @test_ceil_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a3, .LBB5_6 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB5_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a4, a5, -1 +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB5_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a4 +; RV32IZHINXMIN-NEXT: and a1, a2, a3 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_ceil_si64: @@ -1540,40 +1532,38 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a2) -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: lui a5, 524288 ; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 ; RV32IZFH-NEXT: beqz s0, .LBB9_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a4, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB9_4: +; RV32IZFH-NEXT: lui a1, %hi(.LCPI9_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB9_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: .LBB9_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte 
Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: beqz a3, .LBB9_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a4, a5, -1 -; RV32IZFH-NEXT: .LBB9_6: -; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_trunc_si64: @@ -1611,16 +1601,16 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI9_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI9_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a3, a2, s0 -; RV32IZHINX-NEXT: neg a2, a3 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 ; RV32IZHINX-NEXT: beqz s1, .LBB9_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a4, a1 +; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB9_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1628,11 +1618,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a3, .LBB9_6 +; RV32IZHINX-NEXT: beqz a4, .LBB9_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a4, a5, -1 +; RV32IZHINX-NEXT: addi a3, a5, -1 ; RV32IZHINX-NEXT: .LBB9_6: -; RV32IZHINX-NEXT: and a1, a2, a4 +; RV32IZHINX-NEXT: and a1, a2, a3 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_trunc_si64: @@ -1670,41 +1660,39 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI9_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s1, a0 -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: lui a5, 524288 ; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 ; RV32IZFHMIN-NEXT: beqz s0, .LBB9_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a4, a1 +; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB9_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI9_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB9_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, 
a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a4, a5, -1 -; RV32IZFHMIN-NEXT: .LBB9_6: -; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_trunc_si64: @@ -1756,16 +1744,16 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI9_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI9_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a3 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 ; RV32IZHINXMIN-NEXT: beqz s1, .LBB9_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a4, a1 +; RV32IZHINXMIN-NEXT: mv a3, a1 ; RV32IZHINXMIN-NEXT: .LBB9_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1773,11 +1761,11 @@ define i64 @test_trunc_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a3, .LBB9_6 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB9_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a4, a5, -1 +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB9_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a4 +; RV32IZHINXMIN-NEXT: and a1, a2, a3 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_trunc_si64: @@ -2256,40 +2244,38 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI13_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a2) -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: lui a5, 524288 ; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 ; RV32IZFH-NEXT: beqz s0, .LBB13_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a4, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB13_4: +; RV32IZFH-NEXT: lui a1, %hi(.LCPI13_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB13_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: .LBB13_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, 
a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: beqz a3, .LBB13_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a4, a5, -1 -; RV32IZFH-NEXT: .LBB13_6: -; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_round_si64: @@ -2327,16 +2313,16 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI13_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI13_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a3, a2, s0 -; RV32IZHINX-NEXT: neg a2, a3 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 ; RV32IZHINX-NEXT: beqz s1, .LBB13_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a4, a1 +; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB13_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2344,11 +2330,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a3, .LBB13_6 +; RV32IZHINX-NEXT: beqz a4, .LBB13_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a4, a5, -1 +; RV32IZHINX-NEXT: addi a3, a5, -1 ; RV32IZHINX-NEXT: .LBB13_6: -; RV32IZHINX-NEXT: and a1, a2, a4 +; RV32IZHINX-NEXT: and a1, a2, a3 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_round_si64: @@ -2386,41 +2372,39 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI13_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s1, a0 -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: lui a5, 524288 ; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 ; RV32IZFHMIN-NEXT: beqz s0, .LBB13_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a4, a1 +; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB13_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI13_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB13_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; 
RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a4, a5, -1 -; RV32IZFHMIN-NEXT: .LBB13_6: -; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_round_si64: @@ -2472,16 +2456,16 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI13_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI13_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a3 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 ; RV32IZHINXMIN-NEXT: beqz s1, .LBB13_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a4, a1 +; RV32IZHINXMIN-NEXT: mv a3, a1 ; RV32IZHINXMIN-NEXT: .LBB13_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -2489,11 +2473,11 @@ define i64 @test_round_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a3, .LBB13_6 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB13_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a4, a5, -1 +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB13_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a4 +; RV32IZHINXMIN-NEXT: and a1, a2, a3 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_round_si64: @@ -2972,40 +2956,38 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a2) -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: lui a5, 524288 ; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 ; RV32IZFH-NEXT: beqz s0, .LBB17_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a4, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB17_4: +; RV32IZFH-NEXT: lui a1, %hi(.LCPI17_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB17_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; 
RV32IZFH-NEXT: .LBB17_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: beqz a3, .LBB17_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a4, a5, -1 -; RV32IZFH-NEXT: .LBB17_6: -; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_roundeven_si64: @@ -3043,16 +3025,16 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI17_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI17_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a3, a2, s0 -; RV32IZHINX-NEXT: neg a2, a3 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 ; RV32IZHINX-NEXT: beqz s1, .LBB17_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a4, a1 +; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB17_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3060,11 +3042,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a3, .LBB17_6 +; RV32IZHINX-NEXT: beqz a4, .LBB17_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a4, a5, -1 +; RV32IZHINX-NEXT: addi a3, a5, -1 ; RV32IZHINX-NEXT: .LBB17_6: -; RV32IZHINX-NEXT: and a1, a2, a4 +; RV32IZHINX-NEXT: and a1, a2, a3 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_roundeven_si64: @@ -3102,41 +3084,39 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI17_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s1, a0 -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: lui a5, 524288 ; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 ; RV32IZFHMIN-NEXT: beqz s0, .LBB17_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a4, a1 +; RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB17_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI17_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; 
RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB17_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a4, a5, -1 -; RV32IZFHMIN-NEXT: .LBB17_6: -; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_roundeven_si64: @@ -3188,16 +3168,16 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI17_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI17_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a3 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 ; RV32IZHINXMIN-NEXT: beqz s1, .LBB17_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a4, a1 +; RV32IZHINXMIN-NEXT: mv a3, a1 ; RV32IZHINXMIN-NEXT: .LBB17_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3205,11 +3185,11 @@ define i64 @test_roundeven_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a3, .LBB17_6 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB17_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a4, a5, -1 +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB17_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a4 +; RV32IZHINXMIN-NEXT: and a1, a2, a3 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_roundeven_si64: @@ -3688,40 +3668,38 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: fcvt.s.h fs0, fa0 ; RV32IZFH-NEXT: lui a0, 913408 ; RV32IZFH-NEXT: fmv.w.x fa5, a0 ; RV32IZFH-NEXT: fle.s s0, fa5, fs0 -; RV32IZFH-NEXT: neg s1, s0 ; RV32IZFH-NEXT: fmv.s fa0, fs0 ; RV32IZFH-NEXT: call __fixsfdi -; RV32IZFH-NEXT: lui a2, %hi(.LCPI21_1) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a2) -; RV32IZFH-NEXT: and a0, s1, a0 -; RV32IZFH-NEXT: flt.s a3, fa5, fs0 -; RV32IZFH-NEXT: neg a2, a3 -; RV32IZFH-NEXT: or a0, a2, a0 -; RV32IZFH-NEXT: feq.s a2, fs0, fs0 -; RV32IZFH-NEXT: neg a2, a2 -; RV32IZFH-NEXT: lui a5, 524288 ; RV32IZFH-NEXT: lui a4, 524288 +; RV32IZFH-NEXT: lui a2, 524288 ; RV32IZFH-NEXT: beqz s0, .LBB21_4 ; RV32IZFH-NEXT: # %bb.3: -; RV32IZFH-NEXT: mv a4, a1 +; RV32IZFH-NEXT: mv a2, a1 ; RV32IZFH-NEXT: .LBB21_4: +; 
RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1) +; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1) +; RV32IZFH-NEXT: flt.s a3, fa5, fs0 +; RV32IZFH-NEXT: beqz a3, .LBB21_6 +; RV32IZFH-NEXT: # %bb.5: +; RV32IZFH-NEXT: addi a2, a4, -1 +; RV32IZFH-NEXT: .LBB21_6: +; RV32IZFH-NEXT: feq.s a1, fs0, fs0 +; RV32IZFH-NEXT: neg a4, a1 +; RV32IZFH-NEXT: and a1, a4, a2 +; RV32IZFH-NEXT: neg a2, s0 ; RV32IZFH-NEXT: and a0, a2, a0 +; RV32IZFH-NEXT: neg a2, a3 +; RV32IZFH-NEXT: or a0, a2, a0 +; RV32IZFH-NEXT: and a0, a4, a0 ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: beqz a3, .LBB21_6 -; RV32IZFH-NEXT: # %bb.5: -; RV32IZFH-NEXT: addi a4, a5, -1 -; RV32IZFH-NEXT: .LBB21_6: -; RV32IZFH-NEXT: and a1, a2, a4 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: test_rint_si64: @@ -3759,16 +3737,16 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lui a2, %hi(.LCPI21_1) ; RV32IZHINX-NEXT: lw a2, %lo(.LCPI21_1)(a2) ; RV32IZHINX-NEXT: and a0, s2, a0 -; RV32IZHINX-NEXT: flt.s a3, a2, s0 -; RV32IZHINX-NEXT: neg a2, a3 +; RV32IZHINX-NEXT: flt.s a4, a2, s0 +; RV32IZHINX-NEXT: neg a2, a4 ; RV32IZHINX-NEXT: or a0, a2, a0 ; RV32IZHINX-NEXT: feq.s a2, s0, s0 ; RV32IZHINX-NEXT: neg a2, a2 ; RV32IZHINX-NEXT: lui a5, 524288 -; RV32IZHINX-NEXT: lui a4, 524288 +; RV32IZHINX-NEXT: lui a3, 524288 ; RV32IZHINX-NEXT: beqz s1, .LBB21_4 ; RV32IZHINX-NEXT: # %bb.3: -; RV32IZHINX-NEXT: mv a4, a1 +; RV32IZHINX-NEXT: mv a3, a1 ; RV32IZHINX-NEXT: .LBB21_4: ; RV32IZHINX-NEXT: and a0, a2, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3776,11 +3754,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: beqz a3, .LBB21_6 +; RV32IZHINX-NEXT: beqz a4, .LBB21_6 ; RV32IZHINX-NEXT: # %bb.5: -; RV32IZHINX-NEXT: addi a4, a5, -1 +; RV32IZHINX-NEXT: addi a3, a5, -1 ; RV32IZHINX-NEXT: .LBB21_6: -; RV32IZHINX-NEXT: and a1, a2, a4 +; RV32IZHINX-NEXT: and a1, a2, a3 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: test_rint_si64: @@ -3818,41 +3796,39 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZFHMIN-NEXT: addi sp, sp, -16 ; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5 ; RV32IZFHMIN-NEXT: lui a0, 913408 ; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0 ; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0 -; RV32IZFHMIN-NEXT: neg s1, s0 ; RV32IZFHMIN-NEXT: fmv.s fa0, fs0 ; RV32IZFHMIN-NEXT: call __fixsfdi -; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI21_0) -; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a2) -; RV32IZFHMIN-NEXT: and a0, s1, a0 -; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 -; RV32IZFHMIN-NEXT: neg a2, a3 -; RV32IZFHMIN-NEXT: or a0, a2, a0 -; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0 -; RV32IZFHMIN-NEXT: neg a2, a2 -; RV32IZFHMIN-NEXT: lui a5, 524288 ; RV32IZFHMIN-NEXT: lui a4, 524288 +; RV32IZFHMIN-NEXT: lui a2, 524288 ; RV32IZFHMIN-NEXT: beqz s0, .LBB21_4 ; RV32IZFHMIN-NEXT: # %bb.3: -; RV32IZFHMIN-NEXT: mv a4, a1 +; 
RV32IZFHMIN-NEXT: mv a2, a1 ; RV32IZFHMIN-NEXT: .LBB21_4: +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0) +; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1) +; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0 +; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 +; RV32IZFHMIN-NEXT: # %bb.5: +; RV32IZFHMIN-NEXT: addi a2, a4, -1 +; RV32IZFHMIN-NEXT: .LBB21_6: +; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0 +; RV32IZFHMIN-NEXT: neg a4, a1 +; RV32IZFHMIN-NEXT: and a1, a4, a2 +; RV32IZFHMIN-NEXT: neg a2, s0 ; RV32IZFHMIN-NEXT: and a0, a2, a0 +; RV32IZFHMIN-NEXT: neg a2, a3 +; RV32IZFHMIN-NEXT: or a0, a2, a0 +; RV32IZFHMIN-NEXT: and a0, a4, a0 ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6 -; RV32IZFHMIN-NEXT: # %bb.5: -; RV32IZFHMIN-NEXT: addi a4, a5, -1 -; RV32IZFHMIN-NEXT: .LBB21_6: -; RV32IZFHMIN-NEXT: and a1, a2, a4 ; RV32IZFHMIN-NEXT: ret ; ; RV64IZFHMIN-LABEL: test_rint_si64: @@ -3904,16 +3880,16 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI21_0) ; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI21_0)(a2) ; RV32IZHINXMIN-NEXT: and a0, s2, a0 -; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0 -; RV32IZHINXMIN-NEXT: neg a2, a3 +; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0 +; RV32IZHINXMIN-NEXT: neg a2, a4 ; RV32IZHINXMIN-NEXT: or a0, a2, a0 ; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0 ; RV32IZHINXMIN-NEXT: neg a2, a2 ; RV32IZHINXMIN-NEXT: lui a5, 524288 -; RV32IZHINXMIN-NEXT: lui a4, 524288 +; RV32IZHINXMIN-NEXT: lui a3, 524288 ; RV32IZHINXMIN-NEXT: beqz s1, .LBB21_4 ; RV32IZHINXMIN-NEXT: # %bb.3: -; RV32IZHINXMIN-NEXT: mv a4, a1 +; RV32IZHINXMIN-NEXT: mv a3, a1 ; RV32IZHINXMIN-NEXT: .LBB21_4: ; RV32IZHINXMIN-NEXT: and a0, a2, a0 ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -3921,11 +3897,11 @@ define i64 @test_rint_si64(half %x) nounwind { ; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: beqz a3, .LBB21_6 +; RV32IZHINXMIN-NEXT: beqz a4, .LBB21_6 ; RV32IZHINXMIN-NEXT: # %bb.5: -; RV32IZHINXMIN-NEXT: addi a4, a5, -1 +; RV32IZHINXMIN-NEXT: addi a3, a5, -1 ; RV32IZHINXMIN-NEXT: .LBB21_6: -; RV32IZHINXMIN-NEXT: and a1, a2, a4 +; RV32IZHINXMIN-NEXT: and a1, a2, a3 ; RV32IZHINXMIN-NEXT: ret ; ; RV64IZHINXMIN-LABEL: test_rint_si64: diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index 98c886333d69a..a0c85ab4dca7f 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -630,8 +630,8 @@ define void @zext16_abs8(i8 %x, ptr %p) { ; RV32I-LABEL: zext16_abs8: ; RV32I: # %bb.0: ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: srai a2, a0, 31 +; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: xor a0, a0, a2 ; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: sh a0, 0(a1) @@ -648,8 +648,8 @@ define void @zext16_abs8(i8 %x, ptr %p) { ; RV64I-LABEL: zext16_abs8: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: srai a2, a0, 63 +; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: xor a0, a0, a2 ; RV64I-NEXT: subw a0, a0, a2 ; RV64I-NEXT: sh a0, 0(a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 
b3bda5973eb8c..a6b2d3141f22f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -2190,65 +2190,66 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa0 -; CHECK-NOV-NEXT: fmv.d fa0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa1 ; CHECK-NOV-NEXT: call __fixdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti -; CHECK-NOV-NEXT: li a2, -1 -; CHECK-NOV-NEXT: srli a3, a2, 1 -; CHECK-NOV-NEXT: beqz s1, .LBB18_3 +; CHECK-NOV-NEXT: mv a2, a0 +; CHECK-NOV-NEXT: li a0, -1 +; CHECK-NOV-NEXT: srli a3, a0, 1 +; CHECK-NOV-NEXT: beqz a1, .LBB18_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, s1, 0 -; CHECK-NOV-NEXT: bnez a1, .LBB18_4 +; CHECK-NOV-NEXT: slti a4, a1, 0 +; CHECK-NOV-NEXT: bnez s1, .LBB18_4 ; CHECK-NOV-NEXT: .LBB18_2: -; CHECK-NOV-NEXT: sltu a5, a0, a3 +; CHECK-NOV-NEXT: sltu a5, s0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB18_5 ; CHECK-NOV-NEXT: j .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_3: -; CHECK-NOV-NEXT: sltu a4, s0, a3 -; CHECK-NOV-NEXT: beqz a1, .LBB18_2 +; CHECK-NOV-NEXT: sltu a4, a2, a3 +; CHECK-NOV-NEXT: beqz s1, .LBB18_2 ; CHECK-NOV-NEXT: .LBB18_4: # %entry -; CHECK-NOV-NEXT: slti a5, a1, 0 +; CHECK-NOV-NEXT: slti a5, s1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB18_6 ; CHECK-NOV-NEXT: .LBB18_5: # %entry -; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB18_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a5, a5, a1 ; CHECK-NOV-NEXT: bnez a4, .LBB18_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB18_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, a1 -; CHECK-NOV-NEXT: slli a1, a2, 63 -; CHECK-NOV-NEXT: beq a5, a2, .LBB18_11 +; CHECK-NOV-NEXT: and a4, a6, s1 +; CHECK-NOV-NEXT: slli a1, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a2, .LBB18_12 +; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12 ; CHECK-NOV-NEXT: .LBB18_10: -; CHECK-NOV-NEXT: sltu a2, a1, a0 -; CHECK-NOV-NEXT: beqz a2, .LBB18_13 +; CHECK-NOV-NEXT: sltu a0, a1, s0 +; CHECK-NOV-NEXT: beqz a0, .LBB18_13 ; CHECK-NOV-NEXT: j .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_11: -; CHECK-NOV-NEXT: sltu a3, a1, s0 -; CHECK-NOV-NEXT: beq a4, a2, .LBB18_10 +; CHECK-NOV-NEXT: sltu a3, a1, a2 +; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10 ; CHECK-NOV-NEXT: .LBB18_12: # %entry -; CHECK-NOV-NEXT: slti a2, a4, 0 -; CHECK-NOV-NEXT: xori a2, a2, 1 -; CHECK-NOV-NEXT: bnez a2, .LBB18_14 +; CHECK-NOV-NEXT: slti a0, a4, 0 +; CHECK-NOV-NEXT: xori a0, a0, 1 +; CHECK-NOV-NEXT: bnez a0, .LBB18_14 ; CHECK-NOV-NEXT: .LBB18_13: # %entry -; CHECK-NOV-NEXT: mv a0, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB18_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB18_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: .LBB18_16: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: mv a0, s0 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2273,43 +2274,43 @@ define <2 x i64> @stest_f64i64(<2 x 
double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB18_3 +; CHECK-V-NEXT: beqz a1, .LBB18_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB18_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: bnez s1, .LBB18_4 ; CHECK-V-NEXT: .LBB18_2: -; CHECK-V-NEXT: sltu a5, a0, a3 +; CHECK-V-NEXT: sltu a5, s0, a3 ; CHECK-V-NEXT: beqz a5, .LBB18_5 ; CHECK-V-NEXT: j .LBB18_6 ; CHECK-V-NEXT: .LBB18_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB18_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB18_2 ; CHECK-V-NEXT: .LBB18_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 +; CHECK-V-NEXT: slti a5, s1, 0 ; CHECK-V-NEXT: bnez a5, .LBB18_6 ; CHECK-V-NEXT: .LBB18_5: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB18_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: and a5, a5, a1 ; CHECK-V-NEXT: bnez a4, .LBB18_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB18_8: # %entry -; CHECK-V-NEXT: and a4, a6, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB18_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -2317,26 +2318,26 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB18_12 ; CHECK-V-NEXT: .LBB18_10: -; CHECK-V-NEXT: sltu a2, a1, a0 +; CHECK-V-NEXT: sltu a2, a1, s0 ; CHECK-V-NEXT: beqz a2, .LBB18_13 ; CHECK-V-NEXT: j .LBB18_14 ; CHECK-V-NEXT: .LBB18_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB18_10 ; CHECK-V-NEXT: .LBB18_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB18_14 ; CHECK-V-NEXT: .LBB18_13: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB18_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB18_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB18_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, s0 -; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2369,19 +2370,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.d fs0, fa0 -; CHECK-NOV-NEXT: fmv.d fa0, fa1 +; CHECK-NOV-NEXT: fmv.d fs0, fa1 ; CHECK-NOV-NEXT: call __fixunsdfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti -; CHECK-NOV-NEXT: snez a2, s1 ; CHECK-NOV-NEXT: snez a1, a1 +; 
CHECK-NOV-NEXT: snez a2, s1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a2, a2, s0 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a0, a1, a0 -; CHECK-NOV-NEXT: addi a1, a2, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: and a1, a1, a0 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2406,25 +2407,25 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixunsdfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunsdfti -; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: snez a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a2 -; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2466,32 +2467,32 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB20_2: # %entry -; CHECK-NOV-NEXT: slti a4, s1, 1 ; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: slti a4, s1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB20_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB20_4: # %entry +; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 -; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB20_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: and a4, a4, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB20_8 ; CHECK-NOV-NEXT: .LBB20_6: -; CHECK-NOV-NEXT: snez a2, a0 +; CHECK-NOV-NEXT: snez a0, a4 ; CHECK-NOV-NEXT: j .LBB20_9 ; CHECK-NOV-NEXT: .LBB20_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: and a4, a4, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB20_6 ; CHECK-NOV-NEXT: .LBB20_8: # %entry -; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: sgtz a0, a2 ; CHECK-NOV-NEXT: .LBB20_9: # %entry -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: neg a0, a0 +; CHECK-NOV-NEXT: and a0, a0, a4 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2533,15 +2534,15 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB20_2: # %entry -; CHECK-V-NEXT: slti a3, s1, 1 ; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 ; CHECK-V-NEXT: blez a1, .LBB20_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB20_4: # %entry +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, 
a4, a0 -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB20_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -2596,65 +2597,66 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.s fa0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa1 ; CHECK-NOV-NEXT: call __fixsfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: li a2, -1 -; CHECK-NOV-NEXT: srli a3, a2, 1 -; CHECK-NOV-NEXT: beqz s1, .LBB21_3 +; CHECK-NOV-NEXT: mv a2, a0 +; CHECK-NOV-NEXT: li a0, -1 +; CHECK-NOV-NEXT: srli a3, a0, 1 +; CHECK-NOV-NEXT: beqz a1, .LBB21_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, s1, 0 -; CHECK-NOV-NEXT: bnez a1, .LBB21_4 +; CHECK-NOV-NEXT: slti a4, a1, 0 +; CHECK-NOV-NEXT: bnez s1, .LBB21_4 ; CHECK-NOV-NEXT: .LBB21_2: -; CHECK-NOV-NEXT: sltu a5, a0, a3 +; CHECK-NOV-NEXT: sltu a5, s0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB21_5 ; CHECK-NOV-NEXT: j .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_3: -; CHECK-NOV-NEXT: sltu a4, s0, a3 -; CHECK-NOV-NEXT: beqz a1, .LBB21_2 +; CHECK-NOV-NEXT: sltu a4, a2, a3 +; CHECK-NOV-NEXT: beqz s1, .LBB21_2 ; CHECK-NOV-NEXT: .LBB21_4: # %entry -; CHECK-NOV-NEXT: slti a5, a1, 0 +; CHECK-NOV-NEXT: slti a5, s1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB21_6 ; CHECK-NOV-NEXT: .LBB21_5: # %entry -; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB21_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a5, a5, a1 ; CHECK-NOV-NEXT: bnez a4, .LBB21_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB21_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, a1 -; CHECK-NOV-NEXT: slli a1, a2, 63 -; CHECK-NOV-NEXT: beq a5, a2, .LBB21_11 +; CHECK-NOV-NEXT: and a4, a6, s1 +; CHECK-NOV-NEXT: slli a1, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a2, .LBB21_12 +; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12 ; CHECK-NOV-NEXT: .LBB21_10: -; CHECK-NOV-NEXT: sltu a2, a1, a0 -; CHECK-NOV-NEXT: beqz a2, .LBB21_13 +; CHECK-NOV-NEXT: sltu a0, a1, s0 +; CHECK-NOV-NEXT: beqz a0, .LBB21_13 ; CHECK-NOV-NEXT: j .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_11: -; CHECK-NOV-NEXT: sltu a3, a1, s0 -; CHECK-NOV-NEXT: beq a4, a2, .LBB21_10 +; CHECK-NOV-NEXT: sltu a3, a1, a2 +; CHECK-NOV-NEXT: beq a4, a0, .LBB21_10 ; CHECK-NOV-NEXT: .LBB21_12: # %entry -; CHECK-NOV-NEXT: slti a2, a4, 0 -; CHECK-NOV-NEXT: xori a2, a2, 1 -; CHECK-NOV-NEXT: bnez a2, .LBB21_14 +; CHECK-NOV-NEXT: slti a0, a4, 0 +; CHECK-NOV-NEXT: xori a0, a0, 1 +; CHECK-NOV-NEXT: bnez a0, .LBB21_14 ; CHECK-NOV-NEXT: .LBB21_13: # %entry -; CHECK-NOV-NEXT: mv a0, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB21_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB21_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: .LBB21_16: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: mv a0, s0 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2679,43 +2681,43 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; 
CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB21_3 +; CHECK-V-NEXT: beqz a1, .LBB21_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB21_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: bnez s1, .LBB21_4 ; CHECK-V-NEXT: .LBB21_2: -; CHECK-V-NEXT: sltu a5, a0, a3 +; CHECK-V-NEXT: sltu a5, s0, a3 ; CHECK-V-NEXT: beqz a5, .LBB21_5 ; CHECK-V-NEXT: j .LBB21_6 ; CHECK-V-NEXT: .LBB21_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB21_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB21_2 ; CHECK-V-NEXT: .LBB21_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 +; CHECK-V-NEXT: slti a5, s1, 0 ; CHECK-V-NEXT: bnez a5, .LBB21_6 ; CHECK-V-NEXT: .LBB21_5: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB21_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: and a5, a5, a1 ; CHECK-V-NEXT: bnez a4, .LBB21_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB21_8: # %entry -; CHECK-V-NEXT: and a4, a6, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB21_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -2723,26 +2725,26 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB21_12 ; CHECK-V-NEXT: .LBB21_10: -; CHECK-V-NEXT: sltu a2, a1, a0 +; CHECK-V-NEXT: sltu a2, a1, s0 ; CHECK-V-NEXT: beqz a2, .LBB21_13 ; CHECK-V-NEXT: j .LBB21_14 ; CHECK-V-NEXT: .LBB21_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB21_10 ; CHECK-V-NEXT: .LBB21_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB21_14 ; CHECK-V-NEXT: .LBB21_13: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB21_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB21_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB21_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, s0 -; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2775,19 +2777,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset fs0, -32 -; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.s fa0, fa1 +; CHECK-NOV-NEXT: fmv.s fs0, fa1 ; CHECK-NOV-NEXT: call __fixunssfti ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti -; CHECK-NOV-NEXT: snez a2, s1 ; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez 
a2, s1 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a2, a2, s0 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a0, a1, a0 -; CHECK-NOV-NEXT: addi a1, a2, -1 -; CHECK-NOV-NEXT: and a1, a1, s0 +; CHECK-NOV-NEXT: and a1, a1, a0 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -2812,25 +2814,25 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vfmv.f.s fa0, v8 +; CHECK-V-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-V-NEXT: vfmv.f.s fa0, v9 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s0, a0 ; CHECK-V-NEXT: mv s1, a1 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixunssfti -; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: snez a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a2, s1 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s0 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a2 -; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 @@ -2872,32 +2874,32 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB23_2: # %entry -; CHECK-NOV-NEXT: slti a4, s1, 1 ; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: slti a4, s1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB23_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB23_4: # %entry +; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 -; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB23_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: and a4, a4, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB23_8 ; CHECK-NOV-NEXT: .LBB23_6: -; CHECK-NOV-NEXT: snez a2, a0 +; CHECK-NOV-NEXT: snez a0, a4 ; CHECK-NOV-NEXT: j .LBB23_9 ; CHECK-NOV-NEXT: .LBB23_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: and a4, a4, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB23_6 ; CHECK-NOV-NEXT: .LBB23_8: # %entry -; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: sgtz a0, a2 ; CHECK-NOV-NEXT: .LBB23_9: # %entry -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: neg a0, a0 +; CHECK-NOV-NEXT: and a0, a0, a4 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -2939,15 +2941,15 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB23_2: # %entry -; CHECK-V-NEXT: slti a3, s1, 1 ; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 ; CHECK-V-NEXT: blez a1, .LBB23_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB23_4: # %entry +; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 -; CHECK-V-NEXT: 
neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB23_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -3002,8 +3004,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s2, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: mv s2, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti ; CHECK-NOV-NEXT: mv s0, a0 @@ -3011,58 +3013,60 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: li a2, -1 -; CHECK-NOV-NEXT: srli a3, a2, 1 -; CHECK-NOV-NEXT: beqz s1, .LBB24_3 +; CHECK-NOV-NEXT: mv a2, a0 +; CHECK-NOV-NEXT: li a0, -1 +; CHECK-NOV-NEXT: srli a3, a0, 1 +; CHECK-NOV-NEXT: beqz a1, .LBB24_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: slti a4, s1, 0 -; CHECK-NOV-NEXT: bnez a1, .LBB24_4 +; CHECK-NOV-NEXT: slti a4, a1, 0 +; CHECK-NOV-NEXT: bnez s1, .LBB24_4 ; CHECK-NOV-NEXT: .LBB24_2: -; CHECK-NOV-NEXT: sltu a5, a0, a3 +; CHECK-NOV-NEXT: sltu a5, s0, a3 ; CHECK-NOV-NEXT: beqz a5, .LBB24_5 ; CHECK-NOV-NEXT: j .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_3: -; CHECK-NOV-NEXT: sltu a4, s0, a3 -; CHECK-NOV-NEXT: beqz a1, .LBB24_2 +; CHECK-NOV-NEXT: sltu a4, a2, a3 +; CHECK-NOV-NEXT: beqz s1, .LBB24_2 ; CHECK-NOV-NEXT: .LBB24_4: # %entry -; CHECK-NOV-NEXT: slti a5, a1, 0 +; CHECK-NOV-NEXT: slti a5, s1, 0 ; CHECK-NOV-NEXT: bnez a5, .LBB24_6 ; CHECK-NOV-NEXT: .LBB24_5: # %entry -; CHECK-NOV-NEXT: mv a0, a3 +; CHECK-NOV-NEXT: mv s0, a3 ; CHECK-NOV-NEXT: .LBB24_6: # %entry ; CHECK-NOV-NEXT: neg a6, a5 ; CHECK-NOV-NEXT: neg a5, a4 -; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a5, a5, a1 ; CHECK-NOV-NEXT: bnez a4, .LBB24_8 ; CHECK-NOV-NEXT: # %bb.7: # %entry -; CHECK-NOV-NEXT: mv s0, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB24_8: # %entry -; CHECK-NOV-NEXT: and a4, a6, a1 -; CHECK-NOV-NEXT: slli a1, a2, 63 -; CHECK-NOV-NEXT: beq a5, a2, .LBB24_11 +; CHECK-NOV-NEXT: and a4, a6, s1 +; CHECK-NOV-NEXT: slli a1, a0, 63 +; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11 ; CHECK-NOV-NEXT: # %bb.9: # %entry ; CHECK-NOV-NEXT: slti a3, a5, 0 ; CHECK-NOV-NEXT: xori a3, a3, 1 -; CHECK-NOV-NEXT: bne a4, a2, .LBB24_12 +; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12 ; CHECK-NOV-NEXT: .LBB24_10: -; CHECK-NOV-NEXT: sltu a2, a1, a0 -; CHECK-NOV-NEXT: beqz a2, .LBB24_13 +; CHECK-NOV-NEXT: sltu a0, a1, s0 +; CHECK-NOV-NEXT: beqz a0, .LBB24_13 ; CHECK-NOV-NEXT: j .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_11: -; CHECK-NOV-NEXT: sltu a3, a1, s0 -; CHECK-NOV-NEXT: beq a4, a2, .LBB24_10 +; CHECK-NOV-NEXT: sltu a3, a1, a2 +; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10 ; CHECK-NOV-NEXT: .LBB24_12: # %entry -; CHECK-NOV-NEXT: slti a2, a4, 0 -; CHECK-NOV-NEXT: xori a2, a2, 1 -; CHECK-NOV-NEXT: bnez a2, .LBB24_14 +; CHECK-NOV-NEXT: slti a0, a4, 0 +; CHECK-NOV-NEXT: xori a0, a0, 1 +; CHECK-NOV-NEXT: bnez a0, .LBB24_14 ; CHECK-NOV-NEXT: .LBB24_13: # %entry -; CHECK-NOV-NEXT: mv a0, a1 +; CHECK-NOV-NEXT: mv s0, a1 ; CHECK-NOV-NEXT: .LBB24_14: # %entry ; CHECK-NOV-NEXT: bnez a3, .LBB24_16 ; CHECK-NOV-NEXT: # %bb.15: # %entry -; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: .LBB24_16: # %entry -; CHECK-NOV-NEXT: mv a1, s0 +; CHECK-NOV-NEXT: mv a0, s0 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; 
CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3082,8 +3086,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s2, a0 -; CHECK-V-NEXT: fmv.w.x fa0, a1 +; CHECK-V-NEXT: mv s2, a1 +; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: mv s0, a0 @@ -3093,31 +3097,31 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: call __fixsfti ; CHECK-V-NEXT: li a2, -1 ; CHECK-V-NEXT: srli a3, a2, 1 -; CHECK-V-NEXT: beqz s1, .LBB24_3 +; CHECK-V-NEXT: beqz a1, .LBB24_3 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: slti a4, s1, 0 -; CHECK-V-NEXT: bnez a1, .LBB24_4 +; CHECK-V-NEXT: slti a4, a1, 0 +; CHECK-V-NEXT: bnez s1, .LBB24_4 ; CHECK-V-NEXT: .LBB24_2: -; CHECK-V-NEXT: sltu a5, a0, a3 +; CHECK-V-NEXT: sltu a5, s0, a3 ; CHECK-V-NEXT: beqz a5, .LBB24_5 ; CHECK-V-NEXT: j .LBB24_6 ; CHECK-V-NEXT: .LBB24_3: -; CHECK-V-NEXT: sltu a4, s0, a3 -; CHECK-V-NEXT: beqz a1, .LBB24_2 +; CHECK-V-NEXT: sltu a4, a0, a3 +; CHECK-V-NEXT: beqz s1, .LBB24_2 ; CHECK-V-NEXT: .LBB24_4: # %entry -; CHECK-V-NEXT: slti a5, a1, 0 +; CHECK-V-NEXT: slti a5, s1, 0 ; CHECK-V-NEXT: bnez a5, .LBB24_6 ; CHECK-V-NEXT: .LBB24_5: # %entry -; CHECK-V-NEXT: mv a0, a3 +; CHECK-V-NEXT: mv s0, a3 ; CHECK-V-NEXT: .LBB24_6: # %entry ; CHECK-V-NEXT: neg a6, a5 ; CHECK-V-NEXT: neg a5, a4 -; CHECK-V-NEXT: and a5, a5, s1 +; CHECK-V-NEXT: and a5, a5, a1 ; CHECK-V-NEXT: bnez a4, .LBB24_8 ; CHECK-V-NEXT: # %bb.7: # %entry -; CHECK-V-NEXT: mv s0, a3 +; CHECK-V-NEXT: mv a0, a3 ; CHECK-V-NEXT: .LBB24_8: # %entry -; CHECK-V-NEXT: and a4, a6, a1 +; CHECK-V-NEXT: and a4, a6, s1 ; CHECK-V-NEXT: slli a1, a2, 63 ; CHECK-V-NEXT: beq a5, a2, .LBB24_11 ; CHECK-V-NEXT: # %bb.9: # %entry @@ -3125,26 +3129,26 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: xori a3, a3, 1 ; CHECK-V-NEXT: bne a4, a2, .LBB24_12 ; CHECK-V-NEXT: .LBB24_10: -; CHECK-V-NEXT: sltu a2, a1, a0 +; CHECK-V-NEXT: sltu a2, a1, s0 ; CHECK-V-NEXT: beqz a2, .LBB24_13 ; CHECK-V-NEXT: j .LBB24_14 ; CHECK-V-NEXT: .LBB24_11: -; CHECK-V-NEXT: sltu a3, a1, s0 +; CHECK-V-NEXT: sltu a3, a1, a0 ; CHECK-V-NEXT: beq a4, a2, .LBB24_10 ; CHECK-V-NEXT: .LBB24_12: # %entry ; CHECK-V-NEXT: slti a2, a4, 0 ; CHECK-V-NEXT: xori a2, a2, 1 ; CHECK-V-NEXT: bnez a2, .LBB24_14 ; CHECK-V-NEXT: .LBB24_13: # %entry -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: .LBB24_14: # %entry ; CHECK-V-NEXT: bnez a3, .LBB24_16 ; CHECK-V-NEXT: # %bb.15: # %entry -; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: mv a0, a1 ; CHECK-V-NEXT: .LBB24_16: # %entry ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, s0 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3175,8 +3179,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset s0, -16 ; CHECK-NOV-NEXT: .cfi_offset s1, -24 ; CHECK-NOV-NEXT: .cfi_offset s2, -32 -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a1 +; CHECK-NOV-NEXT: mv s0, a1 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixunssfti ; CHECK-NOV-NEXT: mv s1, a0 @@ -3184,12 +3188,13 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s0 ; 
CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixunssfti -; CHECK-NOV-NEXT: snez a2, s2 ; CHECK-NOV-NEXT: snez a1, a1 +; CHECK-NOV-NEXT: snez a2, s2 +; CHECK-NOV-NEXT: addi a2, a2, -1 +; CHECK-NOV-NEXT: and a2, a2, s1 ; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: and a0, a1, a0 -; CHECK-NOV-NEXT: addi a1, a2, -1 -; CHECK-NOV-NEXT: and a1, a1, s1 +; CHECK-NOV-NEXT: and a1, a1, a0 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3209,8 +3214,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: fmv.w.x fa0, a1 +; CHECK-V-NEXT: mv s0, a1 +; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixunssfti ; CHECK-V-NEXT: mv s1, a0 @@ -3218,15 +3223,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixunssfti -; CHECK-V-NEXT: snez a2, s2 ; CHECK-V-NEXT: snez a1, a1 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: and a0, a1, a0 +; CHECK-V-NEXT: snez a2, s2 ; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a2, a2, s1 +; CHECK-V-NEXT: addi a1, a1, -1 +; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v9, a2 -; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: vmv.s.x v9, a0 +; CHECK-V-NEXT: vmv.s.x v8, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3269,32 +3274,32 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB26_2: # %entry -; CHECK-NOV-NEXT: slti a4, s1, 1 ; CHECK-NOV-NEXT: slti a3, a1, 1 +; CHECK-NOV-NEXT: slti a4, s1, 1 ; CHECK-NOV-NEXT: blez a1, .LBB26_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry ; CHECK-NOV-NEXT: li a1, 1 ; CHECK-NOV-NEXT: .LBB26_4: # %entry +; CHECK-NOV-NEXT: neg a4, a4 ; CHECK-NOV-NEXT: neg a3, a3 ; CHECK-NOV-NEXT: and a3, a3, a0 -; CHECK-NOV-NEXT: neg a0, a4 ; CHECK-NOV-NEXT: beqz a1, .LBB26_7 ; CHECK-NOV-NEXT: # %bb.5: # %entry ; CHECK-NOV-NEXT: sgtz a1, a1 -; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: and a4, a4, s0 ; CHECK-NOV-NEXT: bnez a2, .LBB26_8 ; CHECK-NOV-NEXT: .LBB26_6: -; CHECK-NOV-NEXT: snez a2, a0 +; CHECK-NOV-NEXT: snez a0, a4 ; CHECK-NOV-NEXT: j .LBB26_9 ; CHECK-NOV-NEXT: .LBB26_7: ; CHECK-NOV-NEXT: snez a1, a3 -; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: and a4, a4, s0 ; CHECK-NOV-NEXT: beqz a2, .LBB26_6 ; CHECK-NOV-NEXT: .LBB26_8: # %entry -; CHECK-NOV-NEXT: sgtz a2, a2 +; CHECK-NOV-NEXT: sgtz a0, a2 ; CHECK-NOV-NEXT: .LBB26_9: # %entry -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a0, a2, a0 +; CHECK-NOV-NEXT: neg a0, a0 +; CHECK-NOV-NEXT: and a0, a0, a4 ; CHECK-NOV-NEXT: neg a1, a1 ; CHECK-NOV-NEXT: and a1, a1, a3 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -3330,15 +3335,15 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-V-NEXT: # %bb.1: # %entry ; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB26_2: # %entry -; CHECK-V-NEXT: slti a3, s1, 1 ; CHECK-V-NEXT: slti a4, a1, 1 +; CHECK-V-NEXT: slti a3, s1, 1 ; CHECK-V-NEXT: blez a1, .LBB26_4 ; CHECK-V-NEXT: # %bb.3: # %entry ; CHECK-V-NEXT: li a1, 1 ; CHECK-V-NEXT: .LBB26_4: # %entry 
+; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: neg a4, a4 ; CHECK-V-NEXT: and a0, a4, a0 -; CHECK-V-NEXT: neg a3, a3 ; CHECK-V-NEXT: beqz a1, .LBB26_7 ; CHECK-V-NEXT: # %bb.5: # %entry ; CHECK-V-NEXT: sgtz a1, a1 @@ -5811,15 +5816,15 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixdfti -; CHECK-NOV-NEXT: mv a2, s1 -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: blez a1, .LBB47_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB47_2: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB47_4 +; CHECK-NOV-NEXT: mv a3, s1 +; CHECK-NOV-NEXT: blez s1, .LBB47_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a2, 1 +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB47_4: # %entry ; CHECK-NOV-NEXT: slti a1, a1, 1 ; CHECK-NOV-NEXT: neg a1, a1 @@ -5827,11 +5832,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: slti a3, a3, 0 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a0, a3, a0 ; CHECK-NOV-NEXT: slti a2, a2, 0 ; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: slti a2, a3, 0 -; CHECK-NOV-NEXT: addi a2, a2, -1 ; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -5867,15 +5872,15 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixdfti -; CHECK-V-NEXT: mv a2, s1 -; CHECK-V-NEXT: mv a3, a1 +; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB47_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB47_2: # %entry -; CHECK-V-NEXT: blez a2, .LBB47_4 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB47_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a2, 1 +; CHECK-V-NEXT: li a3, 1 ; CHECK-V-NEXT: .LBB47_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 @@ -5883,11 +5888,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: slti a1, s1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: slti a2, a3, 0 -; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 @@ -6197,15 +6202,15 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: mv a2, s1 -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: blez a1, .LBB50_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB50_2: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB50_4 +; CHECK-NOV-NEXT: mv a3, s1 +; CHECK-NOV-NEXT: blez s1, .LBB50_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a2, 1 +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB50_4: # %entry ; CHECK-NOV-NEXT: slti a1, a1, 1 ; CHECK-NOV-NEXT: neg a1, a1 @@ -6213,11 +6218,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x 
float> %x) { ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: slti a3, a3, 0 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a0, a3, a0 ; CHECK-NOV-NEXT: slti a2, a2, 0 ; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: slti a2, a3, 0 -; CHECK-NOV-NEXT: addi a2, a2, -1 ; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -6253,15 +6258,15 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vfmv.f.s fa0, v8 ; CHECK-V-NEXT: call __fixsfti -; CHECK-V-NEXT: mv a2, s1 -; CHECK-V-NEXT: mv a3, a1 +; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB50_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB50_2: # %entry -; CHECK-V-NEXT: blez a2, .LBB50_4 +; CHECK-V-NEXT: mv a3, s1 +; CHECK-V-NEXT: blez s1, .LBB50_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a2, 1 +; CHECK-V-NEXT: li a3, 1 ; CHECK-V-NEXT: .LBB50_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 @@ -6269,11 +6274,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: slti a1, s1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: slti a2, a3, 0 -; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 @@ -6575,15 +6580,15 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: call __fixsfti -; CHECK-NOV-NEXT: mv a2, s1 -; CHECK-NOV-NEXT: mv a3, a1 +; CHECK-NOV-NEXT: mv a2, a1 ; CHECK-NOV-NEXT: blez a1, .LBB53_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: li a3, 1 +; CHECK-NOV-NEXT: li a2, 1 ; CHECK-NOV-NEXT: .LBB53_2: # %entry -; CHECK-NOV-NEXT: blez a2, .LBB53_4 +; CHECK-NOV-NEXT: mv a3, s1 +; CHECK-NOV-NEXT: blez s1, .LBB53_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: li a2, 1 +; CHECK-NOV-NEXT: li a3, 1 ; CHECK-NOV-NEXT: .LBB53_4: # %entry ; CHECK-NOV-NEXT: slti a1, a1, 1 ; CHECK-NOV-NEXT: neg a1, a1 @@ -6591,11 +6596,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 ; CHECK-NOV-NEXT: and a0, a0, s0 +; CHECK-NOV-NEXT: slti a3, a3, 0 +; CHECK-NOV-NEXT: addi a3, a3, -1 +; CHECK-NOV-NEXT: and a0, a3, a0 ; CHECK-NOV-NEXT: slti a2, a2, 0 ; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: slti a2, a3, 0 -; CHECK-NOV-NEXT: addi a2, a2, -1 ; CHECK-NOV-NEXT: and a1, a2, a1 ; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -6625,15 +6630,15 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: call __fixsfti -; CHECK-V-NEXT: mv a2, s1 -; CHECK-V-NEXT: mv a3, a1 +; CHECK-V-NEXT: mv a2, a1 ; CHECK-V-NEXT: blez a1, .LBB53_2 ; CHECK-V-NEXT: # %bb.1: # %entry -; CHECK-V-NEXT: li a3, 1 +; CHECK-V-NEXT: li a2, 1 ; CHECK-V-NEXT: .LBB53_2: # %entry -; CHECK-V-NEXT: blez a2, .LBB53_4 +; CHECK-V-NEXT: mv a3, 
s1 +; CHECK-V-NEXT: blez s1, .LBB53_4 ; CHECK-V-NEXT: # %bb.3: # %entry -; CHECK-V-NEXT: li a2, 1 +; CHECK-V-NEXT: li a3, 1 ; CHECK-V-NEXT: .LBB53_4: # %entry ; CHECK-V-NEXT: slti a1, a1, 1 ; CHECK-V-NEXT: neg a1, a1 @@ -6641,11 +6646,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: slti a1, s1, 1 ; CHECK-V-NEXT: neg a1, a1 ; CHECK-V-NEXT: and a1, a1, s0 +; CHECK-V-NEXT: slti a3, a3, 0 +; CHECK-V-NEXT: addi a3, a3, -1 +; CHECK-V-NEXT: and a1, a3, a1 ; CHECK-V-NEXT: slti a2, a2, 0 ; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: slti a2, a3, 0 -; CHECK-V-NEXT: addi a2, a2, -1 ; CHECK-V-NEXT: and a0, a2, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v9, a0 diff --git a/llvm/test/CodeGen/X86/abdu-vector-128.ll b/llvm/test/CodeGen/X86/abdu-vector-128.ll index dd180b67e492a..0c33e8973c2d2 100644 --- a/llvm/test/CodeGen/X86/abdu-vector-128.ll +++ b/llvm/test/CodeGen/X86/abdu-vector-128.ll @@ -715,43 +715,41 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin ; ; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; SSE42-NEXT: movdqa %xmm0, %xmm2 +; SSE42-NEXT: psubq %xmm1, %xmm2 ; SSE42-NEXT: movdqa %xmm1, %xmm3 -; SSE42-NEXT: pxor %xmm2, %xmm3 -; SSE42-NEXT: pxor %xmm0, %xmm2 -; SSE42-NEXT: pcmpgtq %xmm3, %xmm2 -; SSE42-NEXT: movdqa %xmm0, %xmm3 -; SSE42-NEXT: psubq %xmm1, %xmm3 -; SSE42-NEXT: psubq %xmm0, %xmm1 -; SSE42-NEXT: movdqa %xmm2, %xmm0 -; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 -; SSE42-NEXT: paddq %xmm1, %xmm2 -; SSE42-NEXT: movdqa %xmm2, %xmm0 +; SSE42-NEXT: psubq %xmm0, %xmm3 +; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; SSE42-NEXT: pxor %xmm4, %xmm1 +; SSE42-NEXT: pxor %xmm4, %xmm0 +; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 +; SSE42-NEXT: paddq %xmm3, %xmm0 ; SSE42-NEXT: retq ; ; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX1-NEXT: # xmm2 = mem[0,0] -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3 -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3 -; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm3 +; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX1-NEXT: # xmm4 = mem[0,0] +; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1 +; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3 -; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2 -; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3 -; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0 -; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm3 +; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm1 +; 
AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1 +; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp: diff --git a/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll index 017024c173c3f..b2cb2c3e04b3f 100644 --- a/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll +++ b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll @@ -52,10 +52,7 @@ alloca_21: define i32 @kmovrk_1(<4 x ptr> %arg) { ; AVX512-LABEL: kmovrk_1: ; AVX512: # %bb.0: # %bb -; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0] -; AVX512-NEXT: kmovw %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x93,0xc0] -; AVX512-NEXT: testb $15, %al # encoding: [0xa8,0x0f] +; AVX512-NEXT: vptest %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc0] ; AVX512-NEXT: jne .LBB2_1 # encoding: [0x75,A] ; AVX512-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 ; AVX512-NEXT: # %bb.2: # %bb3 @@ -66,10 +63,7 @@ define i32 @kmovrk_1(<4 x ptr> %arg) { ; ; AVX512BW-LABEL: kmovrk_1: ; AVX512BW: # %bb.0: # %bb -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0] -; AVX512BW-NEXT: kmovd %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x93,0xc0] -; AVX512BW-NEXT: testb $15, %al # encoding: [0xa8,0x0f] +; AVX512BW-NEXT: vptest %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc0] ; AVX512BW-NEXT: jne .LBB2_1 # encoding: [0x75,A] ; AVX512BW-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 ; AVX512BW-NEXT: # %bb.2: # %bb3 diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll index 13d1265a249d1..7e48b3719cf0f 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll @@ -30,13 +30,13 @@ define <64 x i8> @add_v64i8_broadcasts(<64 x i8> %a0, i64 %a1, i8 %a2) { ; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 -; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm3 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 +; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm4 +; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index 8d2bb77a9e1af..5d7bf4a2c9788 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -80,13 +80,13 @@ define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) { define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) { ; SSE-LABEL: combine_vec_mul_pow2c: ; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: paddq %xmm0, %xmm2 +; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: movdqa %xmm1, %xmm2 ; SSE-NEXT: psllq $4, %xmm2 ; SSE-NEXT: psllq $2, 
%xmm1 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] -; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: paddq %xmm0, %xmm2 -; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_pow2c: @@ -399,14 +399,12 @@ define i64 @combine_mul_self_demandedbits(i64 %x) { ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: imulq %rdi, %rax -; SSE-NEXT: andq $-3, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_self_demandedbits: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: imulq %rdi, %rax -; AVX-NEXT: andq $-3, %rax ; AVX-NEXT: retq %1 = mul i64 %x, %x %2 = and i64 %1, -3 diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll index e12ca56023a7f..bf7c1c00c71df 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll @@ -178,15 +178,15 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: subl $152, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %edx, %edi -; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: sarl $31, %edx ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl %ecx, %esi -; X86-NEXT: movl %esi, %ebp +; X86-NEXT: movl %esi, %edi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %ebx @@ -195,67 +195,66 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %eax, %esi ; X86-NEXT: xorl {{[0-9]+}}(%esp), %esi ; X86-NEXT: subl %eax, %esi -; X86-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %ebp -; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %esi -; X86-NEXT: xorl %edx, %esi -; X86-NEXT: movl %edi, %edx -; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edi, %ebx +; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %edi +; X86-NEXT: xorl %ebp, %edi +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %edi, %ebp +; X86-NEXT: movl %edx, %esi +; X86-NEXT: xorl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %edx, %ebp ; X86-NEXT: xorl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: subl %edi, %ebp -; X86-NEXT: sbbl %edi, %ebx -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: sbbl %edi, %esi -; X86-NEXT: xorl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: orl %esi, %eax +; X86-NEXT: subl %edx, %ebp +; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: sbbl %edx, %ebx +; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl %edx, %edi +; X86-NEXT: orl %ebx, %ecx ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: sete %cl ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: orl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: orl (%esp), %edx # 4-byte Folded Reload ; X86-NEXT: orl %eax, %edx ; X86-NEXT: sete %al ; X86-NEXT: orb %cl, %al ; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: bsrl %esi, %edx +; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: bsrl %edi, %ecx +; X86-NEXT: bsrl %ebx, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx -; X86-NEXT: testl %esi, %esi +; X86-NEXT: testl %edi, %edi ; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: bsrl %ebx, %edx +; X86-NEXT: bsrl %esi, %edx ; X86-NEXT: xorl $31, %edx ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: bsrl %ebp, %ebp ; X86-NEXT: xorl $31, %ebp ; X86-NEXT: addl $32, %ebp -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %esi, %esi ; X86-NEXT: cmovnel %edx, %ebp ; X86-NEXT: addl $64, %ebp +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %edi +; X86-NEXT: orl %edi, %ebx ; X86-NEXT: cmovnel %ecx, %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: bsrl %edi, %edx ; X86-NEXT: xorl $31, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: bsrl %eax, %ecx ; X86-NEXT: xorl $31, %ecx ; X86-NEXT: addl $32, %ecx @@ -264,7 +263,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: bsrl %ebx, %esi ; X86-NEXT: xorl $31, %esi -; X86-NEXT: bsrl (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: xorl $31, %edx ; X86-NEXT: addl $32, %edx ; X86-NEXT: testl %ebx, %ebx @@ -272,52 +271,56 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: addl $64, %edx ; X86-NEXT: movl %eax, %esi ; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl %edi, %ebx ; X86-NEXT: cmovnel %ecx, %edx ; X86-NEXT: xorl %esi, %esi ; X86-NEXT: subl %edx, %ebp -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %ebx, %ebx ; X86-NEXT: movl $0, %edx ; X86-NEXT: sbbl %edx, %edx ; X86-NEXT: movl $0, %eax ; X86-NEXT: sbbl %eax, %eax +; X86-NEXT: movl $0, %edi +; X86-NEXT: sbbl %edi, %edi ; X86-NEXT: movl $127, %ecx ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: cmpl %ebp, %ecx ; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ebx, %ecx -; X86-NEXT: movl $0, %ecx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl $0, %ecx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl %eax, %ecx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edi, %ecx ; X86-NEXT: setb %cl ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: movl %ebx, %edi ; X86-NEXT: cmovnel %esi, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovnel %esi, %eax -; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: jne .LBB4_8 -; X86-NEXT: # %bb.1: # %_udiv-special-cases -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: xorl $127, %ebx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: jne .LBB4_1 +; X86-NEXT: # %bb.8: # %_udiv-special-cases +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: xorl $127, %ebp +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: orl %ebx, %ecx -; X86-NEXT: je .LBB4_8 -; X86-NEXT: # %bb.2: # %udiv-bb1 -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: orl %ebp, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: je .LBB4_9 +; X86-NEXT: # %bb.5: # %udiv-bb1 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -332,78 +335,82 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al ; X86-NEXT: negb %al -; X86-NEXT: movsbl %al, %ebx -; X86-NEXT: movl 144(%esp,%ebx), %edx -; X86-NEXT: movl 148(%esp,%ebx), %edi +; X86-NEXT: movsbl %al, %edi +; X86-NEXT: movl 144(%esp,%edi), %edx +; X86-NEXT: movl 148(%esp,%edi), %esi ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %edx, %edi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shll %cl, %edx ; X86-NEXT: notb %cl -; X86-NEXT: movl 140(%esp,%ebx), %eax +; X86-NEXT: movl 140(%esp,%edi), %eax ; X86-NEXT: movl %eax, %esi ; X86-NEXT: shrl %esi ; X86-NEXT: shrl %cl, %esi ; X86-NEXT: orl %edx, %esi ; X86-NEXT: movl %esi, %edx -; X86-NEXT: movl 136(%esp,%ebx), %esi +; X86-NEXT: movl 136(%esp,%edi), %esi ; X86-NEXT: movb %ch, %cl ; X86-NEXT: shldl %cl, %esi, %eax ; X86-NEXT: shll %cl, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl $0, %edi ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: jae .LBB4_3 +; X86-NEXT: jae .LBB4_2 ; X86-NEXT: # %bb.6: -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: xorl %esi, %esi +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: jmp .LBB4_7 -; X86-NEXT: .LBB4_3: # %udiv-preheader -; X86-NEXT: movl (%esp), %esi # 4-byte Reload -; X86-NEXT: movl %esi, 
{{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: .LBB4_1: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: jmp .LBB4_9 +; X86-NEXT: .LBB4_2: # %udiv-preheader +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %bl, %ch ; X86-NEXT: andb $7, %ch ; X86-NEXT: movb %bl, %cl ; X86-NEXT: shrb $3, %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl %cl, %ebp -; X86-NEXT: movl 100(%esp,%ebp), %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 96(%esp,%ebp), %ebx -; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movzbl %cl, %ebx +; X86-NEXT: movl 100(%esp,%ebx), %ebp +; X86-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NEXT: movl 96(%esp,%ebx), %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %edx +; X86-NEXT: movl %edi, %edx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %esi, %edx -; X86-NEXT: movl 88(%esp,%ebp), %ebp -; X86-NEXT: movl 92(%esp,%eax), %esi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: shrdl %cl, %ebp, %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 88(%esp,%ebx), %edx +; X86-NEXT: movl 92(%esp,%ebx), %ebx +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: notb %cl -; X86-NEXT: addl %ebx, %ebx -; X86-NEXT: shll %cl, %ebx -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %edi, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: shrdl %cl, %esi, %ebp +; X86-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill +; X86-NEXT: shrdl %cl, %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -413,113 +420,109 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, %eax +; 
X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl %esi, %esi ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB4_4: # %udiv-do-while +; X86-NEXT: .LBB4_3: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl %ebp, %edx +; X86-NEXT: shldl $1, %ebp, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; X86-NEXT: shldl $1, %ebp, %edx -; X86-NEXT: shldl $1, %edi, %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl $1, %eax, %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl $1, %ebx, %ebp +; X86-NEXT: shldl $1, %edi, %ebx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %eax -; X86-NEXT: orl %esi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %ecx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl $1, %eax, %ecx -; X86-NEXT: orl %esi, %ecx +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl $1, %edi, %ecx +; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %eax, %eax -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: shldl $1, %esi, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl %esi, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edx, %ecx +; X86-NEXT: sbbl %ebp, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %ebx, %ecx +; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %esi -; X86-NEXT: andl %edi, %esi +; X86-NEXT: andl $1, %esi +; 
X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: subl %ecx, %ebp -; X86-NEXT: sbbl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: movl (%esp), %edi # 4-byte Reload -; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: subl %ecx, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: sbbl %eax, %ebp +; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: sbbl %esi, (%esp) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl $-1, %esi -; X86-NEXT: adcl $-1, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl $-1, %edi +; X86-NEXT: adcl $-1, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: orl %edx, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: jne .LBB4_4 -; X86-NEXT: # %bb.5: +; X86-NEXT: jne .LBB4_3 +; X86-NEXT: # %bb.4: +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: .LBB4_7: # %udiv-loop-exit +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: shldl $1, %edx, %edi -; X86-NEXT: orl %esi, %edi +; X86-NEXT: orl %ecx, %edi ; X86-NEXT: shldl $1, %eax, %edx -; X86-NEXT: orl %esi, %edx -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: orl %ecx, %edx ; X86-NEXT: shldl $1, %esi, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: addl %esi, %esi -; X86-NEXT: orl %ebx, %esi -; X86-NEXT: .LBB4_8: # %udiv-end -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: xorl %ecx, %edi -; X86-NEXT: xorl %ecx, %edx -; X86-NEXT: xorl %ecx, %eax -; X86-NEXT: xorl %ecx, %esi -; X86-NEXT: subl %ecx, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: .LBB4_9: # %udiv-end +; X86-NEXT: xorl %ebx, %edi 
+; X86-NEXT: xorl %ebx, %edx +; X86-NEXT: xorl %ebx, %eax +; X86-NEXT: xorl %ebx, %esi +; X86-NEXT: subl %ebx, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ecx, %eax +; X86-NEXT: sbbl %ebx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: sbbl %ecx, %edi -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: sbbl %ebx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, (%ebp) ; X86-NEXT: movl %eax, 4(%ebp) ; X86-NEXT: movl %edx, 8(%ebp) @@ -532,7 +535,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull %edi -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: adcl $0, %ecx @@ -553,10 +556,10 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: adcl %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: imull %eax, %ecx ; X86-NEXT: mull %ebx -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: imull {{[0-9]+}}(%esp), %ebx ; X86-NEXT: addl %edx, %ebx ; X86-NEXT: addl %ecx, %ebx @@ -568,12 +571,12 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: mull %edx ; X86-NEXT: addl %edx, %ebp ; X86-NEXT: addl %ecx, %ebp -; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl %ebx, %ebp ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: subl (%esp), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi diff --git a/llvm/test/CodeGen/X86/fold-masked-merge.ll b/llvm/test/CodeGen/X86/fold-masked-merge.ll index 135494ac25f8c..b2614c5fe0493 100644 --- a/llvm/test/CodeGen/X86/fold-masked-merge.ll +++ b/llvm/test/CodeGen/X86/fold-masked-merge.ll @@ -56,9 +56,7 @@ define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) { ; NOBMI-LABEL: masked_merge2: ; NOBMI: # %bb.0: ; NOBMI-NEXT: movl %esi, %eax -; NOBMI-NEXT: xorb %sil, %al -; NOBMI-NEXT: andb %dil, %al -; NOBMI-NEXT: xorb %sil, %al +; NOBMI-NEXT: # kill: def $al killed $al killed $eax ; NOBMI-NEXT: retq ; ; BMI-LABEL: masked_merge2: diff --git a/llvm/test/CodeGen/X86/freeze-combine.ll b/llvm/test/CodeGen/X86/freeze-combine.ll index b037a6d9a1b93..1cfb8627a4dd4 100644 --- a/llvm/test/CodeGen/X86/freeze-combine.ll +++ b/llvm/test/CodeGen/X86/freeze-combine.ll @@ -3,9 +3,9 @@ define i32 @const() { ; CHECK-LABEL: name: const ; CHECK: bb.0 (%ir-block.0): - ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1 - ; CHECK: $eax = COPY [[MOV32ri]] - ; CHECK: RET 0, $eax + ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1 + ; CHECK-NEXT: $eax = COPY [[MOV32ri]] + ; CHECK-NEXT: RET 0, $eax 
%y = freeze i32 1 ret i32 %y } @@ -13,11 +13,11 @@ define i32 @const() { define i32 @fold(i32 %x) { ; CHECK-LABEL: name: fold ; CHECK: bb.0 (%ir-block.0): - ; CHECK: liveins: $edi - ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi - ; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[COPY]] - ; CHECK: $eax = COPY [[COPY1]] - ; CHECK: RET 0, $eax + ; CHECK-NEXT: liveins: $edi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: $eax = COPY [[COPY]] + ; CHECK-NEXT: RET 0, $eax %y = freeze i32 %x %z = freeze i32 %y ret i32 %z diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll index 0c341dc63a9ec..afe0ebb9dcb4f 100644 --- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll +++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll @@ -522,17 +522,17 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) define <16 x i8> @splatconstant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNISSE-LABEL: splatconstant_fshr_v16i8: ; GFNISSE: # %bb.0: +; GFNISSE-NEXT: paddb %xmm0, %xmm0 ; GFNISSE-NEXT: psrlw $7, %xmm1 ; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; GFNISSE-NEXT: paddb %xmm0, %xmm0 ; GFNISSE-NEXT: por %xmm1, %xmm0 ; GFNISSE-NEXT: retq ; ; GFNIAVX1OR2-LABEL: splatconstant_fshr_v16i8: ; GFNIAVX1OR2: # %bb.0: +; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm1, %xmm1 ; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/gfni-rotates.ll b/llvm/test/CodeGen/X86/gfni-rotates.ll index 7ab8300b269a4..96aff5b2af315 100644 --- a/llvm/test/CodeGen/X86/gfni-rotates.ll +++ b/llvm/test/CodeGen/X86/gfni-rotates.ll @@ -421,18 +421,18 @@ define <16 x i8> @splatconstant_rotr_v16i8(<16 x i8> %a) nounwind { ; GFNISSE-LABEL: splatconstant_rotr_v16i8: ; GFNISSE: # %bb.0: ; GFNISSE-NEXT: movdqa %xmm0, %xmm1 -; GFNISSE-NEXT: psrlw $7, %xmm1 -; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; GFNISSE-NEXT: paddb %xmm0, %xmm0 +; GFNISSE-NEXT: paddb %xmm0, %xmm1 +; GFNISSE-NEXT: psrlw $7, %xmm0 +; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; GFNISSE-NEXT: por %xmm1, %xmm0 ; GFNISSE-NEXT: retq ; ; GFNIAVX1OR2-LABEL: splatconstant_rotr_v16i8: ; GFNIAVX1OR2: # %bb.0: -; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm1 -; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0 -; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm1 +; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpor %xmm0, %xmm1, %xmm0 ; GFNIAVX1OR2-NEXT: retq ; ; GFNIAVX512-LABEL: splatconstant_rotr_v16i8: diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll index 39d02f9112f4f..2f780e3c6fe1f 100644 --- a/llvm/test/CodeGen/X86/known-never-zero.ll +++ b/llvm/test/CodeGen/X86/known-never-zero.ll @@ -676,12 +676,13 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: orl $256, %edi # imm = 0x100 +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %edi +; X64-NEXT: rorl %cl, %eax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB22_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: 
rep bsfl %eax, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB22_1: ; X64-NEXT: movl $32, %eax @@ -713,12 +714,13 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotr_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %edi +; X64-NEXT: rorl %cl, %eax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB23_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: rep bsfl %eax, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB23_1: ; X64-NEXT: movl $32, %eax @@ -773,12 +775,13 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotr_with_fshr_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %edi +; X64-NEXT: rorl %cl, %eax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB25_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: rep bsfl %eax, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB25_1: ; X64-NEXT: movl $32, %eax @@ -808,12 +811,13 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) { ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: orl $256, %edi # imm = 0x100 +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %edi +; X64-NEXT: roll %cl, %eax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB26_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: rep bsfl %eax, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB26_1: ; X64-NEXT: movl $32, %eax @@ -845,12 +849,13 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotl_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %edi +; X64-NEXT: roll %cl, %eax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB27_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: rep bsfl %eax, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB27_1: ; X64-NEXT: movl $32, %eax @@ -905,12 +910,13 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) { ; X64-LABEL: rotl_with_fshl_maybe_zero: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %edi +; X64-NEXT: roll %cl, %eax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: je .LBB29_1 ; X64-NEXT: # %bb.2: # %cond.false -; X64-NEXT: rep bsfl %edi, %eax +; X64-NEXT: rep bsfl %eax, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB29_1: ; X64-NEXT: movl $32, %eax diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll index d3cced3233ea6..5a6375e08bcaf 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll @@ -893,27 +893,26 @@ define <2 x i64> @vec128_i64_signed_reg_reg(<2 x i64> %a1, <2 x i64> %a2) nounwi ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] ; SSE41-NEXT: pand %xmm5, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] -; SSE41-NEXT: por %xmm3, %xmm6 -; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 -; SSE41-NEXT: pand %xmm5, %xmm0 -; SSE41-NEXT: por %xmm4, %xmm0 -; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: psubq %xmm1, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pmovsxbq {{.*#+}} xmm3 = [1,1] +; SSE41-NEXT: por %xmm0, %xmm3 +; 
SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psubq %xmm1, %xmm4 ; SSE41-NEXT: psubq %xmm2, %xmm1 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: psrlq $1, %xmm0 ; SSE41-NEXT: psrlq $33, %xmm1 -; SSE41-NEXT: pmuludq %xmm6, %xmm1 -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: psrlq $32, %xmm3 -; SSE41-NEXT: pmuludq %xmm0, %xmm3 -; SSE41-NEXT: paddq %xmm1, %xmm3 -; SSE41-NEXT: psllq $32, %xmm3 -; SSE41-NEXT: pmuludq %xmm6, %xmm0 +; SSE41-NEXT: pmuludq %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psrlq $32, %xmm4 +; SSE41-NEXT: pmuludq %xmm0, %xmm4 +; SSE41-NEXT: paddq %xmm1, %xmm4 +; SSE41-NEXT: psllq $32, %xmm4 +; SSE41-NEXT: pmuludq %xmm3, %xmm0 ; SSE41-NEXT: paddq %xmm2, %xmm0 -; SSE41-NEXT: paddq %xmm3, %xmm0 +; SSE41-NEXT: paddq %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: vec128_i64_signed_reg_reg: @@ -1077,27 +1076,26 @@ define <2 x i64> @vec128_i64_unsigned_reg_reg(<2 x i64> %a1, <2 x i64> %a2) noun ; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] ; SSE41-NEXT: pand %xmm5, %xmm3 -; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3] -; SSE41-NEXT: por %xmm3, %xmm6 -; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 -; SSE41-NEXT: pand %xmm5, %xmm0 -; SSE41-NEXT: por %xmm4, %xmm0 -; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: psubq %xmm1, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] +; SSE41-NEXT: por %xmm3, %xmm0 +; SSE41-NEXT: pmovsxbq {{.*#+}} xmm3 = [1,1] +; SSE41-NEXT: por %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm4 +; SSE41-NEXT: psubq %xmm1, %xmm4 ; SSE41-NEXT: psubq %xmm2, %xmm1 -; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 ; SSE41-NEXT: movapd %xmm1, %xmm0 ; SSE41-NEXT: psrlq $1, %xmm0 ; SSE41-NEXT: psrlq $33, %xmm1 -; SSE41-NEXT: pmuludq %xmm6, %xmm1 -; SSE41-NEXT: movdqa %xmm6, %xmm3 -; SSE41-NEXT: psrlq $32, %xmm3 -; SSE41-NEXT: pmuludq %xmm0, %xmm3 -; SSE41-NEXT: paddq %xmm1, %xmm3 -; SSE41-NEXT: psllq $32, %xmm3 -; SSE41-NEXT: pmuludq %xmm6, %xmm0 +; SSE41-NEXT: pmuludq %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm3, %xmm4 +; SSE41-NEXT: psrlq $32, %xmm4 +; SSE41-NEXT: pmuludq %xmm0, %xmm4 +; SSE41-NEXT: paddq %xmm1, %xmm4 +; SSE41-NEXT: psllq $32, %xmm4 +; SSE41-NEXT: pmuludq %xmm3, %xmm0 ; SSE41-NEXT: paddq %xmm2, %xmm0 -; SSE41-NEXT: paddq %xmm3, %xmm0 +; SSE41-NEXT: paddq %xmm4, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: vec128_i64_unsigned_reg_reg: @@ -1993,14 +1991,14 @@ define <8 x i16> @vec128_i16_unsigned_reg_reg(<8 x i16> %a1, <8 x i16> %a2) noun ; ; AVX512VL-FALLBACK-LABEL: vec128_i16_unsigned_reg_reg: ; AVX512VL-FALLBACK: # %bb.0: -; AVX512VL-FALLBACK-NEXT: vpminuw %xmm1, %xmm0, %xmm2 -; AVX512VL-FALLBACK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpsubw %xmm2, %xmm1, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm2, %xmm2, %xmm2 -; AVX512VL-FALLBACK-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpsubw %xmm2, %xmm1, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpminuw %xmm1, %xmm0, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpsubw %xmm1, %xmm2, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpxor %xmm1, %xmm2, %xmm2 +; 
AVX512VL-FALLBACK-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VL-FALLBACK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; AVX512VL-FALLBACK-NEXT: retq ; @@ -2786,14 +2784,14 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw ; ; AVX512VL-FALLBACK-LABEL: vec128_i8_unsigned_reg_reg: ; AVX512VL-FALLBACK: # %bb.0: -; AVX512VL-FALLBACK-NEXT: vpminub %xmm1, %xmm0, %xmm2 -; AVX512VL-FALLBACK-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm2, %xmm2, %xmm2 -; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1 -; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpminub %xmm1, %xmm0, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1 +; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm2 +; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1 ; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX512VL-FALLBACK-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll index cc08396ae8c78..e880a1acc9e83 100644 --- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll +++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll @@ -1445,14 +1445,14 @@ define <16 x i16> @vec256_i16_unsigned_reg_reg(<16 x i16> %a1, <16 x i16> %a2) n ; ; AVX512VL-FALLBACK-LABEL: vec256_i16_unsigned_reg_reg: ; AVX512VL-FALLBACK: # %bb.0: -; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm2, %ymm2, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpxor %ymm1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ; AVX512VL-FALLBACK-NEXT: retq ; @@ -2210,14 +2210,14 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw ; ; AVX512VL-FALLBACK-LABEL: vec256_i8_unsigned_reg_reg: ; AVX512VL-FALLBACK: # %bb.0: -; AVX512VL-FALLBACK-NEXT: vpminub %ymm1, %ymm0, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm2, %ymm2, %ymm2 -; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 -; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 +; AVX512VL-FALLBACK-NEXT: 
vpminub %ymm1, %ymm0, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm1 +; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm2 +; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX512VL-FALLBACK-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll index 04aff9b7d2e58..ace78b38d53ed 100644 --- a/llvm/test/CodeGen/X86/pr38539.ll +++ b/llvm/test/CodeGen/X86/pr38539.ll @@ -22,7 +22,7 @@ define void @f() nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $176, %esp +; X86-NEXT: subl $160, %esp ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -47,55 +47,54 @@ define void @f() nounwind { ; X86-NEXT: testl %edi, %edi ; X86-NEXT: jne .LBB0_1 ; X86-NEXT: # %bb.2: # %BB_udiv-special-cases -; X86-NEXT: bsrl %esi, %ecx -; X86-NEXT: xorl $31, %ecx -; X86-NEXT: addl $32, %ecx +; X86-NEXT: bsrl %esi, %eax +; X86-NEXT: xorl $31, %eax +; X86-NEXT: addl $32, %eax ; X86-NEXT: jmp .LBB0_3 ; X86-NEXT: .LBB0_1: -; X86-NEXT: bsrl %edi, %ecx -; X86-NEXT: xorl $31, %ecx +; X86-NEXT: bsrl %edi, %eax +; X86-NEXT: xorl $31, %eax ; X86-NEXT: .LBB0_3: # %BB_udiv-special-cases -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: testl %edx, %edx -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB0_4 ; X86-NEXT: # %bb.5: # %BB_udiv-special-cases -; X86-NEXT: addl $64, %ecx +; X86-NEXT: addl $64, %eax ; X86-NEXT: jmp .LBB0_6 ; X86-NEXT: .LBB0_4: -; X86-NEXT: bsrl %edx, %ecx -; X86-NEXT: xorl $31, %ecx -; X86-NEXT: addl $32, %ecx +; X86-NEXT: bsrl %edx, %eax +; X86-NEXT: xorl $31, %eax +; X86-NEXT: addl $32, %eax ; X86-NEXT: .LBB0_6: # %BB_udiv-special-cases -; X86-NEXT: subl $62, %ecx +; X86-NEXT: subl $62, %eax ; X86-NEXT: movl $0, %ebx ; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: addl $-66, %ecx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: addl $-66, %eax ; X86-NEXT: adcl $-1, %ebx -; X86-NEXT: adcl $3, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: movb $1, %al -; X86-NEXT: testb %al, %al +; X86-NEXT: adcl $3, %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movb $1, %cl +; X86-NEXT: testb %cl, %cl ; X86-NEXT: jne .LBB0_11 ; X86-NEXT: # %bb.7: # %BB_udiv-special-cases -; X86-NEXT: andl $3, %edi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: xorl $65, %eax -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edi, %eax -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: andl $3, %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: xorl $65, %ecx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: orl %ebx, %ecx ; X86-NEXT: je .LBB0_11 ; X86-NEXT: # %bb.8: # %udiv-bb1 -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: addl $1, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl $0, %esi -; X86-NEXT: andl $3, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: 
movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: andl $3, %ebx ; X86-NEXT: movb $65, %cl ; X86-NEXT: subb %al, %cl ; X86-NEXT: movb %cl, %ch @@ -112,29 +111,31 @@ define void @f() nounwind { ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 136(%esp,%eax), %edx +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 120(%esp,%eax), %edi ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shll %cl, %edx +; X86-NEXT: shll %cl, %edi ; X86-NEXT: notb %cl -; X86-NEXT: movl 128(%esp,%eax), %edi -; X86-NEXT: movl 132(%esp,%eax), %esi -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl 112(%esp,%eax), %esi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 116(%esp,%eax), %edx +; X86-NEXT: movl %edx, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shll %cl, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll %cl, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: je .LBB0_11 ; X86-NEXT: # %bb.9: # %udiv-preheader -; X86-NEXT: orl %eax, %edx -; X86-NEXT: andl $3, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %eax, %edi +; X86-NEXT: andl $3, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) @@ -149,20 +150,20 @@ define void @f() nounwind { ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $15, %al -; X86-NEXT: movzbl %al, %esi -; X86-NEXT: movl 80(%esp,%esi), %edx -; X86-NEXT: movl 84(%esp,%esi), %eax -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movl 64(%esp,%eax), %edi +; X86-NEXT: movl 68(%esp,%eax), %edx +; X86-NEXT: movl %edx, %esi ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrl %cl, %edi +; X86-NEXT: shrl %cl, %esi ; X86-NEXT: notb %cl -; X86-NEXT: movl 88(%esp,%esi), %esi -; X86-NEXT: addl %esi, %esi -; X86-NEXT: shll %cl, %esi -; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl 72(%esp,%eax), %ebx +; X86-NEXT: addl %ebx, %ebx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: orl %esi, %ebx ; X86-NEXT: movb %ch, %cl -; X86-NEXT: shrdl %cl, %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shrdl %cl, %edx, %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl $-1, %eax @@ -179,63 +180,62 @@ define void @f() nounwind { ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB0_10: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl $1, %esi, %ecx -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: shldl $1, %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: andl $2, %eax -; X86-NEXT: shrl %eax -; X86-NEXT: leal (%eax,%edx,2), %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ebx +; X86-NEXT: shldl $1, %ebx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: orl %esi, %ebx +; X86-NEXT: movl %esi, %edx +; X86-NEXT: andl $2, %edx +; X86-NEXT: shrl %edx +; X86-NEXT: leal (%edx,%ebx,2), %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: shldl $1, %edx, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shldl $1, %eax, %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $1, %eax, %edx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %eax, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $3, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: sbbl %esi, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: sbbl %ecx, %ebx -; X86-NEXT: shll $30, %ebx -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: sarl $30, %eax -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: shrdl $1, %ebx, %eax -; X86-NEXT: movl %eax, %edi -; X86-NEXT: andl $1, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl $3, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: sbbl %ecx, %esi +; X86-NEXT: shll $30, %esi +; X86-NEXT: movl %esi, %edx +; X86-NEXT: sarl $30, %edx +; X86-NEXT: sarl $31, %esi +; X86-NEXT: shrdl $1, %esi, %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %esi, %eax ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: subl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %ebx, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: subl %edx, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sbbl %eax, %ecx ; X86-NEXT: andl 
$3, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: addl $-1, %eax -; X86-NEXT: adcl $-1, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: adcl $3, %edi -; X86-NEXT: andl $3, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: adcl $3, %esi +; X86-NEXT: andl $3, %esi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %edi, %eax -; X86-NEXT: orl %ebx, %eax +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %eax ; X86-NEXT: jne .LBB0_10 ; X86-NEXT: .LBB0_11: # %udiv-end ; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload diff --git a/llvm/test/CodeGen/X86/pr62286.ll b/llvm/test/CodeGen/X86/pr62286.ll index 782c84408f25a..1b13cee628df6 100644 --- a/llvm/test/CodeGen/X86/pr62286.ll +++ b/llvm/test/CodeGen/X86/pr62286.ll @@ -8,21 +8,20 @@ define i64 @PR62286(i32 %a) { ; SSE-LABEL: PR62286: ; SSE: # %bb.0: ; SSE-NEXT: movd %edi, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,0] -; SSE-NEXT: paddd %xmm1, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,1,0] +; SSE-NEXT: paddd %xmm0, %xmm0 ; SSE-NEXT: pxor %xmm2, %xmm2 -; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE-NEXT: pxor %xmm3, %xmm3 -; SSE-NEXT: pcmpgtd %xmm1, %xmm3 -; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0] -; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: paddq %xmm1, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE-NEXT: pxor %xmm3, %xmm3 +; SSE-NEXT: pcmpgtd %xmm0, %xmm3 +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pcmpgtd %xmm1, %xmm2 +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: paddq %xmm0, %xmm1 -; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE-NEXT: paddq %xmm1, %xmm0 +; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: retq ; ; AVX1-LABEL: PR62286: @@ -47,10 +46,10 @@ define i64 @PR62286(i32 %a) { ; AVX2-LABEL: PR62286: ; AVX2: # %bb.0: ; AVX2-NEXT: vmovd %edi, %xmm0 -; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] -; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1 +; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3] +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll index a9f3e8b22fb69..785b97d8c2402 100644 --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -14,7 +14,6 @@ define i256 @test1(i256 
%a) nounwind { ; ILP: # %bb.0: ; ILP-NEXT: movq %rdi, %rax ; ILP-NEXT: leal (%rsi,%rsi), %ecx -; ILP-NEXT: addb $3, %cl ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) @@ -23,6 +22,7 @@ define i256 @test1(i256 %a) nounwind { ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) ; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; ILP-NEXT: addb $3, %cl ; ILP-NEXT: movl %ecx, %edx ; ILP-NEXT: shrb $3, %dl ; ILP-NEXT: andb $7, %cl diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index 31297a06f8099..a1cabb433d879 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -563,18 +563,20 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: subq $120, %rsp ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] -; X64-NEXT: psllq $32, %xmm3 +; X64-NEXT: pxor %xmm3, %xmm3 +; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3] ; X64-NEXT: psrad $31, %xmm2 ; X64-NEXT: psrlq $31, %xmm3 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %r13 -; X64-NEXT: sarq $63, %r13 -; X64-NEXT: shldq $31, %rbx, %r13 +; X64-NEXT: movq %xmm0, %rbp +; X64-NEXT: movq %rbp, %r14 +; X64-NEXT: sarq $63, %r14 +; X64-NEXT: shldq $31, %rbp, %r14 +; X64-NEXT: movq %rbp, %r15 +; X64-NEXT: shlq $31, %r15 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; X64-NEXT: pxor %xmm0, %xmm0 ; X64-NEXT: pcmpgtd %xmm1, %xmm0 @@ -582,113 +584,113 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movq %xmm1, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: sarq $63, %r15 -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: shlq $31, %r12 -; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %r13, %rsi -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: sarq $63, %rbx +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %rbp -; X64-NEXT: sbbq $0, %r14 -; X64-NEXT: shrq $63, %rbx -; X64-NEXT: xorl %r15d, %ebx -; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %r13, %rsi +; X64-NEXT: subq $1, %r13 +; X64-NEXT: sbbq $0, %r12 +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: movq %r14, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al +; X64-NEXT: shrq $63, %rbp +; X64-NEXT: xorl %ebp, %ebx ; X64-NEXT: testb %bl, %al -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq 
{{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rdx, %rbp -; X64-NEXT: movq %r14, %rax +; X64-NEXT: cmpq %rdx, %r13 +; X64-NEXT: movq %r12, %rax ; X64-NEXT: sbbq $0, %rax -; X64-NEXT: cmovgeq %rcx, %r14 -; X64-NEXT: cmovgeq %rdx, %rbp +; X64-NEXT: cmovgeq %rdx, %r13 +; X64-NEXT: cmovgeq %rcx, %r12 ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rbp, %rcx +; X64-NEXT: cmpq %r13, %rcx ; X64-NEXT: movq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; X64-NEXT: movq $-1, %rax -; X64-NEXT: sbbq %r14, %rax -; X64-NEXT: cmovgeq %rcx, %rbp -; X64-NEXT: movq %rbp, %xmm0 +; X64-NEXT: sbbq %r12, %rax +; X64-NEXT: cmovgeq %rcx, %r13 +; X64-NEXT: movq %r13, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] -; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %r13 -; X64-NEXT: sarq $63, %r13 -; X64-NEXT: shldq $31, %rbx, %r13 +; X64-NEXT: movq %xmm0, %rbp +; X64-NEXT: movq %rbp, %r14 +; X64-NEXT: sarq $63, %r14 +; X64-NEXT: shldq $31, %rbp, %r14 +; X64-NEXT: movq %rbp, %r15 +; X64-NEXT: shlq $31, %r15 ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] ; X64-NEXT: movq %xmm0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: sarq $63, %r15 -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: shlq $31, %r12 -; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %r13, %rsi -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: sarq $63, %rbx +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %rbp -; X64-NEXT: sbbq $0, %r14 -; X64-NEXT: shrq $63, %rbx -; X64-NEXT: xorl %r15d, %ebx -; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %r13, %rsi +; X64-NEXT: subq $1, %r13 +; X64-NEXT: sbbq $0, %r12 +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: movq %r14, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al +; X64-NEXT: shrq $63, %rbp +; X64-NEXT: xorl %ebp, %ebx ; X64-NEXT: testb %bl, %al -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %rbp -; X64-NEXT: movq %r14, %rax +; X64-NEXT: cmpq %rcx, %r13 +; X64-NEXT: movq %r12, %rax ; X64-NEXT: sbbq $0, %rax +; X64-NEXT: cmovgeq %rcx, %r13 ; X64-NEXT: movl $0, %eax -; X64-NEXT: cmovgeq %rax, %r14 -; X64-NEXT: cmovgeq %rcx, %rbp +; X64-NEXT: cmovgeq %rax, %r12 ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rbp, %rcx +; X64-NEXT: cmpq %r13, %rcx ; X64-NEXT: movq $-1, %rax -; X64-NEXT: sbbq %r14, %rax -; X64-NEXT: cmovgeq %rcx, %rbp -; X64-NEXT: 
movq %rbp, %xmm0 +; X64-NEXT: sbbq %r12, %rax +; X64-NEXT: cmovgeq %rcx, %r13 +; X64-NEXT: movq %r13, %xmm0 ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: psrlq $1, %xmm1 ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: pshufd $212, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; X64-NEXT: # xmm0 = mem[0,1,1,3] -; X64-NEXT: psllq $32, %xmm0 +; X64-NEXT: pxor %xmm0, %xmm0 +; X64-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] ; X64-NEXT: psrad $31, %xmm1 ; X64-NEXT: psrlq $31, %xmm0 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %r13 -; X64-NEXT: sarq $63, %r13 -; X64-NEXT: shldq $31, %rbx, %r13 +; X64-NEXT: movq %xmm0, %rbp +; X64-NEXT: movq %rbp, %r14 +; X64-NEXT: sarq $63, %r14 +; X64-NEXT: shldq $31, %rbp, %r14 +; X64-NEXT: movq %rbp, %r15 +; X64-NEXT: shlq $31, %r15 ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; X64-NEXT: pxor %xmm1, %xmm1 ; X64-NEXT: pcmpgtd %xmm0, %xmm1 @@ -696,94 +698,92 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movq %xmm0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: sarq $63, %r15 -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: shlq $31, %r12 -; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %r13, %rsi -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: sarq $63, %rbx +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: movq %r14, %rsi +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %rbp +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: movq %rdx, %r12 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %rbp -; X64-NEXT: sbbq $0, %r14 -; X64-NEXT: shrq $63, %rbx -; X64-NEXT: xorl %r15d, %ebx -; X64-NEXT: movq %r12, %rdi -; X64-NEXT: movq %r13, %rsi +; X64-NEXT: subq $1, %r13 +; X64-NEXT: sbbq $0, %r12 +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: movq %r14, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al +; X64-NEXT: shrq $63, %rbp +; X64-NEXT: xorl %ebp, %ebx ; X64-NEXT: testb %bl, %al -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %rbp -; X64-NEXT: movq %r14, %rax +; X64-NEXT: cmpq %rcx, %r13 +; X64-NEXT: movq %r12, %rax ; X64-NEXT: sbbq $0, %rax +; X64-NEXT: cmovgeq %rcx, %r13 ; X64-NEXT: movl $0, %eax -; X64-NEXT: cmovgeq %rax, %r14 -; X64-NEXT: cmovgeq %rcx, %rbp +; X64-NEXT: cmovgeq %rax, %r12 ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rbp, %rcx +; X64-NEXT: cmpq %r13, 
%rcx
 ; X64-NEXT: movq $-1, %rax
-; X64-NEXT: sbbq %r14, %rax
-; X64-NEXT: cmovgeq %rcx, %rbp
-; X64-NEXT: movq %rbp, %xmm0
+; X64-NEXT: sbbq %r12, %rax
+; X64-NEXT: cmovgeq %rcx, %r13
+; X64-NEXT: movq %r13, %xmm0
 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT: # xmm0 = mem[2,3,2,3]
-; X64-NEXT: movq %xmm0, %rbx
-; X64-NEXT: movq %rbx, %r13
-; X64-NEXT: sarq $63, %r13
-; X64-NEXT: shldq $31, %rbx, %r13
+; X64-NEXT: movq %xmm0, %rbp
+; X64-NEXT: movq %rbp, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shldq $31, %rbp, %r14
+; X64-NEXT: movq %rbp, %r15
+; X64-NEXT: shlq $31, %r15
 ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT: # xmm0 = mem[2,3,2,3]
 ; X64-NEXT: movq %xmm0, %rdx
 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: sarq $63, %r15
-; X64-NEXT: movq %rbx, %r12
-; X64-NEXT: shlq $31, %r12
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: sarq $63, %rbx
+; X64-NEXT: movq %r15, %rdi
+; X64-NEXT: movq %r14, %rsi
+; X64-NEXT: movq %rbx, %rcx
 ; X64-NEXT: callq __divti3@PLT
-; X64-NEXT: movq %rax, %rbp
+; X64-NEXT: movq %rax, %r13
 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rdx, %r12
 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; X64-NEXT: subq $1, %rbp
-; X64-NEXT: sbbq $0, %r14
-; X64-NEXT: shrq $63, %rbx
-; X64-NEXT: xorl %r15d, %ebx
-; X64-NEXT: movq %r12, %rdi
-; X64-NEXT: movq %r13, %rsi
+; X64-NEXT: subq $1, %r13
+; X64-NEXT: sbbq $0, %r12
+; X64-NEXT: movq %r15, %rdi
+; X64-NEXT: movq %r14, %rsi
 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; X64-NEXT: movq %r15, %rcx
+; X64-NEXT: movq %rbx, %rcx
 ; X64-NEXT: callq __modti3@PLT
 ; X64-NEXT: orq %rax, %rdx
 ; X64-NEXT: setne %al
+; X64-NEXT: shrq $63, %rbp
+; X64-NEXT: xorl %ebp, %ebx
 ; X64-NEXT: testb %bl, %al
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
-; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
 ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
-; X64-NEXT: cmpq %rcx, %rbp
-; X64-NEXT: movq %r14, %rax
+; X64-NEXT: cmpq %rcx, %r13
+; X64-NEXT: movq %r12, %rax
 ; X64-NEXT: sbbq $0, %rax
+; X64-NEXT: cmovgeq %rcx, %r13
 ; X64-NEXT: movl $0, %eax
-; X64-NEXT: cmovgeq %rax, %r14
-; X64-NEXT: cmovgeq %rcx, %rbp
+; X64-NEXT: cmovgeq %rax, %r12
 ; X64-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
-; X64-NEXT: cmpq %rbp, %rax
-; X64-NEXT: sbbq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
-; X64-NEXT: cmovgeq %rax, %rbp
-; X64-NEXT: movq %rbp, %xmm1
+; X64-NEXT: cmpq %r13, %rax
+; X64-NEXT: sbbq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; X64-NEXT: cmovgeq %rax, %r13
+; X64-NEXT: movq %r13, %xmm1
 ; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: psrlq $1, %xmm0
diff --git a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
index 97c3c2040b291..a80d8d8cd01b8 100644
--- a/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
+++ b/llvm/test/CodeGen/X86/setcc-non-simple-type.ll
@@ -46,7 +46,6 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-NEXT: movq 24(%rsi), %rcx
 ; CHECK-NEXT: movq 32(%rsi), %rdx
 ; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
-; CHECK-NEXT: xorl %esi, %esi
 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1]
 ; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
 ; CHECK-NEXT: .p2align 4, 0x90
@@ -54,39 +53,45 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-NEXT: # =>This Loop Header: Depth=1
 ; CHECK-NEXT: # Child Loop BB0_2 Depth 2
 ; CHECK-NEXT: xorpd %xmm3, %xmm3
-; CHECK-NEXT: movq $-1024, %rdi # imm = 0xFC00
+; CHECK-NEXT: movq $-1024, %rsi # imm = 0xFC00
 ; CHECK-NEXT: movdqa %xmm0, %xmm4
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: .LBB0_2: # %vector.body
 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: cmpq 1024(%rdx,%rdi), %rsi
-; CHECK-NEXT: movq %rcx, %r8
-; CHECK-NEXT: sbbq 1032(%rdx,%rdi), %r8
-; CHECK-NEXT: setge %r8b
-; CHECK-NEXT: movzbl %r8b, %r8d
-; CHECK-NEXT: andl $1, %r8d
+; CHECK-NEXT: movdqu 1024(%rdx,%rsi), %xmm5
+; CHECK-NEXT: movdqu 1040(%rdx,%rsi), %xmm6
+; CHECK-NEXT: movq %xmm5, %rdi
+; CHECK-NEXT: movq %xmm6, %r8
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
+; CHECK-NEXT: movq %xmm5, %r9
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3]
+; CHECK-NEXT: movq %xmm5, %r10
 ; CHECK-NEXT: negq %r8
-; CHECK-NEXT: movq %r8, %xmm5
-; CHECK-NEXT: cmpq 1040(%rdx,%rdi), %rsi
 ; CHECK-NEXT: movq %rcx, %r8
-; CHECK-NEXT: sbbq 1048(%rdx,%rdi), %r8
+; CHECK-NEXT: sbbq %r10, %r8
 ; CHECK-NEXT: setge %r8b
 ; CHECK-NEXT: movzbl %r8b, %r8d
-; CHECK-NEXT: andl $1, %r8d
 ; CHECK-NEXT: negq %r8
-; CHECK-NEXT: movq %r8, %xmm6
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
-; CHECK-NEXT: movdqa %xmm1, %xmm6
-; CHECK-NEXT: psllq %xmm4, %xmm6
+; CHECK-NEXT: movq %r8, %xmm5
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: movq %rcx, %rdi
+; CHECK-NEXT: sbbq %r9, %rdi
+; CHECK-NEXT: setge %dil
+; CHECK-NEXT: movzbl %dil, %edi
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: movq %rdi, %xmm6
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm5
+; CHECK-NEXT: psllq %xmm4, %xmm5
 ; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
 ; CHECK-NEXT: movdqa %xmm1, %xmm8
 ; CHECK-NEXT: psllq %xmm7, %xmm8
-; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm6[0],xmm8[1]
-; CHECK-NEXT: andpd %xmm5, %xmm8
+; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1]
+; CHECK-NEXT: andpd %xmm6, %xmm8
 ; CHECK-NEXT: orpd %xmm8, %xmm3
 ; CHECK-NEXT: paddq %xmm2, %xmm4
-; CHECK-NEXT: addq $32, %rdi
+; CHECK-NEXT: addq $32, %rsi
 ; CHECK-NEXT: jne .LBB0_2
 ; CHECK-NEXT: # %bb.3: # %middle.block
 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
@@ -101,7 +106,6 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-AVX2-NEXT: movq 24(%rsi), %rcx
 ; CHECK-AVX2-NEXT: movq 32(%rsi), %rdx
 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm0 = [0,1]
-; CHECK-AVX2-NEXT: xorl %esi, %esi
 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,1]
 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [2,2]
 ; CHECK-AVX2-NEXT: .p2align 4, 0x90
@@ -109,34 +113,40 @@ define void @failing(ptr %0, ptr %1) nounwind {
 ; CHECK-AVX2-NEXT: # =>This Loop Header: Depth=1
 ; CHECK-AVX2-NEXT: # Child Loop BB0_2 Depth 2
 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-AVX2-NEXT: movq $-1024, %rdi # imm = 0xFC00
+; CHECK-AVX2-NEXT: movq $-1024, %rsi # imm = 0xFC00
 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, %xmm4
 ; CHECK-AVX2-NEXT: .p2align 4, 0x90
 ; CHECK-AVX2-NEXT: .LBB0_2: # %vector.body
 ; CHECK-AVX2-NEXT: # Parent Loop BB0_1 Depth=1
 ; CHECK-AVX2-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-AVX2-NEXT: cmpq 1024(%rdx,%rdi), %rsi
-; CHECK-AVX2-NEXT: movq %rcx, %r8
-; CHECK-AVX2-NEXT: sbbq 1032(%rdx,%rdi), %r8
+; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %xmm5
+; CHECK-AVX2-NEXT: vmovdqu 1040(%rdx,%rsi), %xmm6
+; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm5[0],xmm6[0]
+; CHECK-AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm5[1],xmm6[1]
+; CHECK-AVX2-NEXT: vmovq %xmm5, %rdi
+; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %r8
+; CHECK-AVX2-NEXT: vmovq %xmm7, %r9
+; CHECK-AVX2-NEXT: vpextrq $1, %xmm7, %r10
+; CHECK-AVX2-NEXT: negq %r10
+; CHECK-AVX2-NEXT: movq %rcx, %r10
+; CHECK-AVX2-NEXT: sbbq %r8, %r10
 ; CHECK-AVX2-NEXT: setge %r8b
 ; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
-; CHECK-AVX2-NEXT: andl $1, %r8d
 ; CHECK-AVX2-NEXT: negq %r8
 ; CHECK-AVX2-NEXT: vmovq %r8, %xmm5
-; CHECK-AVX2-NEXT: cmpq 1040(%rdx,%rdi), %rsi
+; CHECK-AVX2-NEXT: negq %r9
 ; CHECK-AVX2-NEXT: movq %rcx, %r8
-; CHECK-AVX2-NEXT: sbbq 1048(%rdx,%rdi), %r8
-; CHECK-AVX2-NEXT: setge %r8b
-; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
-; CHECK-AVX2-NEXT: andl $1, %r8d
-; CHECK-AVX2-NEXT: negq %r8
-; CHECK-AVX2-NEXT: vmovq %r8, %xmm6
-; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
+; CHECK-AVX2-NEXT: sbbq %rdi, %r8
+; CHECK-AVX2-NEXT: setge %dil
+; CHECK-AVX2-NEXT: movzbl %dil, %edi
+; CHECK-AVX2-NEXT: negq %rdi
+; CHECK-AVX2-NEXT: vmovq %rdi, %xmm6
+; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm6[0],xmm5[0]
 ; CHECK-AVX2-NEXT: vpsllvq %xmm4, %xmm1, %xmm6
 ; CHECK-AVX2-NEXT: vpand %xmm6, %xmm5, %xmm5
 ; CHECK-AVX2-NEXT: vpor %xmm3, %xmm5, %xmm3
 ; CHECK-AVX2-NEXT: vpaddq %xmm2, %xmm4, %xmm4
-; CHECK-AVX2-NEXT: addq $32, %rdi
+; CHECK-AVX2-NEXT: addq $32, %rsi
 ; CHECK-AVX2-NEXT: jne .LBB0_2
 ; CHECK-AVX2-NEXT: # %bb.3: # %middle.block
 ; CHECK-AVX2-NEXT: # in Loop: Header=BB0_1 Depth=1
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index cee30f5fe5da9..460c5fe11f82a 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1045,16 +1045,12 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
 ;
 ; AVX512-LABEL: saddo_v4i1:
 ; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
+; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
-; AVX512-NEXT: kxorw %k1, %k0, %k2
-; AVX512-NEXT: kandw %k1, %k0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: kshiftlw $12, %k2, %k0
-; AVX512-NEXT: kshiftrw $12, %k0, %k0
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
 ; AVX512-NEXT: kmovd %k0, %eax
 ; AVX512-NEXT: movb %al, (%rdi)
 ; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 64ed081048851..d06993da6365d 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1062,16 +1062,12 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
 ;
 ; AVX512-LABEL: ssubo_v4i1:
 ; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
+; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
+; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512-NEXT: kxorw %k1, %k0, %k0
-; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k1 {%k1}
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: kshiftlw $12, %k0, %k0
-; AVX512-NEXT: kshiftrw $12, %k0, %k0
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
 ; AVX512-NEXT: kmovd %k0, %eax
 ; AVX512-NEXT: movb %al, (%rdi)
 ; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 950e943bd9020..bac118095331c 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -1098,16 +1098,12 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
 ;
 ; AVX512-LABEL: uaddo_v4i1:
 ; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
+; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512-NEXT: vpslld $31, %xmm1, %xmm0
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k1
-; AVX512-NEXT: kxorw %k1, %k0, %k2
-; AVX512-NEXT: kandw %k1, %k0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: kshiftlw $12, %k2, %k0
-; AVX512-NEXT: kshiftrw $12, %k0, %k0
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
 ; AVX512-NEXT: kmovd %k0, %eax
 ; AVX512-NEXT: movb %al, (%rdi)
 ; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index 7de972770d8da..ab75ada72f256 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -1145,16 +1145,12 @@ define <4 x i32> @usubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
 ;
 ; AVX512-LABEL: usubo_v4i1:
 ; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vpslld $31, %xmm2, %xmm2
+; AVX512-NEXT: vptestmd %xmm2, %xmm2, %k0
+; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX512-NEXT: vptestmd %xmm0, %xmm0, %k0
-; AVX512-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX512-NEXT: vptestmd %xmm1, %xmm1, %k1
-; AVX512-NEXT: kxorw %k1, %k0, %k0
-; AVX512-NEXT: vptestnmd %xmm0, %xmm0, %k1 {%k1}
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: kshiftlw $12, %k0, %k0
-; AVX512-NEXT: kshiftrw $12, %k0, %k0
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
 ; AVX512-NEXT: kmovd %k0, %eax
 ; AVX512-NEXT: movb %al, (%rdi)
 ; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index 78797b9acc2e6..27aaad6353ed6 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -3137,11 +3137,11 @@ define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef
 ; AVX512-LABEL: mul_v8i64_cast_cond:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: kmovw %edi, %k1
-; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm2
-; AVX512-NEXT: vpmuludq %zmm2, %zmm0, %zmm2
-; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm3
-; AVX512-NEXT: vpmuludq %zmm1, %zmm3, %zmm3
-; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm2
+; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm2
+; AVX512-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
+; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm3
+; AVX512-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512-NEXT: vpaddq %zmm2, %zmm3, %zmm2
 ; AVX512-NEXT: vpsllq $32, %zmm2, %zmm2
 ; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm1
 ; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm0 {%k1}
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index b839452725a95..3aaa9268a8d88 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -58,12 +58,12 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
 ; SSE41-NEXT: psrlq %xmm4, %xmm1
 ; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm1[4,5,6,7]
 ; SSE41-NEXT: pandn %xmm3, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
 ; SSE41-NEXT: paddq %xmm0, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: psllq %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psllq %xmm2, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
 ; SSE41-NEXT: psllq %xmm2, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
 ; SSE41-NEXT: por %xmm5, %xmm0
 ; SSE41-NEXT: retq
 ;
@@ -76,11 +76,11 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
 ; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
 ; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
@@ -158,13 +158,13 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt)
 ; XOPAVX1-LABEL: var_funnnel_v2i64:
 ; XOPAVX1: # %bb.0:
 ; XOPAVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [63,63]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4
-; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1
 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT: retq
 ;
@@ -366,13 +366,13 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %amt)
 ; XOPAVX1-LABEL: var_funnnel_v4i32:
 ; XOPAVX1: # %bb.0:
 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
-; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1
 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT: retq
 ;
@@ -646,26 +646,26 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %amt)
 ; XOPAVX1-LABEL: var_funnnel_v8i16:
 ; XOPAVX1: # %bb.0:
 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX1-NEXT: vpsubw %xmm4, %xmm5, %xmm4
-; XOPAVX1-NEXT: vpshlw %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpshlw %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshlw %xmm4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshlw %xmm2, %xmm1, %xmm1
 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT: retq
 ;
 ; XOPAVX2-LABEL: var_funnnel_v8i16:
 ; XOPAVX2: # %bb.0:
 ; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX2-NEXT: vpsubw %xmm4, %xmm5, %xmm4
-; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX2-NEXT: vpaddw %xmm0, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
 ; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX2-NEXT: retq
 ;
@@ -995,26 +995,26 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
 ; XOPAVX1-LABEL: var_funnnel_v16i8:
 ; XOPAVX1: # %bb.0:
 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX1-NEXT: vpsubb %xmm4, %xmm5, %xmm4
-; XOPAVX1-NEXT: vpshlb %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshlb %xmm4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT: retq
 ;
 ; XOPAVX2-LABEL: var_funnnel_v16i8:
 ; XOPAVX2: # %bb.0:
 ; XOPAVX2-NEXT: vpbroadcastb {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX2-NEXT: vpsubb %xmm4, %xmm5, %xmm4
-; XOPAVX2-NEXT: vpshlb %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpshlb %xmm4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
 ; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX2-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 7b6b0ea83c7ee..fc65f759f5fbe 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -486,22 +486,22 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
 ; XOPAVX2-LABEL: var_funnnel_v16i16:
 ; XOPAVX2: # %bb.0:
 ; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4
+; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4
 ; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5
-; XOPAVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; XOPAVX2-NEXT: vpsubw %xmm5, %xmm6, %xmm5
-; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm7
-; XOPAVX2-NEXT: vpshlw %xmm5, %xmm7, %xmm5
-; XOPAVX2-NEXT: vpsubw %xmm4, %xmm6, %xmm4
-; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1
-; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1
-; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2
-; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
 ; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
-; XOPAVX2-NEXT: vpshlw %xmm3, %xmm4, %xmm3
-; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6
+; XOPAVX2-NEXT: vpshlw %xmm5, %xmm6, %xmm5
+; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; XOPAVX2-NEXT: vpsubw %xmm3, %xmm4, %xmm3
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
+; XOPAVX2-NEXT: vpshlw %xmm3, %xmm5, %xmm3
+; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt)
diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
index 0426c48aecfcf..a6067a960fc0d 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
@@ -185,13 +185,13 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
 ; XOPAVX1-LABEL: var_funnnel_v2i32:
 ; XOPAVX1: # %bb.0:
 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
-; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
-; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4
-; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1
-; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
 ; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1
 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index c54da38ef10cc..75baba5f35f79 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -927,9 +927,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ; SSE2-LABEL: constant_shift_v2i64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psllq $7, %xmm1
-; SSE2-NEXT: paddq %xmm0, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; SSE2-NEXT: paddq %xmm0, %xmm1
+; SSE2-NEXT: psllq $7, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: constant_shift_v2i64:
@@ -975,9 +975,9 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ; X86-SSE-LABEL: constant_shift_v2i64:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE-NEXT: psllq $7, %xmm1
-; X86-SSE-NEXT: paddq %xmm0, %xmm0
-; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X86-SSE-NEXT: paddq %xmm0, %xmm1
+; X86-SSE-NEXT: psllq $7, %xmm0
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; X86-SSE-NEXT: retl
 %shift = shl <2 x i64> %a, <i64 1, i64 7>
 ret <2 x i64> %shift