diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8827bff111c22..e7e761a5eb8e6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7615,6 +7615,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); if (VT.getScalarType() == MVT::i1) return getNode(ISD::AND, DL, VT, N1, N2); + if (N2CV && N2CV->isZero()) + return N2; if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { const APInt &MulImm = N1->getConstantOperandAPInt(0); const APInt &N2CImm = N2C->getAPIntValue(); diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index cca190f08df2b..557627bf8eaf1 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -1674,8 +1674,9 @@ define i32 @combine_i32_sdiv_const100(i32 %x) { ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 +; CHECK-SD-NEXT: lsr x9, x8, #63 ; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w0, w8, w8, lsr #31 +; CHECK-SD-NEXT: add w0, w8, w9 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i32_sdiv_const100: diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index 87b11086e28d5..d988afd24d15a 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -279,11 +279,12 @@ define i32 @si32_100(i32 %a, i32 %b) { ; CHECK-SD-LABEL: si32_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: mov w9, #100 // =0x64 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 +; CHECK-SD-NEXT: lsr x9, x8, #63 ; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 +; CHECK-SD-NEXT: add w8, w8, w9 +; CHECK-SD-NEXT: mov w9, #100 // =0x64 ; CHECK-SD-NEXT: msub w0, w8, w9, w0 ; CHECK-SD-NEXT: ret ; @@ -723,17 +724,19 @@ entry: define <2 x i8> @sv2i8_100(<2 x i8> %d, <2 x i8> %e) { ; CHECK-SD-LABEL: sv2i8_100: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: movi v2.2s, #100 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-SD-NEXT: movi v3.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63 ; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37 +; CHECK-SD-NEXT: xtn v2.2s, v2.2d ; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31 -; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i8_100: @@ -856,22 +859,25 @@ define <3 x i8> @sv3i8_100(<3 x i8> %d, <3 x i8> %e) { ; CHECK-SD-NEXT: sxtb x10, w1 ; CHECK-SD-NEXT: movk w9, #20971, lsl #16 ; CHECK-SD-NEXT: sxtb x11, w2 -; CHECK-SD-NEXT: sxtb w12, w0 +; CHECK-SD-NEXT: mov w12, #100 // =0x64 ; CHECK-SD-NEXT: smull x8, w8, w9 ; CHECK-SD-NEXT: smull x10, w10, w9 ; CHECK-SD-NEXT: smull x9, w11, w9 -; CHECK-SD-NEXT: mov w11, #100 // =0x64 +; CHECK-SD-NEXT: lsr x11, x8, #63 ; CHECK-SD-NEXT: asr x8, x8, #37 +; CHECK-SD-NEXT: lsr x13, x10, #63 ; CHECK-SD-NEXT: asr x10, x10, #37 +; CHECK-SD-NEXT: add w8, w8, w11 +; CHECK-SD-NEXT: lsr x11, x9, #63 ; CHECK-SD-NEXT: asr x9, x9, #37 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 -; CHECK-SD-NEXT: msub w0, w8, w11, w12 +; CHECK-SD-NEXT: add w10, w10, w13 +; CHECK-SD-NEXT: sxtb w13, w0 +; CHECK-SD-NEXT: msub w0, w8, w12, w13 ; CHECK-SD-NEXT: sxtb w8, w1 -; CHECK-SD-NEXT: msub w1, w10, w11, w8 +; CHECK-SD-NEXT: add w9, w9, w11 +; CHECK-SD-NEXT: msub w1, w10, w12, w8 ; CHECK-SD-NEXT: sxtb w8, w2 -; CHECK-SD-NEXT: msub w2, w9, w11, w8 +; CHECK-SD-NEXT: msub w2, w9, w12, w8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv3i8_100: @@ -989,33 +995,37 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD-NEXT: mov w14, #100 // =0x64 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8 -; CHECK-SD-NEXT: smov x9, v1.h[0] -; CHECK-SD-NEXT: smov x10, v1.h[1] +; CHECK-SD-NEXT: smov x10, v1.h[0] +; CHECK-SD-NEXT: smov x9, v1.h[1] ; CHECK-SD-NEXT: smov x11, v1.h[2] -; CHECK-SD-NEXT: smov w12, v1.h[0] -; CHECK-SD-NEXT: smov x13, v1.h[3] +; CHECK-SD-NEXT: smov w16, v1.h[0] ; CHECK-SD-NEXT: smov w15, v1.h[1] -; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 +; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x11, w11, w8 -; CHECK-SD-NEXT: asr x9, x9, #37 -; CHECK-SD-NEXT: smull x8, w13, w8 +; CHECK-SD-NEXT: lsr x13, x10, #63 ; CHECK-SD-NEXT: asr x10, x10, #37 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 +; CHECK-SD-NEXT: lsr x12, x9, #63 +; CHECK-SD-NEXT: asr x9, x9, #37 +; CHECK-SD-NEXT: add w10, w10, w13 +; CHECK-SD-NEXT: smov x13, v1.h[3] +; CHECK-SD-NEXT: msub w10, w10, w14, w16 +; CHECK-SD-NEXT: add w9, w9, w12 +; CHECK-SD-NEXT: lsr x12, x11, #63 +; CHECK-SD-NEXT: msub w9, w9, w14, w15 ; CHECK-SD-NEXT: asr x11, x11, #37 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 +; CHECK-SD-NEXT: fmov s0, w10 +; CHECK-SD-NEXT: smull x8, w13, w8 +; CHECK-SD-NEXT: smov w10, v1.h[2] +; CHECK-SD-NEXT: mov v0.h[1], w9 +; CHECK-SD-NEXT: add w9, w11, w12 +; CHECK-SD-NEXT: smov w11, v1.h[3] +; CHECK-SD-NEXT: msub w9, w9, w14, w10 +; CHECK-SD-NEXT: lsr x10, x8, #63 ; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: msub w9, w9, w14, w12 -; CHECK-SD-NEXT: msub w10, w10, w14, w15 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: add w9, w11, w11, lsr #31 -; CHECK-SD-NEXT: smov w11, v1.h[2] -; CHECK-SD-NEXT: msub w9, w9, w14, w11 -; CHECK-SD-NEXT: mov v0.h[1], w10 -; CHECK-SD-NEXT: smov w10, v1.h[3] -; CHECK-SD-NEXT: msub w8, w8, w14, w10 +; CHECK-SD-NEXT: add w8, w8, w10 ; CHECK-SD-NEXT: mov v0.h[2], w9 +; CHECK-SD-NEXT: msub w8, w8, w14, w11 ; CHECK-SD-NEXT: mov v0.h[3], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret @@ -1716,17 +1726,19 @@ entry: define <2 x i16> @sv2i16_100(<2 x i16> %d, <2 x i16> %e) { ; CHECK-SD-LABEL: sv2i16_100: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: movi v2.2s, #100 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: movi v3.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63 ; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37 +; CHECK-SD-NEXT: xtn v2.2s, v2.2d ; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31 -; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i16_100: @@ -1839,23 +1851,26 @@ define <3 x i16> @sv3i16_100(<3 x i16> %d, <3 x i16> %e) { ; CHECK-SD-NEXT: smov x10, v0.h[1] ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smov x11, v0.h[2] -; CHECK-SD-NEXT: mov w12, #100 // =0x64 -; CHECK-SD-NEXT: smov w13, v0.h[1] +; CHECK-SD-NEXT: smov w13, v0.h[0] +; CHECK-SD-NEXT: mov w14, #100 // =0x64 ; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 ; CHECK-SD-NEXT: smull x8, w11, w8 -; CHECK-SD-NEXT: smov w11, v0.h[0] +; CHECK-SD-NEXT: lsr x11, x9, #63 ; CHECK-SD-NEXT: asr x9, x9, #37 +; CHECK-SD-NEXT: lsr x12, x10, #63 ; CHECK-SD-NEXT: asr x10, x10, #37 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 +; CHECK-SD-NEXT: add w9, w9, w11 +; CHECK-SD-NEXT: smov w11, v0.h[1] +; CHECK-SD-NEXT: msub w9, w9, w14, w13 +; CHECK-SD-NEXT: add w10, w10, w12 +; CHECK-SD-NEXT: smov w12, v0.h[2] +; CHECK-SD-NEXT: msub w10, w10, w14, w11 +; CHECK-SD-NEXT: lsr x11, x8, #63 ; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 -; CHECK-SD-NEXT: msub w9, w9, w12, w11 -; CHECK-SD-NEXT: smov w11, v0.h[2] -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: msub w10, w10, w12, w13 -; CHECK-SD-NEXT: msub w8, w8, w12, w11 ; CHECK-SD-NEXT: fmov s0, w9 +; CHECK-SD-NEXT: add w8, w8, w11 +; CHECK-SD-NEXT: msub w8, w8, w14, w12 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: mov v0.h[2], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -2407,14 +2422,16 @@ define <2 x i32> @sv2i32_100(<2 x i32> %d, <2 x i32> %e) { ; CHECK-SD-LABEL: sv2i32_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: movi v2.2s, #100 +; CHECK-SD-NEXT: movi v3.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63 ; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37 +; CHECK-SD-NEXT: xtn v2.2s, v2.2d ; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31 -; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv2i32_100: @@ -2492,19 +2509,22 @@ define <3 x i32> @sv3i32_100(<3 x i32> %d, <3 x i32> %e) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: mov w9, v0.s[2] -; CHECK-SD-NEXT: movi v2.2s, #100 +; CHECK-SD-NEXT: movi v3.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 -; CHECK-SD-NEXT: mov w10, #100 // =0x64 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: smull x8, w9, w8 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-SD-NEXT: lsr x10, x8, #63 ; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 +; CHECK-SD-NEXT: add w8, w8, w10 +; CHECK-SD-NEXT: mov w10, #100 // =0x64 +; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #63 ; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37 ; CHECK-SD-NEXT: msub w8, w8, w10, w9 +; CHECK-SD-NEXT: xtn v2.2s, v2.2d ; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31 -; CHECK-SD-NEXT: mls v0.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-SD-NEXT: mov v0.s[2], w8 ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll index 1223ae3a15e7b..d9f91449dffb8 100644 --- a/llvm/test/CodeGen/AArch64/srem-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll @@ -23,11 +23,12 @@ define i32 @fold_srem_positive_even(i32 %x) { ; CHECK-LABEL: fold_srem_positive_even: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #36849 // =0x8ff1 -; CHECK-NEXT: mov w9, #1060 // =0x424 ; CHECK-NEXT: movk w8, #15827, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x9, x8, #63 ; CHECK-NEXT: asr x8, x8, #40 -; CHECK-NEXT: add w8, w8, w8, lsr #31 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: mov w9, #1060 // =0x424 ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, 1060 @@ -39,11 +40,12 @@ define i32 @fold_srem_negative_odd(i32 %x) { ; CHECK-LABEL: fold_srem_negative_odd: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65445 // =0xffa5 -; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d ; CHECK-NEXT: movk w8, #42330, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x9, x8, #63 ; CHECK-NEXT: asr x8, x8, #40 -; CHECK-NEXT: add w8, w8, w8, lsr #31 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, -723 @@ -55,11 +57,12 @@ define i32 @fold_srem_negative_even(i32 %x) { ; CHECK-LABEL: fold_srem_negative_even: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #62439 // =0xf3e7 -; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b ; CHECK-NEXT: movk w8, #64805, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: lsr x9, x8, #63 ; CHECK-NEXT: asr x8, x8, #40 -; CHECK-NEXT: add w8, w8, w8, lsr #31 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, -22981 diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll index b165ac0d56d20..a74f0c86fe185 100644 --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -263,14 +263,16 @@ define <2 x i32> @fold_srem_v2i32(<2 x i32> %x) { ; CHECK-LABEL: fold_srem_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #26215 // =0x6667 -; CHECK-NEXT: movi v2.2s, #10 +; CHECK-NEXT: movi v3.2s, #10 ; CHECK-NEXT: movk w8, #26214, lsl #16 ; CHECK-NEXT: dup v1.2s, w8 ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: ushr v2.2d, v1.2d, #63 ; CHECK-NEXT: sshr v1.2d, v1.2d, #34 +; CHECK-NEXT: xtn v2.2s, v2.2d ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: usra v1.2s, v1.2s, #31 -; CHECK-NEXT: mls v0.2s, v1.2s, v2.2s +; CHECK-NEXT: add v1.2s, v1.2s, v2.2s +; CHECK-NEXT: mls v0.2s, v1.2s, v3.2s ; CHECK-NEXT: ret %1 = srem <2 x i32> %x, ret <2 x i32> %1 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 4533e14c672e7..d691b1c278a48 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1829,67 +1829,53 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: srai s4, a3, 31 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3 ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3 ; RV32I-NEXT: add s5, a0, s5 ; RV32I-NEXT: sltu a0, s5, a0 -; RV32I-NEXT: add s7, a1, a0 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: add s6, a1, a0 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3 ; RV32I-NEXT: add s5, a0, s5 ; RV32I-NEXT: sltu a0, s5, a0 ; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: add s8, s7, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: add s5, s6, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3 -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s6, a1 -; RV32I-NEXT: add s9, a0, s8 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __muldi3 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: add s7, a0, s5 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s4 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: call __muldi3 -; RV32I-NEXT: add s2, a0, s2 -; RV32I-NEXT: sltu a3, s9, s5 -; RV32I-NEXT: sltu a4, s8, s7 -; RV32I-NEXT: add a1, a1, s3 -; RV32I-NEXT: add a2, s9, s2 -; RV32I-NEXT: add a4, s6, a4 -; RV32I-NEXT: sltu a0, s2, a0 -; RV32I-NEXT: sltu a5, a2, s9 -; RV32I-NEXT: add a3, a4, a3 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: add a0, a3, a0 -; RV32I-NEXT: add a1, a0, a5 -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: add a0, s7, a0 +; RV32I-NEXT: sltu a2, s7, s0 +; RV32I-NEXT: sltu a3, s5, s6 +; RV32I-NEXT: sltu a4, a0, s7 +; RV32I-NEXT: add a3, s1, a3 +; RV32I-NEXT: add a2, a3, a2 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1899,8 +1885,6 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll index 9b5fa1c2bc811..94080c02ded80 100644 --- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll @@ -10,11 +10,11 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: sub sp, #60 ; THUMBV6-NEXT: mov r6, r3 ; THUMBV6-NEXT: mov r1, r2 -; THUMBV6-NEXT: str r2, [sp, #52] @ 4-byte Spill +; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill ; THUMBV6-NEXT: mov r4, r0 -; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill +; THUMBV6-NEXT: str r0, [sp, #48] @ 4-byte Spill ; THUMBV6-NEXT: ldr r2, [sp, #88] -; THUMBV6-NEXT: str r2, [sp, #48] @ 4-byte Spill +; THUMBV6-NEXT: str r2, [sp, #56] @ 4-byte Spill ; THUMBV6-NEXT: movs r5, #0 ; THUMBV6-NEXT: mov r0, r1 ; THUMBV6-NEXT: mov r1, r5 @@ -23,21 +23,21 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: str r1, [sp, #28] @ 4-byte Spill ; THUMBV6-NEXT: str r0, [r4] ; THUMBV6-NEXT: ldr r2, [sp, #96] -; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill +; THUMBV6-NEXT: str r2, [sp, #40] @ 4-byte Spill ; THUMBV6-NEXT: mov r4, r6 -; THUMBV6-NEXT: str r6, [sp, #56] @ 4-byte Spill +; THUMBV6-NEXT: str r6, [sp, #44] @ 4-byte Spill ; THUMBV6-NEXT: mov r0, r6 ; THUMBV6-NEXT: mov r1, r5 ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul -; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill +; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill ; THUMBV6-NEXT: mov r7, r1 ; THUMBV6-NEXT: subs r0, r1, #1 ; THUMBV6-NEXT: sbcs r7, r0 ; THUMBV6-NEXT: ldr r0, [sp, #100] ; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill ; THUMBV6-NEXT: mov r1, r5 -; THUMBV6-NEXT: ldr r6, [sp, #52] @ 4-byte Reload +; THUMBV6-NEXT: ldr r6, [sp, #36] @ 4-byte Reload ; THUMBV6-NEXT: mov r2, r6 ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul @@ -53,10 +53,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: ands r4, r3 ; THUMBV6-NEXT: orrs r4, r1 ; THUMBV6-NEXT: orrs r4, r7 -; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload ; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; THUMBV6-NEXT: adds r7, r1, r0 -; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload ; THUMBV6-NEXT: mov r1, r5 ; THUMBV6-NEXT: mov r2, r6 ; THUMBV6-NEXT: mov r3, r5 @@ -69,7 +69,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: orrs r0, r4 ; THUMBV6-NEXT: str r0, [sp, #16] @ 4-byte Spill ; THUMBV6-NEXT: ldr r0, [sp, #92] -; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill +; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill ; THUMBV6-NEXT: ldr r7, [sp, #80] ; THUMBV6-NEXT: mov r1, r5 ; THUMBV6-NEXT: mov r2, r7 @@ -82,13 +82,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: ldr r6, [sp, #84] ; THUMBV6-NEXT: mov r0, r6 ; THUMBV6-NEXT: mov r1, r5 -; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload +; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul ; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill ; THUMBV6-NEXT: subs r2, r1, #1 ; THUMBV6-NEXT: sbcs r1, r2 -; THUMBV6-NEXT: ldr r3, [sp, #44] @ 4-byte Reload +; THUMBV6-NEXT: ldr r3, [sp, #52] @ 4-byte Reload ; THUMBV6-NEXT: subs r2, r3, #1 ; THUMBV6-NEXT: sbcs r3, r2 ; THUMBV6-NEXT: str r6, [sp, #8] @ 4-byte Spill @@ -99,21 +99,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: orrs r6, r4 ; THUMBV6-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; THUMBV6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; THUMBV6-NEXT: adds r0, r1, r0 -; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; THUMBV6-NEXT: adds r4, r1, r0 ; THUMBV6-NEXT: mov r0, r7 ; THUMBV6-NEXT: mov r1, r5 -; THUMBV6-NEXT: ldr r4, [sp, #48] @ 4-byte Reload -; THUMBV6-NEXT: mov r2, r4 +; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul -; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill -; THUMBV6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; THUMBV6-NEXT: adds r0, r1, r0 +; THUMBV6-NEXT: adds r4, r1, r4 ; THUMBV6-NEXT: mov r1, r5 ; THUMBV6-NEXT: adcs r1, r5 ; THUMBV6-NEXT: orrs r1, r6 -; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload +; THUMBV6-NEXT: ldr r3, [sp, #40] @ 4-byte Reload ; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload ; THUMBV6-NEXT: orrs r3, r2 ; THUMBV6-NEXT: subs r2, r3, #1 @@ -127,68 +123,44 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV6-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; THUMBV6-NEXT: orrs r7, r1 ; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; THUMBV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; THUMBV6-NEXT: adds r1, r2, r1 -; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill -; THUMBV6-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; THUMBV6-NEXT: adcs r0, r1 -; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill -; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; THUMBV6-NEXT: adds r0, r0, r1 +; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill +; THUMBV6-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; THUMBV6-NEXT: adcs r4, r0 +; THUMBV6-NEXT: str r4, [sp, #40] @ 4-byte Spill +; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; THUMBV6-NEXT: mov r1, r5 -; THUMBV6-NEXT: mov r2, r4 +; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul ; THUMBV6-NEXT: mov r4, r1 ; THUMBV6-NEXT: ldr r1, [sp, #28] @ 4-byte Reload ; THUMBV6-NEXT: adds r6, r0, r1 ; THUMBV6-NEXT: adcs r4, r5 -; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; THUMBV6-NEXT: mov r1, r5 -; THUMBV6-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul ; THUMBV6-NEXT: adds r0, r0, r6 -; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload +; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload ; THUMBV6-NEXT: str r0, [r2, #4] ; THUMBV6-NEXT: adcs r1, r5 -; THUMBV6-NEXT: adds r0, r4, r1 -; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill +; THUMBV6-NEXT: adds r4, r4, r1 ; THUMBV6-NEXT: mov r6, r5 ; THUMBV6-NEXT: adcs r6, r5 -; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; THUMBV6-NEXT: mov r1, r5 -; THUMBV6-NEXT: ldr r4, [sp, #44] @ 4-byte Reload -; THUMBV6-NEXT: mov r2, r4 +; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload ; THUMBV6-NEXT: mov r3, r5 ; THUMBV6-NEXT: bl __aeabi_lmul -; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; THUMBV6-NEXT: adds r0, r0, r2 -; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill +; THUMBV6-NEXT: adds r0, r0, r4 ; THUMBV6-NEXT: adcs r1, r6 -; THUMBV6-NEXT: str r1, [sp, #24] @ 4-byte Spill -; THUMBV6-NEXT: ldr r0, [sp, #48] @ 4-byte Reload -; THUMBV6-NEXT: mov r1, r4 -; THUMBV6-NEXT: mov r2, r5 -; THUMBV6-NEXT: mov r3, r5 -; THUMBV6-NEXT: bl __aeabi_lmul -; THUMBV6-NEXT: mov r6, r0 -; THUMBV6-NEXT: mov r4, r1 -; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload -; THUMBV6-NEXT: ldr r1, [sp, #56] @ 4-byte Reload -; THUMBV6-NEXT: mov r2, r5 -; THUMBV6-NEXT: mov r3, r5 -; THUMBV6-NEXT: bl __aeabi_lmul -; THUMBV6-NEXT: adds r0, r0, r6 -; THUMBV6-NEXT: adcs r1, r4 -; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; THUMBV6-NEXT: adds r0, r2, r0 -; THUMBV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; THUMBV6-NEXT: adcs r1, r2 ; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload ; THUMBV6-NEXT: adds r0, r0, r2 -; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload +; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload ; THUMBV6-NEXT: str r0, [r2, #8] -; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload ; THUMBV6-NEXT: adcs r1, r0 ; THUMBV6-NEXT: str r1, [r2, #12] ; THUMBV6-NEXT: adcs r5, r5