diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 50a3a4ab8d8b6..0a37b592c3eb3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2752,6 +2752,14 @@ def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; } +// Fold (add/sub X, trunc(shift(Y))) into a 64-bit shifted-register ADD/SUB. +def : Pat<(add GPR32:$Rn, (trunc arith_shifted_reg64:$Rm)), + (EXTRACT_SUBREG (ADDXrs (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Rn, sub_32), + arith_shifted_reg64:$Rm), sub_32)>; +def : Pat<(sub GPR32:$Rn, (trunc arith_shifted_reg64:$Rm)), + (EXTRACT_SUBREG (SUBXrs (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Rn, sub_32), + arith_shifted_reg64:$Rm), sub_32)>; + def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, (arith_shifted_reg32 GPR32:$src, 0)), 3>; diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index cca190f08df2b..e6562e303121b 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -1493,13 +1493,12 @@ define i5 @combine_i5_sdiv_const7(i5 %x) { ; CHECK-SD-LABEL: combine_i5_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493 -; CHECK-SD-NEXT: sbfx x9, x0, #0, #5 -; CHECK-SD-NEXT: movk x8, #37449, lsl #16 -; CHECK-SD-NEXT: smull x8, w9, w8 -; CHECK-SD-NEXT: lsl w9, w0, #27 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: add w8, w8, w9, asr #27 +; CHECK-SD-NEXT: sbfx x8, x0, #0, #5 +; CHECK-SD-NEXT: mov x9, #-56173 // =0xffffffffffff2493 +; CHECK-SD-NEXT: movk x9, #37449, lsl #16 +; CHECK-SD-NEXT: smull x8, w8, w9 +; CHECK-SD-NEXT: sbfx w9, w0, #0, #5 +; CHECK-SD-NEXT: add x8, x9, x8, lsr #32 ; CHECK-SD-NEXT: asr w9, w8, #2 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret @@ -1646,10 
+1645,10 @@ define i32 @combine_i32_sdiv_const7(i32 %x) { ; CHECK-SD-LABEL: combine_i32_sdiv_const7: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #9363 // =0x2493 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: movk w8, #37449, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: add w8, w8, w0 +; CHECK-SD-NEXT: add x8, x0, x8, lsr #32 ; CHECK-SD-NEXT: asr w9, w8, #2 ; CHECK-SD-NEXT: add w0, w9, w8, lsr #31 ; CHECK-SD-NEXT: ret @@ -1657,10 +1656,10 @@ define i32 @combine_i32_sdiv_const7(i32 %x) { ; CHECK-GI-LABEL: combine_i32_sdiv_const7: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #9363 // =0x2493 +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-GI-NEXT: movk w8, #37449, lsl #16 ; CHECK-GI-NEXT: smull x8, w0, w8 -; CHECK-GI-NEXT: asr x8, x8, #32 -; CHECK-GI-NEXT: add w8, w8, w0 +; CHECK-GI-NEXT: add x8, x0, x8, asr #32 ; CHECK-GI-NEXT: asr w8, w8, #2 ; CHECK-GI-NEXT: add w0, w8, w8, lsr #31 ; CHECK-GI-NEXT: ret @@ -1674,8 +1673,11 @@ define i32 @combine_i32_sdiv_const100(i32 %x) { ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w0, w8, w8, lsr #31 +; CHECK-SD-NEXT: asr x9, x8, #37 +; CHECK-SD-NEXT: lsr w9, w9, #31 +; CHECK-SD-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; CHECK-SD-NEXT: add x0, x9, x8, asr #37 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_i32_sdiv_const100: diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index 87b11086e28d5..dbe3cd0c4174e 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -247,10 +247,10 @@ define i32 @si32_7(i32 %a, i32 %b) { ; CHECK-SD-LABEL: si32_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #9363 // =0x2493 +; CHECK-SD-NEXT: // kill: 
def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: movk w8, #37449, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: add w8, w8, w0 +; CHECK-SD-NEXT: add x8, x0, x8, lsr #32 ; CHECK-SD-NEXT: asr w9, w8, #2 ; CHECK-SD-NEXT: add w8, w9, w8, lsr #31 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 @@ -260,10 +260,10 @@ define i32 @si32_7(i32 %a, i32 %b) { ; CHECK-GI-LABEL: si32_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #9363 // =0x2493 +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-GI-NEXT: movk w8, #37449, lsl #16 ; CHECK-GI-NEXT: smull x8, w0, w8 -; CHECK-GI-NEXT: asr x8, x8, #32 -; CHECK-GI-NEXT: add w8, w8, w0 +; CHECK-GI-NEXT: add x8, x0, x8, asr #32 ; CHECK-GI-NEXT: asr w8, w8, #2 ; CHECK-GI-NEXT: add w8, w8, w8, lsr #31 ; CHECK-GI-NEXT: lsl w9, w8, #3 @@ -279,11 +279,13 @@ define i32 @si32_100(i32 %a, i32 %b) { ; CHECK-SD-LABEL: si32_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: mov w9, #100 // =0x64 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smull x8, w0, w8 -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 +; CHECK-SD-NEXT: asr x9, x8, #37 +; CHECK-SD-NEXT: lsr w9, w9, #31 +; CHECK-SD-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; CHECK-SD-NEXT: add x8, x9, x8, asr #37 +; CHECK-SD-NEXT: mov w9, #100 // =0x64 ; CHECK-SD-NEXT: msub w0, w8, w9, w0 ; CHECK-SD-NEXT: ret ; @@ -307,11 +309,12 @@ define i32 @ui32_7(i32 %a, i32 %b) { ; CHECK-SD-LABEL: ui32_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: movk w8, #9362, lsl #16 ; CHECK-SD-NEXT: umull x8, w0, w8 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: sub w9, w0, w8 -; CHECK-SD-NEXT: add w8, w8, w9, lsr #1 +; CHECK-SD-NEXT: sub x9, x0, x8, lsr #32 +; CHECK-SD-NEXT: lsr w9, w9, #1 +; CHECK-SD-NEXT: add x8, x9, x8, lsr #32 ; CHECK-SD-NEXT: lsr w8, 
w8, #2 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: add w0, w0, w8 @@ -320,11 +323,12 @@ define i32 @ui32_7(i32 %a, i32 %b) { ; CHECK-GI-LABEL: ui32_7: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #18725 // =0x4925 +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-GI-NEXT: movk w8, #9362, lsl #16 ; CHECK-GI-NEXT: umull x8, w0, w8 -; CHECK-GI-NEXT: lsr x8, x8, #32 -; CHECK-GI-NEXT: sub w9, w0, w8 -; CHECK-GI-NEXT: add w8, w8, w9, lsr #1 +; CHECK-GI-NEXT: sub x9, x0, x8, lsr #32 +; CHECK-GI-NEXT: lsr w9, w9, #1 +; CHECK-GI-NEXT: add x8, x9, x8, lsr #32 ; CHECK-GI-NEXT: lsr w8, w8, #2 ; CHECK-GI-NEXT: lsl w9, w8, #3 ; CHECK-GI-NEXT: sub w8, w9, w8 @@ -801,13 +805,10 @@ define <3 x i8> @sv3i8_7(<3 x i8> %d, <3 x i8> %e) { ; CHECK-SD-NEXT: smull x10, w10, w9 ; CHECK-SD-NEXT: smull x9, w11, w9 ; CHECK-SD-NEXT: sxtb w11, w2 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: lsr x10, x10, #32 -; CHECK-SD-NEXT: lsr x9, x9, #32 -; CHECK-SD-NEXT: add w8, w8, w13 -; CHECK-SD-NEXT: add w10, w10, w12 +; CHECK-SD-NEXT: add x8, x13, x8, lsr #32 +; CHECK-SD-NEXT: add x10, x12, x10, lsr #32 +; CHECK-SD-NEXT: add x9, x11, x9, lsr #32 ; CHECK-SD-NEXT: asr w14, w8, #2 -; CHECK-SD-NEXT: add w9, w9, w11 ; CHECK-SD-NEXT: asr w15, w10, #2 ; CHECK-SD-NEXT: asr w16, w9, #2 ; CHECK-SD-NEXT: add w8, w14, w8, lsr #31 @@ -856,17 +857,23 @@ define <3 x i8> @sv3i8_100(<3 x i8> %d, <3 x i8> %e) { ; CHECK-SD-NEXT: sxtb x10, w1 ; CHECK-SD-NEXT: movk w9, #20971, lsl #16 ; CHECK-SD-NEXT: sxtb x11, w2 -; CHECK-SD-NEXT: sxtb w12, w0 ; CHECK-SD-NEXT: smull x8, w8, w9 ; CHECK-SD-NEXT: smull x10, w10, w9 ; CHECK-SD-NEXT: smull x9, w11, w9 +; CHECK-SD-NEXT: asr x11, x8, #37 +; CHECK-SD-NEXT: asr x12, x10, #37 +; CHECK-SD-NEXT: asr x13, x9, #37 +; CHECK-SD-NEXT: lsr w11, w11, #31 +; CHECK-SD-NEXT: lsr w12, w12, #31 +; CHECK-SD-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; CHECK-SD-NEXT: add x8, x11, x8, asr #37 +; CHECK-SD-NEXT: lsr w13, w13, #31 +; 
CHECK-SD-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; CHECK-SD-NEXT: add x10, x12, x10, asr #37 ; CHECK-SD-NEXT: mov w11, #100 // =0x64 -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: asr x10, x10, #37 -; CHECK-SD-NEXT: asr x9, x9, #37 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 +; CHECK-SD-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; CHECK-SD-NEXT: add x9, x13, x9, asr #37 +; CHECK-SD-NEXT: sxtb w12, w0 ; CHECK-SD-NEXT: msub w0, w8, w11, w12 ; CHECK-SD-NEXT: sxtb w8, w1 ; CHECK-SD-NEXT: msub w1, w10, w11, w8 @@ -899,45 +906,41 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493 ; CHECK-SD-NEXT: movk x8, #37449, lsl #16 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-SD-NEXT: smov x9, v0.h[0] -; CHECK-SD-NEXT: smov x10, v0.h[1] -; CHECK-SD-NEXT: smov w11, v0.h[0] -; CHECK-SD-NEXT: smov x12, v0.h[2] -; CHECK-SD-NEXT: smov w13, v0.h[1] -; CHECK-SD-NEXT: smov x14, v0.h[3] -; CHECK-SD-NEXT: smov w16, v0.h[2] -; CHECK-SD-NEXT: smull x9, w9, w8 +; CHECK-SD-NEXT: smov x10, v0.h[0] +; CHECK-SD-NEXT: smov x9, v0.h[1] +; CHECK-SD-NEXT: smov w12, v0.h[0] +; CHECK-SD-NEXT: smov w11, v0.h[1] +; CHECK-SD-NEXT: smov x13, v0.h[2] +; CHECK-SD-NEXT: smov w14, v0.h[2] +; CHECK-SD-NEXT: smov x17, v0.h[3] ; CHECK-SD-NEXT: smull x10, w10, w8 -; CHECK-SD-NEXT: smull x12, w12, w8 -; CHECK-SD-NEXT: lsr x9, x9, #32 -; CHECK-SD-NEXT: smull x8, w14, w8 -; CHECK-SD-NEXT: smov w14, v0.h[3] -; CHECK-SD-NEXT: lsr x10, x10, #32 -; CHECK-SD-NEXT: add w9, w9, w11 -; CHECK-SD-NEXT: lsr x12, x12, #32 +; CHECK-SD-NEXT: smull x9, w9, w8 +; CHECK-SD-NEXT: smull x13, w13, w8 +; CHECK-SD-NEXT: add x10, x12, x10, lsr #32 +; CHECK-SD-NEXT: smull x8, w17, w8 +; CHECK-SD-NEXT: add x9, x11, x9, lsr #32 +; CHECK-SD-NEXT: asr w16, w10, #2 +; CHECK-SD-NEXT: add x13, x14, x13, lsr #32 ; CHECK-SD-NEXT: asr w15, w9, #2 -; CHECK-SD-NEXT: add 
w10, w10, w13 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: asr w17, w10, #2 -; CHECK-SD-NEXT: add w12, w12, w16 +; CHECK-SD-NEXT: add w10, w16, w10, lsr #31 +; CHECK-SD-NEXT: asr w16, w13, #2 ; CHECK-SD-NEXT: add w9, w15, w9, lsr #31 -; CHECK-SD-NEXT: asr w15, w12, #2 -; CHECK-SD-NEXT: add w8, w8, w14 -; CHECK-SD-NEXT: add w10, w17, w10, lsr #31 -; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3 +; CHECK-SD-NEXT: smov w15, v0.h[3] ; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3 -; CHECK-SD-NEXT: add w9, w11, w9 -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: add w10, w13, w10 -; CHECK-SD-NEXT: add w9, w15, w12, lsr #31 ; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3 -; CHECK-SD-NEXT: mov v0.h[1], w10 +; CHECK-SD-NEXT: add w10, w12, w10 +; CHECK-SD-NEXT: fmov s0, w10 +; CHECK-SD-NEXT: add w9, w11, w9 +; CHECK-SD-NEXT: add w10, w16, w13, lsr #31 +; CHECK-SD-NEXT: add x8, x15, x8, lsr #32 +; CHECK-SD-NEXT: mov v0.h[1], w9 +; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3 ; CHECK-SD-NEXT: asr w10, w8, #2 -; CHECK-SD-NEXT: add w9, w16, w9 +; CHECK-SD-NEXT: add w9, w14, w9 ; CHECK-SD-NEXT: add w8, w10, w8, lsr #31 ; CHECK-SD-NEXT: mov v0.h[2], w9 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 -; CHECK-SD-NEXT: add w8, w14, w8 +; CHECK-SD-NEXT: add w8, w15, w8 ; CHECK-SD-NEXT: mov v0.h[3], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret @@ -986,36 +989,44 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: mov w14, #100 // =0x64 +; CHECK-SD-NEXT: mov w15, #100 // =0x64 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8 -; CHECK-SD-NEXT: smov x9, v1.h[0] -; CHECK-SD-NEXT: smov x10, v1.h[1] +; CHECK-SD-NEXT: smov x9, v1.h[1] +; CHECK-SD-NEXT: smov x10, v1.h[0] ; CHECK-SD-NEXT: smov x11, v1.h[2] -; CHECK-SD-NEXT: smov w12, v1.h[0] -; CHECK-SD-NEXT: smov x13, v1.h[3] -; CHECK-SD-NEXT: smov w15, v1.h[1] +; 
CHECK-SD-NEXT: smov x14, v1.h[3] ; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 ; CHECK-SD-NEXT: smull x11, w11, w8 -; CHECK-SD-NEXT: asr x9, x9, #37 -; CHECK-SD-NEXT: smull x8, w13, w8 -; CHECK-SD-NEXT: asr x10, x10, #37 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 -; CHECK-SD-NEXT: asr x11, x11, #37 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: msub w9, w9, w14, w12 -; CHECK-SD-NEXT: msub w10, w10, w14, w15 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: add w9, w11, w11, lsr #31 +; CHECK-SD-NEXT: asr x12, x9, #37 +; CHECK-SD-NEXT: smull x8, w14, w8 +; CHECK-SD-NEXT: smov w14, v1.h[1] +; CHECK-SD-NEXT: asr x13, x10, #37 +; CHECK-SD-NEXT: lsr w12, w12, #31 +; CHECK-SD-NEXT: lsr w13, w13, #31 +; CHECK-SD-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; CHECK-SD-NEXT: add x9, x12, x9, asr #37 +; CHECK-SD-NEXT: // kill: def $w13 killed $w13 killed $x13 def $x13 +; CHECK-SD-NEXT: add x10, x13, x10, asr #37 +; CHECK-SD-NEXT: smov w12, v1.h[0] +; CHECK-SD-NEXT: asr x13, x11, #37 +; CHECK-SD-NEXT: msub w9, w9, w15, w14 +; CHECK-SD-NEXT: msub w10, w10, w15, w12 +; CHECK-SD-NEXT: lsr w12, w13, #31 +; CHECK-SD-NEXT: asr x13, x8, #37 +; CHECK-SD-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; CHECK-SD-NEXT: fmov s0, w10 +; CHECK-SD-NEXT: add x10, x12, x11, asr #37 ; CHECK-SD-NEXT: smov w11, v1.h[2] -; CHECK-SD-NEXT: msub w9, w9, w14, w11 -; CHECK-SD-NEXT: mov v0.h[1], w10 -; CHECK-SD-NEXT: smov w10, v1.h[3] -; CHECK-SD-NEXT: msub w8, w8, w14, w10 -; CHECK-SD-NEXT: mov v0.h[2], w9 +; CHECK-SD-NEXT: msub w10, w10, w15, w11 +; CHECK-SD-NEXT: mov v0.h[1], w9 +; CHECK-SD-NEXT: lsr w9, w13, #31 +; CHECK-SD-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; CHECK-SD-NEXT: add x8, x9, x8, asr #37 +; CHECK-SD-NEXT: smov w9, v1.h[3] +; CHECK-SD-NEXT: mov v0.h[2], w10 +; CHECK-SD-NEXT: msub w8, w8, w15, w9 ; CHECK-SD-NEXT: mov v0.h[3], w8 ; 
CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret @@ -1770,32 +1781,29 @@ define <3 x i16> @sv3i16_7(<3 x i16> %d, <3 x i16> %e) { ; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493 ; CHECK-SD-NEXT: smov x10, v0.h[1] ; CHECK-SD-NEXT: movk x8, #37449, lsl #16 -; CHECK-SD-NEXT: smov w12, v0.h[0] -; CHECK-SD-NEXT: smov x11, v0.h[2] -; CHECK-SD-NEXT: smov w13, v0.h[1] +; CHECK-SD-NEXT: smov w11, v0.h[0] +; CHECK-SD-NEXT: smov x13, v0.h[2] +; CHECK-SD-NEXT: smov w12, v0.h[1] +; CHECK-SD-NEXT: smov w14, v0.h[2] ; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 -; CHECK-SD-NEXT: smull x8, w11, w8 -; CHECK-SD-NEXT: smov w11, v0.h[2] -; CHECK-SD-NEXT: lsr x9, x9, #32 -; CHECK-SD-NEXT: lsr x10, x10, #32 -; CHECK-SD-NEXT: add w9, w9, w12 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: asr w14, w9, #2 -; CHECK-SD-NEXT: add w10, w10, w13 +; CHECK-SD-NEXT: smull x8, w13, w8 +; CHECK-SD-NEXT: add x9, x11, x9, lsr #32 +; CHECK-SD-NEXT: add x10, x12, x10, lsr #32 +; CHECK-SD-NEXT: asr w13, w9, #2 +; CHECK-SD-NEXT: add x8, x14, x8, lsr #32 ; CHECK-SD-NEXT: asr w15, w10, #2 -; CHECK-SD-NEXT: add w8, w8, w11 -; CHECK-SD-NEXT: add w9, w14, w9, lsr #31 -; CHECK-SD-NEXT: asr w14, w8, #2 +; CHECK-SD-NEXT: add w9, w13, w9, lsr #31 +; CHECK-SD-NEXT: asr w13, w8, #2 ; CHECK-SD-NEXT: add w10, w15, w10, lsr #31 ; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3 -; CHECK-SD-NEXT: add w8, w14, w8, lsr #31 +; CHECK-SD-NEXT: add w8, w13, w8, lsr #31 ; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3 -; CHECK-SD-NEXT: add w9, w12, w9 +; CHECK-SD-NEXT: add w9, w11, w9 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: add w10, w13, w10 -; CHECK-SD-NEXT: add w8, w11, w8 +; CHECK-SD-NEXT: add w10, w12, w10 +; CHECK-SD-NEXT: add w8, w14, w8 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: mov v0.h[2], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -1839,23 +1847,29 @@ define <3 x i16> @sv3i16_100(<3 x i16> %d, <3 x 
i16> %e) { ; CHECK-SD-NEXT: smov x10, v0.h[1] ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 ; CHECK-SD-NEXT: smov x11, v0.h[2] -; CHECK-SD-NEXT: mov w12, #100 // =0x64 -; CHECK-SD-NEXT: smov w13, v0.h[1] +; CHECK-SD-NEXT: mov w14, #100 // =0x64 ; CHECK-SD-NEXT: smull x9, w9, w8 ; CHECK-SD-NEXT: smull x10, w10, w8 ; CHECK-SD-NEXT: smull x8, w11, w8 -; CHECK-SD-NEXT: smov w11, v0.h[0] -; CHECK-SD-NEXT: asr x9, x9, #37 -; CHECK-SD-NEXT: asr x10, x10, #37 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 -; CHECK-SD-NEXT: msub w9, w9, w12, w11 +; CHECK-SD-NEXT: asr x12, x9, #37 +; CHECK-SD-NEXT: asr x11, x10, #37 +; CHECK-SD-NEXT: lsr w12, w12, #31 +; CHECK-SD-NEXT: asr x13, x8, #37 +; CHECK-SD-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; CHECK-SD-NEXT: add x9, x12, x9, asr #37 +; CHECK-SD-NEXT: lsr w11, w11, #31 +; CHECK-SD-NEXT: smov w12, v0.h[0] +; CHECK-SD-NEXT: // kill: def $w11 killed $w11 killed $x11 def $x11 +; CHECK-SD-NEXT: add x10, x11, x10, asr #37 +; CHECK-SD-NEXT: smov w11, v0.h[1] +; CHECK-SD-NEXT: msub w9, w9, w14, w12 +; CHECK-SD-NEXT: lsr w12, w13, #31 +; CHECK-SD-NEXT: msub w10, w10, w14, w11 ; CHECK-SD-NEXT: smov w11, v0.h[2] -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: msub w10, w10, w12, w13 -; CHECK-SD-NEXT: msub w8, w8, w12, w11 +; CHECK-SD-NEXT: // kill: def $w12 killed $w12 killed $x12 def $x12 +; CHECK-SD-NEXT: add x8, x12, x8, asr #37 ; CHECK-SD-NEXT: fmov s0, w9 +; CHECK-SD-NEXT: msub w8, w8, w14, w11 ; CHECK-SD-NEXT: mov v0.h[1], w10 ; CHECK-SD-NEXT: mov v0.h[2], w8 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -2444,15 +2458,14 @@ define <3 x i32> @sv3i32_7(<3 x i32> %d, <3 x i32> %e) { ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: smull x8, w9, w8 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: add w8, w8, w9 -; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 +; CHECK-SD-NEXT: add x8, x9, 
x8, lsr #32 ; CHECK-SD-NEXT: asr w10, w8, #2 +; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 ; CHECK-SD-NEXT: add w8, w10, w8, lsr #31 ; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 -; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2 ; CHECK-SD-NEXT: add w8, w9, w8 +; CHECK-SD-NEXT: sshr v2.2s, v1.2s, #2 ; CHECK-SD-NEXT: usra v2.2s, v1.2s, #31 ; CHECK-SD-NEXT: mls v0.2s, v2.2s, v3.2s ; CHECK-SD-NEXT: mov v0.s[2], w8 @@ -2494,13 +2507,15 @@ define <3 x i32> @sv3i32_100(<3 x i32> %d, <3 x i32> %e) { ; CHECK-SD-NEXT: mov w9, v0.s[2] ; CHECK-SD-NEXT: movi v2.2s, #100 ; CHECK-SD-NEXT: movk w8, #20971, lsl #16 -; CHECK-SD-NEXT: mov w10, #100 // =0x64 ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: smull x8, w9, w8 ; CHECK-SD-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 +; CHECK-SD-NEXT: asr x10, x8, #37 +; CHECK-SD-NEXT: lsr w10, w10, #31 ; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #37 +; CHECK-SD-NEXT: // kill: def $w10 killed $w10 killed $x10 def $x10 +; CHECK-SD-NEXT: add x8, x10, x8, asr #37 +; CHECK-SD-NEXT: mov w10, #100 // =0x64 ; CHECK-SD-NEXT: msub w8, w8, w10, w9 ; CHECK-SD-NEXT: xtn v1.2s, v1.2d ; CHECK-SD-NEXT: usra v1.2s, v1.2s, #31 @@ -2670,12 +2685,12 @@ define <3 x i32> @uv3i32_7(<3 x i32> %d, <3 x i32> %e) { ; CHECK-SD-NEXT: dup v1.2s, w8 ; CHECK-SD-NEXT: umull x8, w9, w8 ; CHECK-SD-NEXT: umull v1.2d, v0.2s, v1.2s -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: sub w10, w9, w8 +; CHECK-SD-NEXT: sub x10, x9, x8, lsr #32 +; CHECK-SD-NEXT: lsr w10, w10, #1 ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #32 -; CHECK-SD-NEXT: add w8, w8, w10, lsr #1 -; CHECK-SD-NEXT: lsr w8, w8, #2 +; CHECK-SD-NEXT: add x8, x10, x8, lsr #32 ; CHECK-SD-NEXT: sub v2.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: lsr w8, w8, #2 ; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-SD-NEXT: add w8, w9, w8 diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll 
index 1223ae3a15e7b..cd168ac3f678d 100644 --- a/llvm/test/CodeGen/AArch64/srem-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll @@ -5,10 +5,10 @@ define i32 @fold_srem_positive_odd(i32 %x) { ; CHECK-LABEL: fold_srem_positive_odd: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #37253 // =0x9185 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: movk w8, #44150, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: add x8, x0, x8, lsr #32 ; CHECK-NEXT: asr w9, w8, #6 ; CHECK-NEXT: add w8, w9, w8, lsr #31 ; CHECK-NEXT: mov w9, #95 // =0x5f @@ -23,11 +23,13 @@ define i32 @fold_srem_positive_even(i32 %x) { ; CHECK-LABEL: fold_srem_positive_even: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #36849 // =0x8ff1 -; CHECK-NEXT: mov w9, #1060 // =0x424 ; CHECK-NEXT: movk w8, #15827, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 -; CHECK-NEXT: asr x8, x8, #40 -; CHECK-NEXT: add w8, w8, w8, lsr #31 +; CHECK-NEXT: asr x9, x8, #40 +; CHECK-NEXT: lsr w9, w9, #31 +; CHECK-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; CHECK-NEXT: add x8, x9, x8, asr #40 +; CHECK-NEXT: mov w9, #1060 // =0x424 ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, 1060 @@ -39,11 +41,13 @@ define i32 @fold_srem_negative_odd(i32 %x) { ; CHECK-LABEL: fold_srem_negative_odd: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #65445 // =0xffa5 -; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d ; CHECK-NEXT: movk w8, #42330, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 -; CHECK-NEXT: asr x8, x8, #40 -; CHECK-NEXT: add w8, w8, w8, lsr #31 +; CHECK-NEXT: asr x9, x8, #40 +; CHECK-NEXT: lsr w9, w9, #31 +; CHECK-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; CHECK-NEXT: add x8, x9, x8, asr #40 +; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, -723 @@ -55,11 +59,13 @@ define i32 @fold_srem_negative_even(i32 %x) { ; CHECK-LABEL: fold_srem_negative_even: ; CHECK: // %bb.0: ; 
CHECK-NEXT: mov w8, #62439 // =0xf3e7 -; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b ; CHECK-NEXT: movk w8, #64805, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 -; CHECK-NEXT: asr x8, x8, #40 -; CHECK-NEXT: add w8, w8, w8, lsr #31 +; CHECK-NEXT: asr x9, x8, #40 +; CHECK-NEXT: lsr w9, w9, #31 +; CHECK-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; CHECK-NEXT: add x8, x9, x8, asr #40 +; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, -22981 @@ -72,10 +78,10 @@ define i32 @combine_srem_sdiv(i32 %x) { ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #37253 // =0x9185 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: movk w8, #44150, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w8, w8, w0 +; CHECK-NEXT: add x8, x0, x8, lsr #32 ; CHECK-NEXT: asr w9, w8, #6 ; CHECK-NEXT: add w8, w9, w8, lsr #31 ; CHECK-NEXT: mov w9, #95 // =0x5f diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll index 8e1d61b51e2bb..adfc04b7672c2 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ -1859,56 +1859,64 @@ define void @udiv_constantsplat_v8i32(ptr %a) { ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 ; NONEON-NOSVE-NEXT: ldr w9, [sp, #28] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 ; NONEON-NOSVE-NEXT: lsr w11, w9, #6 ; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, 
#32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 ; NONEON-NOSVE-NEXT: lsr w9, w9, #6 ; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #56] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #20] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 ; NONEON-NOSVE-NEXT: lsr w11, w9, #6 ; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 ; NONEON-NOSVE-NEXT: lsr w9, w9, #6 ; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #48] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #12] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 ; NONEON-NOSVE-NEXT: lsr w11, w9, #6 ; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 
+; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 ; NONEON-NOSVE-NEXT: lsr w9, w9, #6 ; NONEON-NOSVE-NEXT: stp w9, w11, [sp, #40] ; NONEON-NOSVE-NEXT: ldr w9, [sp, #4] ; NONEON-NOSVE-NEXT: umull x10, w9, w8 -; NONEON-NOSVE-NEXT: lsr x10, x10, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w10 -; NONEON-NOSVE-NEXT: add w9, w10, w9, lsr #1 -; NONEON-NOSVE-NEXT: lsr w11, w9, #6 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x9, x9, x10, lsr #32 +; NONEON-NOSVE-NEXT: lsr w10, w9, #6 ; NONEON-NOSVE-NEXT: ldr w9, [sp] ; NONEON-NOSVE-NEXT: umull x8, w9, w8 -; NONEON-NOSVE-NEXT: lsr x8, x8, #32 -; NONEON-NOSVE-NEXT: sub w9, w9, w8 -; NONEON-NOSVE-NEXT: add w8, w8, w9, lsr #1 +; NONEON-NOSVE-NEXT: // kill: def $w9 killed $w9 killed $x9 def $x9 +; NONEON-NOSVE-NEXT: sub x9, x9, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr w9, w9, #1 +; NONEON-NOSVE-NEXT: add x8, x9, x8, lsr #32 ; NONEON-NOSVE-NEXT: lsr w8, w8, #6 -; NONEON-NOSVE-NEXT: stp w8, w11, [sp, #32] +; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #32] ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32] ; NONEON-NOSVE-NEXT: stp q0, q1, [x0] ; NONEON-NOSVE-NEXT: add sp, sp, #64 diff --git a/llvm/test/CodeGen/AArch64/urem-lkk.ll b/llvm/test/CodeGen/AArch64/urem-lkk.ll index 40016c7e4ce0f..1f36f0dfe8366 100644 --- a/llvm/test/CodeGen/AArch64/urem-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-lkk.ll @@ -6,11 +6,12 @@ define i32 @fold_urem_positive_odd(i32 %x) { ; CHECK-LABEL: fold_urem_positive_odd: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #8969 // =0x2309 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umull x8, w0, w8 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: sub w9, w0, w8 -; CHECK-NEXT: add w8, w8, w9, lsr #1 +; CHECK-NEXT: sub x9, x0, x8, lsr #32 +; CHECK-NEXT: lsr w9, w9, #1 +; 
CHECK-NEXT: add x8, x9, x8, lsr #32 ; CHECK-NEXT: mov w9, #95 // =0x5f ; CHECK-NEXT: lsr w8, w8, #6 ; CHECK-NEXT: msub w0, w8, w9, w0 @@ -38,11 +39,12 @@ define i32 @combine_urem_udiv(i32 %x) { ; CHECK-SD-LABEL: combine_urem_udiv: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #8969 // =0x2309 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: movk w8, #22765, lsl #16 ; CHECK-SD-NEXT: umull x8, w0, w8 -; CHECK-SD-NEXT: lsr x8, x8, #32 -; CHECK-SD-NEXT: sub w9, w0, w8 -; CHECK-SD-NEXT: add w8, w8, w9, lsr #1 +; CHECK-SD-NEXT: sub x9, x0, x8, lsr #32 +; CHECK-SD-NEXT: lsr w9, w9, #1 +; CHECK-SD-NEXT: add x8, x9, x8, lsr #32 ; CHECK-SD-NEXT: mov w9, #95 // =0x5f ; CHECK-SD-NEXT: lsr w8, w8, #6 ; CHECK-SD-NEXT: msub w9, w8, w9, w0