-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Reland "[RegAlloc] Fix the terminal rule check for interfere with DstReg (#168661)" #169219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-webassembly @llvm/pr-subscribers-backend-aarch64 Author: None (hstk30-hw) Changes: Reland d5f3ab8, fix testcases. Patch is 1023.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169219.diff 55 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 25c4375a73ce0..e624088a0964e 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -4150,7 +4150,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
continue;
Register OtherSrcReg, OtherReg;
unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
- if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ if (!isMoveInstr(*TRI, &MI, OtherSrcReg, OtherReg, OtherSrcSubReg,
OtherSubReg))
return false;
if (OtherReg == SrcReg)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 4894932d3c9b1..99c540366fb12 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -803,20 +803,20 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
; CHECK-SD-NEXT: smlal2 v4.2d, v16.4s, v20.4s
; CHECK-SD-NEXT: smlal v6.2d, v16.2s, v20.2s
; CHECK-SD-NEXT: smlal v3.2d, v16.2s, v19.2s
-; CHECK-SD-NEXT: smlal2 v1.2d, v16.4s, v18.4s
+; CHECK-SD-NEXT: smlal2 v0.2d, v16.4s, v18.4s
; CHECK-SD-NEXT: smlal v7.2d, v16.2s, v17.2s
-; CHECK-SD-NEXT: smlal v0.2d, v16.2s, v18.2s
+; CHECK-SD-NEXT: smlal v1.2d, v16.2s, v18.2s
; CHECK-SD-NEXT: smlal2 v5.2d, v16.4s, v17.4s
; CHECK-SD-NEXT: b.ne .LBB6_7
; CHECK-SD-NEXT: // %bb.8: // %middle.block
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v6.2d
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v6.2d
; CHECK-SD-NEXT: add v3.2d, v3.2d, v7.2d
; CHECK-SD-NEXT: cmp x10, x9
-; CHECK-SD-NEXT: add v1.2d, v1.2d, v4.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v4.2d
; CHECK-SD-NEXT: add v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v3.2d
-; CHECK-SD-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-SD-NEXT: addp d0, v0.2d
; CHECK-SD-NEXT: fmov x8, d0
; CHECK-SD-NEXT: b.eq .LBB6_15
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
index 7542e9c4b8f5b..a4f20905a85c2 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
@@ -35,15 +35,15 @@ define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK-LABEL: check_deinterleaving_has_deinterleave:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: add x8, x0, #16
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: movi v4.2d, #0000000000000000
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: movi v6.2d, #0000000000000000
+; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: movi v16.2d, #0000000000000000
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -64,31 +64,31 @@ define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK-NEXT: ushll v24.4s, v18.4h, #0
; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0
; CHECK-NEXT: ushll v20.4s, v20.4h, #0
-; CHECK-NEXT: and v21.16b, v21.16b, v1.16b
-; CHECK-NEXT: and v19.16b, v19.16b, v1.16b
-; CHECK-NEXT: and v22.16b, v22.16b, v1.16b
-; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
-; CHECK-NEXT: and v23.16b, v23.16b, v1.16b
-; CHECK-NEXT: and v24.16b, v24.16b, v1.16b
-; CHECK-NEXT: and v18.16b, v18.16b, v1.16b
-; CHECK-NEXT: and v20.16b, v20.16b, v1.16b
-; CHECK-NEXT: add v4.4s, v4.4s, v19.4s
-; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v22.4s
-; CHECK-NEXT: add v3.4s, v3.4s, v17.4s
+; CHECK-NEXT: and v21.16b, v21.16b, v2.16b
+; CHECK-NEXT: and v19.16b, v19.16b, v2.16b
+; CHECK-NEXT: and v22.16b, v22.16b, v2.16b
+; CHECK-NEXT: and v17.16b, v17.16b, v2.16b
+; CHECK-NEXT: and v23.16b, v23.16b, v2.16b
+; CHECK-NEXT: and v24.16b, v24.16b, v2.16b
+; CHECK-NEXT: and v18.16b, v18.16b, v2.16b
+; CHECK-NEXT: and v20.16b, v20.16b, v2.16b
+; CHECK-NEXT: add v5.4s, v5.4s, v19.4s
+; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v22.4s
+; CHECK-NEXT: add v4.4s, v4.4s, v17.4s
; CHECK-NEXT: add v16.4s, v16.4s, v23.4s
-; CHECK-NEXT: add v5.4s, v5.4s, v24.4s
-; CHECK-NEXT: add v6.4s, v6.4s, v20.4s
-; CHECK-NEXT: add v7.4s, v7.4s, v18.4s
+; CHECK-NEXT: add v6.4s, v6.4s, v24.4s
+; CHECK-NEXT: add v7.4s, v7.4s, v20.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v18.4s
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: add v1.4s, v7.4s, v3.4s
-; CHECK-NEXT: add v3.4s, v16.4s, v4.4s
-; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
-; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT: add v2.4s, v16.4s, v5.4s
+; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
+; CHECK-NEXT: add v3.4s, v7.4s, v3.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 4f00aed3aa4bc..ddeeca7d5df50 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -31,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -205,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
; CHECK-NEXT: add x1, x1, x10
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
-; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
-; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
; CHECK-NEXT: faddv d0, p0, z1.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index aed3072bb4af3..355adec955e4b 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -25,14 +25,14 @@ define dso_local %"struct.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q3, q2, [x9]
; CHECK-NEXT: cmp x8, #1600
; CHECK-NEXT: ldp q5, q4, [x10]
-; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #0
-; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #0
-; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #90
-; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #90
+; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #0
+; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #0
+; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #90
+; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: zip2 v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: zip2 v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: zip1 v0.2d, v1.2d, v0.2d
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: faddp d1, v2.2d
; CHECK-NEXT: ret
@@ -159,20 +159,20 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q17, q16, [x8], #64
; CHECK-NEXT: ldp q19, q18, [x9], #64
; CHECK-NEXT: fcmla v2.2d, v7.2d, v5.2d, #0
-; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #0
-; CHECK-NEXT: fcmla v1.2d, v19.2d, v17.2d, #0
+; CHECK-NEXT: fcmla v1.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT: fcmla v0.2d, v19.2d, v17.2d, #0
; CHECK-NEXT: fcmla v3.2d, v18.2d, v16.2d, #0
; CHECK-NEXT: fcmla v2.2d, v7.2d, v5.2d, #90
-; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #90
-; CHECK-NEXT: fcmla v1.2d, v19.2d, v17.2d, #90
+; CHECK-NEXT: fcmla v1.2d, v6.2d, v4.2d, #90
+; CHECK-NEXT: fcmla v0.2d, v19.2d, v17.2d, #90
; CHECK-NEXT: fcmla v3.2d, v18.2d, v16.2d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: zip2 v4.2d, v1.2d, v3.2d
-; CHECK-NEXT: zip1 v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: zip2 v3.2d, v2.2d, v0.2d
-; CHECK-NEXT: zip1 v0.2d, v2.2d, v0.2d
-; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: zip2 v4.2d, v0.2d, v3.2d
+; CHECK-NEXT: zip1 v0.2d, v0.2d, v3.2d
+; CHECK-NEXT: zip2 v3.2d, v2.2d, v1.2d
+; CHECK-NEXT: zip1 v1.2d, v2.2d, v1.2d
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT: fadd v1.2d, v4.2d, v3.2d
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: faddp d1, v1.2d
diff --git a/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll b/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll
index 338084295fc7f..0fe4683d97a23 100644
--- a/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll
+++ b/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll
@@ -16,8 +16,9 @@ define i32 @test(ptr %ptr) {
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: LBB0_1: ; %.thread
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: lsr w11, w9, #1
; CHECK-NEXT: sub w10, w9, #1
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: mov w9, w11
; CHECK-NEXT: tbnz w10, #0, LBB0_1
; CHECK-NEXT: ; %bb.2: ; %bb343
; CHECK-NEXT: and w9, w10, #0x1
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index 52a77cb396909..6c6a691760af3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -147,15 +147,15 @@ define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
define <4 x i1> @extract_v4i1_nxv32i1_0(<vscale x 32 x i1> %arg) {
; CHECK-LABEL: extract_v4i1_nxv32i1_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
+; CHECK-NEXT: umov w8, v1.b[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: umov w9, v1.b[2]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: umov w8, v1.b[2]
-; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: umov w8, v1.b[3]
+; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%ext = call <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1> %arg, i64 0)
ret <4 x i1> %ext
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 72994100b2970..1cefe96962e29 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -248,15 +248,15 @@ define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
; CHECK-LABEL: extract_v4i1_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: mov w8, v1.s[2]
-; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: mov w8, v1.s[3]
+; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%mask = call <4 x i1> @llvm.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
ret <4 x i1> %mask
@@ -265,23 +265,23 @@ define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
; CHECK-LABEL: extract_v8i1_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
+; CHECK-NEXT: umov w8, v1.h[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: umov w9, v1.h[2]
; CHECK-NEXT: mov v0.b[1], w8
-; CHECK-NEXT: umov w8, v1.h[2]
-; CHECK-NEXT: mov v0.b[2], w8
; CHECK-NEXT: umov w8, v1.h[3]
+; CHECK-NEXT: mov v0.b[2], w9
+; CHECK-NEXT: umov w9, v1.h[4]
; CHECK-NEXT: mov v0.b[3], w8
-; CHECK-NEXT: umov w8, v1.h[4]
-; CHECK-NEXT: mov v0.b[4], w8
; CHECK-NEXT: umov w8, v1.h[5]
+; CHECK-NEXT: mov v0.b[4], w9
+; CHECK-NEXT: umov w9, v1.h[6]
; CHECK-NEXT: mov v0.b[5], w8
-; CHECK-NEXT: umov w8, v1.h[6]
-; CHECK-NEXT: mov v0.b[6], w8
; CHECK-NEXT: umov w8, v1.h[7]
+; CHECK-NEXT: mov v0.b[6], w9
; CHECK-NEXT: mov v0.b[7], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%mask = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
ret <8 x i1> %mask
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
index 8e807cda7166d..41e4a38fad90b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -8,15 +8,15 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: mov w8, v1.s[2]
-; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: mov w8, v1.s[3]
+; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%el0 = extractelement <vscale x 4 x i1> %a, i32 0
%el1 = extractelement <vscale x 4 x i1> %a, i32 1
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 935189dec48ac..74a717f1635a3 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -2835,11 +2835,11 @@ define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: .LBB24_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16
-; CHECK-BE-NEXT: mov x8, x0
+; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: ld1 { v1.8h }, [x0]
-; CHECK-BE-NEXT: add x0, x0, #16
-; CHECK-BE-NEXT: add x9, x8, #48
-; CHECK-BE-NEXT: ld1 { v3.8h }, [x0]
+; CHECK-BE-NEXT: ld1 { v3.8h }, [x8]
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: add x10, x0, #32
; CHECK-BE-NEXT: subs w2, w2, #1
; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
@@ -2847,11 +2847,11 @@ define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: umull2 v5.4s, v3.8h, v0.8h
; CHECK-BE-NEXT: umull v0.4s, v3.4h, v0.4h
; CHECK-BE-NEXT: umull2 v1.4s, v1.8h, v2.8h
-; CHECK-BE-NEXT: st1 { v4.4s }, [x8]
-; CHECK-BE-NEXT: add x8, x8, #32
+; CHECK-BE-NEXT: st1 { v4.4s }, [x0]
+; CHECK-BE-NEXT: mov x0, x8
; CHECK-BE-NEXT: st1 { v5.4s }, [x9]
-; CHECK-BE-NEXT: st1 { v0.4s }, [x8]
-; CHECK-BE-NEXT: st1 { v1.4s }, [x0]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x10]
+; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: b.ne .LBB24_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: mov w0, wzr
@@ -2950,26 +2950,26 @@ define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: .LBB25_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v4.16b }, [x0]
-; CHECK-BE-NEXT: add x10, x1, #48
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: add x8, x1, #32
+; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v16.4s }, [x1]
-; CHECK-BE-NEXT: add x9, x1, #32
-; CHECK-BE-NEXT: ld1 { v18.4s }, [x10]
; CHECK-BE-NEXT: add x1, x1, #16
-; CHECK-BE-NEXT: ld1 { v20.4s }, [x9]
+; CHECK-BE-NEXT: ld1 { v20.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v22.4s }, [x1]
-; CHECK-BE-NEXT: add x9, x0, #96
+; CHECK-BE-NEXT: add x8, x0, #96
; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v3.16b
; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v2.16b
; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v1.16b
; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
; CHECK-BE-NEXT: ext v24.16b, v18.16b, v18.16b, #8
-; CHECK-BE-NEXT: mov x8, x0
+; CHECK-BE-NEXT: add x9, x0, #32
; CHECK-BE-NEXT: ext v25.16b, v20.16b, v20.16b, #8
-; CHECK-BE-NEXT: add x10, x0, #32
+; CHECK-BE-NEXT: add x10, x0, #16
; CHECK-BE-NEXT: subs w2, w2, #1
; CHECK-BE-NEXT: ext v17.16b, v5.16b, v5.16b, #8
-; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
; CHECK-BE-NEXT: ext v19.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
; CHECK-BE-NEXT: rev32 v21.8b, v7.8b
; CHECK-BE-NEXT: rev32 v23.8b, v4.8b
; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8
@@ -2986,22 +2986,22 @@ define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
; CHECK-BE-NEXT: umull v17.2d, v17.2s, v24.2s
; CHECK-BE-NEXT: umull v19.2d, v19.2s, v25.2s
-; CHECK-BE-NEXT: st1 { v5.2d }, [x9]
+; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
; CHECK-BE-NEXT: umull v5.2d, v6.2s, v20.2s
; CHECK-BE-NEXT: umull v6.2d, v7.2s, v21.2s
-; CHECK-BE-NEXT: add x9, x0, #112
+; ...
[truncated]
|
boomanaiden154
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM assuming premerge passes. If you land this later, might be a good idea to rerun tests to make sure nothing has changed.
Reland d5f3ab8, fix testcases.