diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 7ef7a89b5749f..9912190e1bced 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -149,6 +149,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } + bool enableSubRegLiveness() const override { return true; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll index a1712a5ec7a27..444f579f23242 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll @@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll index ee5fbe39b4492..62af028defde5 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll @@ -1616,7 +1616,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1637,7 +1637,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1658,7 +1658,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1679,7 +1679,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1700,7 +1700,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -2343,10 +2343,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2368,10 +2368,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2393,10 +2393,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2418,10 +2418,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2443,10 +2443,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -2996,7 +2996,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3017,7 +3017,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3038,7 +3038,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3059,7 +3059,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3080,7 +3080,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3531,7 +3531,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3552,7 +3552,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3573,7 +3573,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3594,7 +3594,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3615,7 +3615,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -4072,7 +4072,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4100,7 +4100,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4128,7 +4128,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4156,7 +4156,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4184,7 +4184,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4745,7 +4745,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4773,7 +4773,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4801,7 +4801,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5362,7 +5362,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5390,7 +5390,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5418,7 +5418,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5446,7 +5446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5474,7 +5474,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6002,7 +6002,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6030,7 +6030,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6058,7 +6058,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6114,7 +6114,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll index 83e383f335637..f043f99327308 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll @@ -517,7 +517,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -534,7 +534,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -551,7 +551,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -568,7 +568,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -585,7 +585,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1119,7 +1119,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1136,7 +1136,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1153,7 +1153,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1170,7 +1170,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -2356,10 +2356,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2379,10 +2379,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2402,10 +2402,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2425,10 +2425,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2448,10 +2448,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3479,7 +3479,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3498,7 +3498,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3517,7 +3517,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3536,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3555,7 +3555,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -4004,8 +4004,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4025,8 +4025,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4046,8 +4046,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4067,8 +4067,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4088,8 +4088,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4589,8 +4589,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4610,8 +4610,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4631,8 +4631,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4652,8 +4652,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4673,8 +4673,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5174,8 +5174,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5195,8 +5195,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5216,8 +5216,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5237,8 +5237,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5258,8 +5258,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5759,8 +5759,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5780,8 +5780,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5801,8 +5801,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5822,8 +5822,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5843,8 +5843,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll index 0c3ed9b0f1de0..df7b57e7e18f4 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll @@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -559,7 +559,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -576,7 +576,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -593,7 +593,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -610,7 +610,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1127,7 +1127,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1144,7 +1144,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1161,7 +1161,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1178,7 +1178,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1195,7 +1195,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1674,7 +1674,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1693,7 +1693,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1712,7 +1712,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1731,7 +1731,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1750,7 +1750,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -2406,10 +2406,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2429,10 +2429,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2452,10 +2452,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2475,10 +2475,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2498,10 +2498,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3049,7 +3049,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3087,7 +3087,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3106,7 +3106,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3125,7 +3125,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3574,7 +3574,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3593,7 +3593,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3612,7 +3612,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3631,7 +3631,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3650,7 +3650,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -4099,8 +4099,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4120,8 +4120,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4141,8 +4141,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4162,8 +4162,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4183,8 +4183,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4684,8 +4684,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4705,8 +4705,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4726,8 +4726,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4747,8 +4747,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4768,8 +4768,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5269,8 +5269,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5290,8 +5290,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5311,8 +5311,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5332,8 +5332,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5353,8 +5353,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5854,8 +5854,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5896,8 +5896,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5917,8 +5917,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5938,8 +5938,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll index 1fe63c9be8c62..80310a11add69 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -53,10 +53,6 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -180,10 +176,6 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -307,10 +299,6 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -434,10 +422,6 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -658,10 +642,6 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) { ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_return: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov x0, x2 ; CHECK-CAS-O1-NEXT: mov x1, x3 diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll index 07fbe5d7310f6..7141f53802bff 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll @@ -27,9 +27,8 @@ BB: define void @f_undef_15(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_15: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8], #32 ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8] ; CHECK-NEXT: add x8, x0, #64 @@ -46,19 +45,17 @@ BB: define void @f_undef_1(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_1: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: mov v16.16b, v0.16b -; CHECK-NEXT: mov v5.16b, v2.16b -; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4 +; CHECK-NEXT: mov v4.16b, v2.16b +; CHECK-NEXT: mov v5.16b, v0.16b ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: mov v6.16b, v0.16b ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: mov v4.16b, v3.16b -; CHECK-NEXT: mov v17.16b, v16.16b -; CHECK-NEXT: mov v6.16b, v5.16b -; CHECK-NEXT: st2 { v16.2d, v17.2d }, [x8], #32 +; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8], #32 +; CHECK-NEXT: mov v5.16b, v4.16b ; CHECK-NEXT: st2 { v1.2d, v2.2d }, [x8] ; CHECK-NEXT: add x8, x0, #64 -; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8] +; CHECK-NEXT: st2 { v4.2d, v5.2d }, [x8] +; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: add x8, x0, #96 ; CHECK-NEXT: st2 { v3.2d, v4.2d }, [x8] ; CHECK-NEXT: ret @@ -73,11 +70,10 @@ define void @noundefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) { ; CHECK-LABEL: noundefs: ; CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v5.16b, v2.16b -; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: mov v2.16b, v3.16b ; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 -; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] +; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0] ; CHECK-NEXT: ret BB: %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> @@ -89,11 +85,10 @@ define void @undefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) { ; CHECK-LABEL: undefs: ; CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v5.16b, v2.16b -; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: mov v2.16b, v3.16b ; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 -; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] +; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0] ; CHECK-NEXT: ret BB: %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll index 3c8aca5145261..f0fcafa5302e6 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -146,11 +146,11 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d1, v0 -; CHECK-NEXT: str d2, [x0, #16] ; CHECK-NEXT: mov.d v0[0], v1[0] +; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: ucvtf.2d v0, v0 +; CHECK-NEXT: str d1, [x0, #16] ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: st1.s { v1 }[2], [x8] -; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: st1.s { v0 }[2], [x8] +; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret entry: @@ -278,9 +278,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) { ; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: mov.h v2[0], v1[0] ; CHECK-NEXT: bic.4h v2, #255, lsl #8 -; CHECK-NEXT: ushll.4s v2, v2, #0 -; CHECK-NEXT: ucvtf.4s v2, v2 -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: ushll.4s v1, v2, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll index 7f20b5e5ee4df..75a96be9b435e 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll @@ -24,8 +24,6 @@ declare i128 @llvm.read_volatile_register.i128(metadata) #1 define void @test_wsr128(i128 noundef %v) #0 { ; CHECK-LE-LABEL: test_wsr128: ; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 -; CHECK-LE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; CHECK-LE-NEXT: msrr S1_2_C3_C4_5, x0, x1 ; CHECK-LE-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll index 37c61d0a4a0fb..4a84c673af8cf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -43,10 +43,6 @@ define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspa x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -94,10 +90,6 @@ define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap_seqcst: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspal x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -145,10 +137,6 @@ define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap_release: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspl x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -196,10 +184,6 @@ define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; ; LSE-LABEL: val_compare_and_swap_monotonic: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: casp x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -251,7 +235,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) { ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 -; LSE-NEXT: and x8, x7, x3 +; LSE-NEXT: and x8, x5, x3 ; LSE-NEXT: and x9, x4, x2 ; LSE-NEXT: mvn x10, x9 ; LSE-NEXT: mvn x11, x8 @@ -311,7 +295,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: orr x8, x4, x2 -; LSE-NEXT: orr x9, x7, x3 +; LSE-NEXT: orr x9, x5, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -368,7 +352,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: adds x8, x4, x2 -; LSE-NEXT: adc x9, x7, x3 +; LSE-NEXT: adc x9, x5, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -424,7 +408,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: subs x8, x4, x2 -; LSE-NEXT: sbc x9, x7, x3 +; LSE-NEXT: sbc x9, x5, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -484,8 +468,8 @@ define void @fetch_and_min(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, ge +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, ge ; LSE-NEXT: csel x8, x4, x2, ge ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -546,8 +530,8 @@ define void @fetch_and_max(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, lt +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, lt ; LSE-NEXT: csel x8, x4, x2, lt ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -608,8 +592,8 @@ define void @fetch_and_umin(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, hs +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, hs ; LSE-NEXT: csel x8, x4, x2, hs ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -670,8 +654,8 @@ define void @fetch_and_umax(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, lo +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, lo ; LSE-NEXT: csel x8, x4, x2, lo ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -714,8 +698,8 @@ define i128 @atomic_load_seq_cst(ptr %p) { ; ; LSE-LABEL: atomic_load_seq_cst: ; LSE: // %bb.0: -; LSE-NEXT: mov x2, #0 -; LSE-NEXT: mov x3, #0 +; LSE-NEXT: mov x2, #0 // =0x0 +; LSE-NEXT: mov x3, #0 // =0x0 ; LSE-NEXT: caspal x2, x3, x2, x3, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -747,8 +731,8 @@ define i128 @atomic_load_relaxed(i64, i64, ptr %p) { ; ; LSE-LABEL: atomic_load_relaxed: ; LSE: // %bb.0: -; LSE-NEXT: mov x0, #0 -; LSE-NEXT: mov x1, #0 +; LSE-NEXT: mov x0, #0 // =0x0 +; LSE-NEXT: mov x1, #0 // =0x0 ; LSE-NEXT: casp x0, x1, x0, x1, [x2] ; LSE-NEXT: ret %r = load atomic i128, ptr %p monotonic, align 16 @@ -779,9 +763,7 @@ define void @atomic_store_seq_cst(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_seq_cst: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] -; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB14_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -821,9 +803,7 @@ define void @atomic_store_release(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_release: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] -; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB15_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -863,9 +843,7 @@ define void @atomic_store_relaxed(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_relaxed: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] -; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB16_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -921,10 +899,6 @@ define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) { ; ; LSE-LABEL: cmpxchg_dead: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: casp x2, x3, x4, x5, [x0] ; LSE-NEXT: ret cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll index 2bf5419e54830..979a8b16f4217 100644 --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -463,9 +463,7 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -482,9 +480,7 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI36_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -504,14 +500,13 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) { ; CHECK-GI-LABEL: disguised_dup: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI37_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1] +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_1] ; CHECK-GI-NEXT: adrp x8, .LCPI37_0 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] -; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v1 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_0] +; CHECK-GI-NEXT: tbl.16b v1, { v0, v1 }, v1 ; CHECK-GI-NEXT: str q0, [x0] -; CHECK-GI-NEXT: str q2, [x1] +; CHECK-GI-NEXT: str q1, [x1] ; CHECK-GI-NEXT: ret %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 628fb550a0532..fc469a3169deb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -5490,18 +5490,14 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -5513,18 +5509,14 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -5539,18 +5531,14 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -5562,18 +5550,14 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -5588,18 +5572,14 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -5611,19 +5591,15 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -5638,18 +5614,14 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -5661,19 +5633,15 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -5688,18 +5656,14 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>, define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -5711,19 +5675,15 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -5738,18 +5698,14 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -5761,19 +5717,15 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -5788,18 +5740,14 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>, define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -5811,19 +5759,15 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -5838,18 +5782,14 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -5861,19 +5801,15 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -5888,18 +5824,14 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>, define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -5911,19 +5843,15 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -5938,18 +5866,14 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -5961,19 +5885,15 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -5988,18 +5908,14 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -6011,19 +5927,15 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr % define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -6038,18 +5950,14 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -6061,19 +5969,15 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr % define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -6088,20 +5992,14 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -6113,20 +6011,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -6141,20 +6033,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0( define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -6166,20 +6052,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -6194,20 +6074,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #6 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #6 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -6219,21 +6093,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -6248,20 +6116,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0( define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #6 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #6 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -6273,21 +6135,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -6302,20 +6158,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0( define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -6327,21 +6177,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -6356,20 +6200,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0( define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -6381,21 +6219,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -6410,20 +6242,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0( define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -6435,21 +6261,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -6464,20 +6284,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0( define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -6489,21 +6303,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -6518,20 +6326,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0( define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -6543,21 +6345,15 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -6572,20 +6368,14 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -6597,21 +6387,15 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -6626,20 +6410,14 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -6651,21 +6429,15 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -6680,20 +6452,14 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane. define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -6705,21 +6471,15 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -6734,22 +6494,14 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane. define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -6761,22 +6513,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -6791,22 +6535,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -6818,22 +6554,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -6848,22 +6576,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -6875,23 +6595,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -6906,22 +6618,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -6933,23 +6637,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -6964,22 +6660,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -6991,23 +6679,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -7022,22 +6702,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -7049,23 +6721,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -7080,22 +6744,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -7107,23 +6763,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -7138,22 +6786,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -7165,23 +6805,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -7196,22 +6828,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -7223,23 +6847,15 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -7254,22 +6870,14 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -7281,23 +6889,15 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -7312,22 +6912,14 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -7339,23 +6931,15 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -7370,22 +6954,14 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -7397,23 +6973,15 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -7428,17 +6996,13 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -7449,17 +7013,13 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -7473,17 +7033,13 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -7494,17 +7050,13 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -7518,17 +7070,13 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -7540,8 +7088,6 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C ; CHECK-SD-LABEL: test_v8i16_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7549,8 +7095,6 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -7564,17 +7108,13 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -7586,8 +7126,6 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C ; CHECK-SD-LABEL: test_v4i16_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7595,8 +7133,6 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -7610,17 +7146,13 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -7632,8 +7164,6 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C ; CHECK-SD-LABEL: test_v4i32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7641,8 +7171,6 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -7656,17 +7184,13 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -7678,8 +7202,6 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C ; CHECK-SD-LABEL: test_v2i32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7687,8 +7209,6 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -7702,17 +7222,13 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -7724,8 +7240,6 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C ; CHECK-SD-LABEL: test_v2i64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7733,8 +7247,6 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -7748,17 +7260,13 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -7770,8 +7278,6 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C ; CHECK-SD-LABEL: test_v1i64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7779,8 +7285,6 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -7794,17 +7298,13 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -7816,8 +7316,6 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float ; CHECK-SD-LABEL: test_v4f32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7825,8 +7323,6 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -7840,17 +7336,13 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -7862,8 +7354,6 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float ; CHECK-SD-LABEL: test_v2f32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7871,8 +7361,6 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -7886,17 +7374,13 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -7908,8 +7392,6 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub ; CHECK-SD-LABEL: test_v2f64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7917,8 +7399,6 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -7932,17 +7412,13 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -7954,8 +7430,6 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub ; CHECK-SD-LABEL: test_v1f64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7963,8 +7437,6 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -7978,19 +7450,13 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -8001,19 +7467,13 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -8027,19 +7487,13 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -8050,19 +7504,13 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -8076,19 +7524,13 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -8099,20 +7541,14 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -8126,19 +7562,13 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -8149,20 +7579,14 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -8176,19 +7600,13 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, pt define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -8199,20 +7617,14 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -8226,19 +7638,13 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -8249,20 +7655,14 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -8276,19 +7676,13 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -8299,20 +7693,14 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -8326,19 +7714,13 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -8349,20 +7731,14 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -8376,19 +7752,13 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -8399,20 +7769,14 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -8426,19 +7790,13 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -8449,20 +7807,14 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -8476,19 +7828,13 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -8499,20 +7845,14 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -8526,19 +7866,13 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -8549,20 +7883,14 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -8576,21 +7904,13 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -8601,21 +7921,13 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -8629,21 +7941,13 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1 define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -8654,21 +7958,13 @@ define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -8682,21 +7978,13 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -8707,22 +7995,14 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -8736,21 +8016,13 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -8761,22 +8033,14 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -8790,21 +8054,13 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -8815,22 +8071,14 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -8844,21 +8092,13 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -8869,22 +8109,14 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -8898,21 +8130,13 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -8923,22 +8147,14 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -8952,21 +8168,13 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -8977,22 +8185,14 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -9006,21 +8206,13 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -9031,22 +8223,14 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -9060,21 +8244,13 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -9085,22 +8261,14 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -9114,21 +8282,13 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -9139,22 +8299,14 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -9168,21 +8320,13 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -9193,22 +8337,14 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -9222,17 +8358,13 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -9243,17 +8375,13 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -9267,17 +8395,13 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -9288,17 +8412,13 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -9312,17 +8432,13 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -9334,8 +8450,6 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9343,8 +8457,6 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -9358,17 +8470,13 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -9380,8 +8488,6 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9389,8 +8495,6 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -9404,17 +8508,13 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -9426,8 +8526,6 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9435,8 +8533,6 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -9450,17 +8546,13 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -9472,8 +8564,6 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9481,8 +8571,6 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -9496,17 +8584,13 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -9518,8 +8602,6 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9527,8 +8609,6 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -9542,17 +8622,13 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -9564,8 +8640,6 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9573,8 +8647,6 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -9588,17 +8660,13 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -9610,8 +8678,6 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9619,8 +8685,6 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -9634,17 +8698,13 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -9656,8 +8716,6 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9665,8 +8723,6 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -9680,17 +8736,13 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -9702,8 +8754,6 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9711,8 +8761,6 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -9726,17 +8774,13 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -9748,8 +8792,6 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9757,8 +8799,6 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -9772,19 +8812,13 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -9795,19 +8829,13 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -9821,19 +8849,13 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -9844,19 +8866,13 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -9870,19 +8886,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -9893,20 +8903,14 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -9920,19 +8924,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -9943,20 +8941,14 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -9970,19 +8962,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -9993,20 +8979,14 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -10020,19 +9000,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -10043,20 +9017,14 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -10070,19 +9038,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -10093,20 +9055,14 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -10120,19 +9076,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -10143,20 +9093,14 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -10170,19 +9114,13 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -10193,20 +9131,14 @@ define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -10220,19 +9152,13 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -10243,20 +9169,14 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -10270,19 +9190,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -10293,20 +9207,14 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -10320,19 +9228,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -10343,20 +9245,14 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -10370,21 +9266,13 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -10395,21 +9283,13 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -10423,21 +9303,13 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -10448,21 +9320,13 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -10476,21 +9340,13 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -10501,22 +9357,14 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -10530,21 +9378,13 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -10555,22 +9395,14 @@ define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -10584,21 +9416,13 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,< define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -10609,22 +9433,14 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -10638,21 +9454,13 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,< define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -10663,22 +9471,14 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -10692,21 +9492,13 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -10717,22 +9509,14 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -10746,21 +9530,13 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,< define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -10771,22 +9547,14 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -10800,21 +9568,13 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,< define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -10825,22 +9585,14 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -10854,21 +9606,13 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -10879,22 +9623,14 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -10908,21 +9644,13 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -10933,22 +9661,14 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -10962,21 +9682,13 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -10987,22 +9699,14 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -11015,17 +9719,13 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -11036,17 +9736,13 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -11060,17 +9756,13 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -11081,17 +9773,13 @@ define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -11105,17 +9793,13 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr) define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -11127,8 +9811,6 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 ; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11136,8 +9818,6 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -11151,17 +9831,13 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -11173,8 +9849,6 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11182,8 +9856,6 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -11197,17 +9869,13 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr) define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -11219,8 +9887,6 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11228,8 +9894,6 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -11243,17 +9907,13 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -11265,8 +9925,6 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11274,8 +9932,6 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -11289,17 +9945,13 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr) define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -11311,8 +9963,6 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11320,8 +9970,6 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -11335,17 +9983,13 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -11357,8 +10001,6 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11366,8 +10008,6 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -11381,17 +10021,13 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr) define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -11403,8 +10039,6 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f ; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11412,8 +10046,6 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -11427,17 +10059,13 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64, define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -11449,8 +10077,6 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f ; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11458,8 +10084,6 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -11473,17 +10097,13 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64, define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -11495,8 +10115,6 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x ; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11504,8 +10122,6 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -11519,17 +10135,13 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64 define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -11541,8 +10153,6 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x ; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11550,8 +10160,6 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -11565,19 +10173,13 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64 define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -11588,19 +10190,13 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -11614,19 +10210,13 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -11637,19 +10227,13 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -11663,19 +10247,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6 define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #6 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -11686,20 +10264,14 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -11713,19 +10285,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #6 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -11736,20 +10302,14 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -11763,19 +10323,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -11786,20 +10340,14 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -11813,19 +10361,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -11836,20 +10378,14 @@ define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -11863,19 +10399,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -11886,20 +10416,14 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -11913,19 +10437,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -11936,20 +10454,14 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -11963,19 +10475,13 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -11986,20 +10492,14 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -12013,19 +10513,13 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -12036,20 +10530,14 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -12063,19 +10551,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -12086,20 +10568,14 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -12113,19 +10589,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -12136,20 +10606,14 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -12163,21 +10627,13 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1 define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -12188,21 +10644,13 @@ define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -12216,21 +10664,13 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -12241,21 +10681,13 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -12269,21 +10701,13 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -12294,22 +10718,14 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -12323,21 +10739,13 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -12348,22 +10756,14 @@ define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -12377,21 +10777,13 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -12402,22 +10794,14 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -12431,21 +10815,13 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -12456,22 +10832,14 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -12485,21 +10853,13 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -12510,22 +10870,14 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -12539,21 +10891,13 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -12564,22 +10908,14 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -12593,21 +10929,13 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -12618,22 +10946,14 @@ define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -12647,21 +10967,13 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -12672,22 +10984,14 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -12701,21 +11005,13 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -12726,22 +11022,14 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -12755,21 +11043,13 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -12780,22 +11060,14 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index 54b96520dce41..c9d94f945f7af 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -351,63 +351,30 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.b { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.b { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.b { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A) ret %struct.__neon_int8x16x2_t %tmp2 } define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.b { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A) ret %struct.__neon_int8x16x3_t %tmp2 } define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A) ret %struct.__neon_int8x16x4_t %tmp2 } @@ -418,63 +385,30 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.h { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.h { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.h { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A) ret %struct.__neon_int16x8x2_t %tmp2 } define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.h { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A) ret %struct.__neon_int16x8x3_t %tmp2 } define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A) ret %struct.__neon_int16x8x4_t %tmp2 } @@ -485,63 +419,30 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.s { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.s { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.s { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A) ret %struct.__neon_int32x4x2_t %tmp2 } define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.s { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A) ret %struct.__neon_int32x4x3_t %tmp2 } define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A) ret %struct.__neon_int32x4x4_t %tmp2 } @@ -552,63 +453,30 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.d { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.d { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.d { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A) ret %struct.__neon_int64x2x2_t %tmp2 } define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.d { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A) ret %struct.__neon_int64x2x3_t %tmp2 } define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A) ret %struct.__neon_int64x2x4_t %tmp2 } diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 43d5ab5ab54e1..ad4b0f377627d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1780,9 +1780,7 @@ define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI126_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -1799,11 +1797,9 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI127_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov b2, v0.b[1] ; CHECK-GI-NEXT: mov b3, v0.b[2] +; CHECK-GI-NEXT: adrp x8, .LCPI127_0 ; CHECK-GI-NEXT: mov b4, v0.b[3] ; CHECK-GI-NEXT: mov b5, v0.b[4] ; CHECK-GI-NEXT: mov b6, v0.b[5] @@ -2003,9 +1999,7 @@ define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { ; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI130_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -2022,11 +2016,9 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI131_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov h2, v0.h[1] ; CHECK-GI-NEXT: mov h3, v0.h[2] +; CHECK-GI-NEXT: adrp x8, .LCPI131_0 ; CHECK-GI-NEXT: mov h4, v0.h[3] ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0] @@ -2146,9 +2138,7 @@ define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI134_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -2165,10 +2155,8 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI135_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: adrp x8, .LCPI135_0 ; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll index 6327679756739..d04bac78377bf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll @@ -7,12 +7,11 @@ define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.4s, #2 ; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-NEXT: mov v1.16b, v3.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: mov v1.16b, v3.16b ; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x0] ; CHECK-NEXT: mov v1.16b, v2.16b ; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x1] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> , <4 x i32> , i64 1, ptr %a) @@ -25,17 +24,16 @@ entry: define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) { ; CHECK-LABEL: copyTuple.QTriple: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1 ; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: mov v1.16b, v31.16b +; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x0] ; CHECK-NEXT: mov v2.16b, v31.16b ; CHECK-NEXT: mov v3.16b, v0.16b -; CHECK-NEXT: mov v4.16b, v1.16b -; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x0] -; CHECK-NEXT: mov v3.16b, v31.16b -; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x1] -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x1] +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a) @@ -48,20 +46,19 @@ entry: define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) { ; CHECK-LABEL: copyTuple.QQuad: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1_q2 ; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v3.16b, v31.16b +; CHECK-NEXT: mov v4.16b, v2.16b +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: mov v1.16b, v31.16b +; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x0] +; CHECK-NEXT: mov v2.16b, v31.16b +; CHECK-NEXT: mov v3.16b, v0.16b ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v5.16b, v1.16b -; CHECK-NEXT: mov v6.16b, v2.16b -; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0] -; CHECK-NEXT: mov v4.16b, v31.16b -; CHECK-NEXT: mov v5.16b, v0.16b -; CHECK-NEXT: mov v6.16b, v0.16b -; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1] -; CHECK-NEXT: mov v0.16b, v3.16b +; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x1] +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a) diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll index 44b92e6ccd088..2044a866b830a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll @@ -21,121 +21,55 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { } define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { -; CHECK-SD-LABEL: tbl2_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl2_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl2_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) ret <8 x i8> %tmp3 } define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -; CHECK-SD-LABEL: tbl2_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl2_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl2_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %tmp3 } define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-SD-LABEL: tbl3_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl3_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl3_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-SD-LABEL: tbl3_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl3_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl3_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-SD-LABEL: tbl4_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl4_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl4_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-SD-LABEL: tbl4_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl4_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl4_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } @@ -173,11 +107,7 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI8_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-SD-NEXT: mov.s v0[1], v1[1] @@ -187,11 +117,7 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI8_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4 @@ -262,23 +188,15 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI9_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI9_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 @@ -331,11 +249,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s4, w0 ; CHECK-SD-NEXT: mov w8, #32 // =0x20 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w0 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[2], w0 ; CHECK-SD-NEXT: mov.b v4[3], w0 ; CHECK-SD-NEXT: mov.b v4[4], w0 @@ -364,10 +278,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -454,11 +364,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: fmov s4, w8 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w8 ; CHECK-SD-NEXT: mov.b v4[2], w8 ; CHECK-SD-NEXT: mov.b v4[3], w8 @@ -489,12 +395,8 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: fmov s6, w0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -600,11 +502,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff ; CHECK-SD-NEXT: adrp x8, .LCPI12_0 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-SD-NEXT: mov.b v4[0], w0 ; CHECK-SD-NEXT: mov.b v4[1], w0 @@ -623,10 +521,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -749,10 +643,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: dup.16b v4, w0 ; CHECK-SD-NEXT: mov w8, #255 // =0xff -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: mov.b v4[8], w8 ; CHECK-SD-NEXT: mov.b v4[9], w8 ; CHECK-SD-NEXT: mov.b v4[10], w8 @@ -772,12 +662,8 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: fmov s6, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI13_1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -879,23 +765,15 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 @@ -981,24 +859,16 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI15_2 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2] ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 @@ -1085,24 +955,16 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI16_2 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2] ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 @@ -1144,121 +1006,55 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { } define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-SD-LABEL: tbx2_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx2_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx2_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-SD-LABEL: tbx2_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx2_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx2_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-SD-LABEL: tbx3_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx3_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx3_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-SD-LABEL: tbx3_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx3_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx3_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { -; CHECK-SD-LABEL: tbx4_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx4_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx4_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) ret <8 x i8> %tmp3 } define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { -; CHECK-SD-LABEL: tbx4_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx4_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx4_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index 9955b253f563e..fd862dfcbd693 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -359,20 +359,18 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) { define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { ; CHECK-SD-LABEL: combine_v8i16_8first: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-SD-NEXT: fmov d31, d1 ; CHECK-SD-NEXT: adrp x8, .LCPI25_0 -; CHECK-SD-NEXT: fmov d2, d0 -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0] -; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] +; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_v8i16_8first: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 +; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: adrp x8, .LCPI25_0 -; CHECK-GI-NEXT: fmov d31, d1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0] -; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] +; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0 ; CHECK-GI-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 @@ -383,20 +381,18 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { ; CHECK-SD-LABEL: combine_v8i16_8firstundef: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-SD-NEXT: fmov d31, d1 ; CHECK-SD-NEXT: adrp x8, .LCPI26_0 -; CHECK-SD-NEXT: fmov d2, d0 -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] -; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] +; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_v8i16_8firstundef: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 +; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: adrp x8, .LCPI26_0 -; CHECK-GI-NEXT: fmov d31, d1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0] -; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] +; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0 ; CHECK-GI-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll index 98033a8e449ff..66f3c5c93fcbf 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE @@ -103,8 +103,8 @@ define fp128 @test_rmw_xchg_f128(ptr %dst, fp128 %new) { ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 -; LSE-NEXT: mov x5, x7 ; LSE-NEXT: mov x4, x6 +; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x2, x3, [x0] ; LSE-NEXT: cmp x5, x7 ; LSE-NEXT: ccmp x4, x6, #0, eq diff --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll index d59de3c56f4ee..f10b7282669ae 100644 --- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll @@ -224,14 +224,11 @@ define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step0_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov v3.16b, v2.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_1 -; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> @@ -242,14 +239,11 @@ define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step1_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov v3.16b, v2.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_1 -; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> @@ -260,14 +254,11 @@ define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step2_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov v3.16b, v2.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_1 -; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll index 5cfa59a302239..dbbfbea9176f6 100644 --- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll @@ -78,9 +78,9 @@ entry: define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ld1r { v1.16b }, [x1] +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll index 039025dafa0d6..eae724870fb9d 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll @@ -111,8 +111,8 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> % ; CHECK-NEXT: fmul v17.2s, v6.2s, v5.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fmul v5.2s, v4.2s, v5.2s -; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s ; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0 +; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: fneg v16.2s, v5.2s ; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #90 @@ -162,19 +162,19 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p ; CHECK-NEXT: ld2 { v0.2s, v1.2s }, [x0] ; CHECK-NEXT: ld2 { v2.2s, v3.2s }, [x1] ; CHECK-NEXT: fmul v4.2s, v3.2s, v1.2s -; CHECK-NEXT: fmul v6.2s, v2.2s, v1.2s +; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s ; CHECK-NEXT: fneg v4.2s, v4.2s -; CHECK-NEXT: fmla v6.2s, v0.2s, v3.2s +; CHECK-NEXT: fmla v1.2s, v0.2s, v3.2s ; CHECK-NEXT: fmla v4.2s, v0.2s, v2.2s ; CHECK-NEXT: str d4, [x4] ; CHECK-NEXT: ldr q5, [x2] -; CHECK-NEXT: ext v7.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: zip1 v0.2s, v5.2s, v7.2s -; CHECK-NEXT: zip2 v1.2s, v5.2s, v7.2s -; CHECK-NEXT: fmul v3.2s, v0.2s, v6.2s -; CHECK-NEXT: fmul v6.2s, v1.2s, v6.2s -; CHECK-NEXT: fmla v3.2s, v4.2s, v1.2s -; CHECK-NEXT: fneg v2.2s, v6.2s +; CHECK-NEXT: ext v2.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: zip1 v0.2s, v5.2s, v2.2s +; CHECK-NEXT: zip2 v2.2s, v5.2s, v2.2s +; CHECK-NEXT: fmul v3.2s, v0.2s, v1.2s +; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s +; CHECK-NEXT: fmla v3.2s, v4.2s, v2.2s +; CHECK-NEXT: fneg v2.2s, v1.2s ; CHECK-NEXT: fmla v2.2s, v4.2s, v0.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st2 { v2.2s, v3.2s }, [x5] @@ -241,20 +241,20 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, < ; CHECK-NEXT: zip1 v3.2s, v3.2s, v17.2s ; CHECK-NEXT: fmul v18.2s, v6.2s, v7.2s ; CHECK-NEXT: fmul v5.2s, v19.2s, v16.2s -; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s ; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s +; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s ; CHECK-NEXT: fneg v4.2s, v18.2s ; CHECK-NEXT: fmla v5.2s, v3.2s, v2.2s -; CHECK-NEXT: fneg v2.2s, v16.2s ; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s +; CHECK-NEXT: fneg v2.2s, v16.2s ; CHECK-NEXT: fmla v4.2s, v1.2s, v0.2s -; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s ; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s +; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s ; CHECK-NEXT: fmul v17.2s, v4.2s, v5.2s ; CHECK-NEXT: str d4, [x0] -; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fneg v16.2s, v0.2s ; CHECK-NEXT: zip1 v0.4s, v2.4s, v5.4s +; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fmla v16.2s, v2.2s, v4.2s ; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index 0481d997d24fa..c8dc092bb05e4 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -920,10 +920,8 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov x9, sp ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0x3 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll index 0a3b9a070c2b3..4253b06e1f1ac 100644 --- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -39,9 +39,9 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v4, v1 -; CHECK-NEXT: fcvtzu.4s v3, v2 -; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0 +; CHECK-NEXT: fcvtzu.4s v3, v1 +; CHECK-NEXT: fcvtzu.4s v2, v2 +; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v0 ; CHECK-NEXT: str d1, [x1], #16 ; CHECK-NEXT: b.eq LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -252,12 +252,12 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9, #32] -; CHECK-NEXT: fcvtzu.4s v7, v1 +; CHECK-NEXT: fcvtzu.4s v5, v1 ; CHECK-NEXT: ldp q1, q3, [x9] -; CHECK-NEXT: fcvtzu.4s v6, v2 -; CHECK-NEXT: fcvtzu.4s v5, v3 -; CHECK-NEXT: fcvtzu.4s v4, v1 -; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0 +; CHECK-NEXT: fcvtzu.4s v4, v2 +; CHECK-NEXT: fcvtzu.4s v3, v3 +; CHECK-NEXT: fcvtzu.4s v2, v1 +; CHECK-NEXT: tbl.16b v1, { v2, v3, v4, v5 }, v0 ; CHECK-NEXT: str q1, [x1], #32 ; CHECK-NEXT: b.eq LBB4_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -316,20 +316,20 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) { ; CHECK-NEXT: ldp q3, q4, [x9, #32] ; CHECK-NEXT: ldp q5, q6, [x10] ; CHECK-NEXT: fcvtzu.4s v19, v1 +; CHECK-NEXT: ldp q7, q1, [x9] +; CHECK-NEXT: fcvtzu.4s v4, v4 ; CHECK-NEXT: fcvtzu.4s v18, v2 -; CHECK-NEXT: ldp q2, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v23, v4 +; CHECK-NEXT: fcvtzu.4s v3, v3 ; CHECK-NEXT: fcvtzu.4s v17, v6 -; CHECK-NEXT: add x9, x2, x8, lsl #5 -; CHECK-NEXT: fcvtzu.4s v22, v3 ; CHECK-NEXT: fcvtzu.4s v16, v5 +; CHECK-NEXT: add x9, x2, x8, lsl #5 +; CHECK-NEXT: fcvtzu.4s v2, v1 +; CHECK-NEXT: fcvtzu.4s v1, v7 ; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: fcvtzu.4s v21, v1 ; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: fcvtzu.4s v20, v2 -; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 -; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0 -; CHECK-NEXT: stp q2, q1, [x9] +; CHECK-NEXT: tbl.16b v5, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0 +; CHECK-NEXT: stp q1, q5, [x9] ; CHECK-NEXT: b.eq LBB5_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 3b8054a635bcd..e38394f2b0533 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1483,12 +1483,12 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI70_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v6.2s, v3.2d -; CHECK-SD-NEXT: xtn v5.2s, v2.2d -; CHECK-SD-NEXT: xtn v4.2s, v1.2d -; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI70_0] -; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI70_0] +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f64_v8i16: @@ -1514,12 +1514,12 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI71_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v6.2s, v3.2d -; CHECK-SD-NEXT: xtn v5.2s, v2.2d -; CHECK-SD-NEXT: xtn v4.2s, v1.2d -; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI71_0] -; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI71_0] +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f64_v8i16: @@ -1545,21 +1545,21 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI72_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI72_0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v19.2s, v3.2d -; CHECK-SD-NEXT: xtn v23.2s, v7.2d -; CHECK-SD-NEXT: xtn v18.2s, v2.2d -; CHECK-SD-NEXT: xtn v22.2s, v6.2d -; CHECK-SD-NEXT: xtn v17.2s, v1.2d -; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI72_0] -; CHECK-SD-NEXT: xtn v16.2s, v0.2d -; CHECK-SD-NEXT: xtn v20.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v16f64_v16i16: @@ -1592,21 +1592,21 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI73_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI73_0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v19.2s, v3.2d -; CHECK-SD-NEXT: xtn v23.2s, v7.2d -; CHECK-SD-NEXT: xtn v18.2s, v2.2d -; CHECK-SD-NEXT: xtn v22.2s, v6.2d -; CHECK-SD-NEXT: xtn v17.2s, v1.2d -; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI73_0] -; CHECK-SD-NEXT: xtn v16.2s, v0.2d -; CHECK-SD-NEXT: xtn v20.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v16f64_v16i16: @@ -1634,65 +1634,48 @@ entry: define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptos_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT: .cfi_offset b8, -8 -; CHECK-SD-NEXT: .cfi_offset b9, -16 -; CHECK-SD-NEXT: .cfi_offset b10, -24 -; CHECK-SD-NEXT: .cfi_offset b11, -32 -; CHECK-SD-NEXT: .cfi_offset b12, -40 -; CHECK-SD-NEXT: .cfi_offset b13, -48 -; CHECK-SD-NEXT: .cfi_offset b14, -56 -; CHECK-SD-NEXT: .cfi_offset b15, -64 +; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI74_0 -; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] -; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d -; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] +; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] +; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp, #32] +; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-SD-NEXT: ldp q22, q23, [sp] +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: xtn v2.2s, v18.2d -; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] -; CHECK-SD-NEXT: xtn v1.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d -; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: xtn v0.2s, v22.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d -; CHECK-SD-NEXT: xtn v29.2s, v7.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-SD-NEXT: xtn v15.2s, v21.2d -; CHECK-SD-NEXT: xtn v11.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v14.2s, v20.2d -; CHECK-SD-NEXT: xtn v10.2s, v22.2d -; CHECK-SD-NEXT: xtn v13.2s, v17.2d -; CHECK-SD-NEXT: xtn v9.2s, v7.2d -; CHECK-SD-NEXT: xtn v28.2s, v6.2d -; CHECK-SD-NEXT: xtn v8.2s, v18.2d -; CHECK-SD-NEXT: xtn v12.2s, v16.2d -; CHECK-SD-NEXT: xtn v27.2s, v5.2d -; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI74_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b -; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b -; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b -; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b -; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: adrp x8, .LCPI74_0 +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v21.2s, v21.2d +; CHECK-SD-NEXT: xtn v25.2s, v19.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v20.2s, v20.2d +; CHECK-SD-NEXT: xtn v24.2s, v18.2d +; CHECK-SD-NEXT: xtn v19.2s, v23.2d +; CHECK-SD-NEXT: xtn v23.2s, v17.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v18.2s, v22.2d +; CHECK-SD-NEXT: xtn v22.2s, v16.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI74_0] +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b +; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f64_v32i16: @@ -1738,65 +1721,48 @@ entry: define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptou_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT: .cfi_offset b8, -8 -; CHECK-SD-NEXT: .cfi_offset b9, -16 -; CHECK-SD-NEXT: .cfi_offset b10, -24 -; CHECK-SD-NEXT: .cfi_offset b11, -32 -; CHECK-SD-NEXT: .cfi_offset b12, -40 -; CHECK-SD-NEXT: .cfi_offset b13, -48 -; CHECK-SD-NEXT: .cfi_offset b14, -56 -; CHECK-SD-NEXT: .cfi_offset b15, -64 +; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI75_0 -; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] -; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d -; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] +; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] +; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp, #32] +; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-SD-NEXT: ldp q22, q23, [sp] +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: xtn v2.2s, v18.2d -; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] -; CHECK-SD-NEXT: xtn v1.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d -; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: xtn v0.2s, v22.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d -; CHECK-SD-NEXT: xtn v29.2s, v7.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-SD-NEXT: xtn v15.2s, v21.2d -; CHECK-SD-NEXT: xtn v11.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v14.2s, v20.2d -; CHECK-SD-NEXT: xtn v10.2s, v22.2d -; CHECK-SD-NEXT: xtn v13.2s, v17.2d -; CHECK-SD-NEXT: xtn v9.2s, v7.2d -; CHECK-SD-NEXT: xtn v28.2s, v6.2d -; CHECK-SD-NEXT: xtn v8.2s, v18.2d -; CHECK-SD-NEXT: xtn v12.2s, v16.2d -; CHECK-SD-NEXT: xtn v27.2s, v5.2d -; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI75_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b -; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b -; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b -; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b -; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: adrp x8, .LCPI75_0 +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v21.2s, v21.2d +; CHECK-SD-NEXT: xtn v25.2s, v19.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v20.2s, v20.2d +; CHECK-SD-NEXT: xtn v24.2s, v18.2d +; CHECK-SD-NEXT: xtn v19.2s, v23.2d +; CHECK-SD-NEXT: xtn v23.2s, v17.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v18.2s, v22.2d +; CHECK-SD-NEXT: xtn v22.2s, v16.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI75_0] +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b +; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v32f64_v32i16: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index d620a8851ee44..2d0931fb4f525 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3365,111 +3365,111 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w12, w13, w8, lt ; CHECK-NEXT: mov v0.s[1], w11 +; CHECK-NEXT: csel w12, w13, w8, lt ; CHECK-NEXT: fcvtzs w11, d1 ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: fcvtzs w12, d2 ; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: mov w13, v0.s[1] +; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov d2, v4.d[1] +; CHECK-NEXT: mov v0.b[1], w13 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: mov v0.b[1], w13 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: mov w13, v1.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s2, w12 -; CHECK-NEXT: fcvtzs w12, d3 -; CHECK-NEXT: mov d3, v4.d[1] ; CHECK-NEXT: mov v0.b[2], v1.b[0] -; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt +; CHECK-NEXT: fcvtzs w11, d2 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d3 +; CHECK-NEXT: mov v0.b[3], w13 +; CHECK-NEXT: mov d2, v5.d[1] ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: mov v0.b[3], w13 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v2.s[1] +; CHECK-NEXT: mov w13, v1.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: mov v0.b[4], v2.b[0] -; CHECK-NEXT: mov d4, v5.d[1] -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: mov v0.b[5], w13 +; CHECK-NEXT: cmn w10, #128 +; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov d2, v6.d[1] ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fcvtzs w11, d4 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v3.s[1] +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: mov v0.b[6], v1.b[0] ; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: mov v0.b[6], v3.b[0] -; CHECK-NEXT: fmov s4, w12 -; CHECK-NEXT: fcvtzs w12, d5 ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: mov d5, v6.d[1] -; CHECK-NEXT: mov v4.s[1], w10 -; CHECK-NEXT: csel w10, w11, w8, lt +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: fcvtzs w12, d5 ; CHECK-NEXT: mov v0.b[7], w13 +; CHECK-NEXT: fcvtzs w13, d2 +; CHECK-NEXT: mov d2, v7.d[1] +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: csel w10, w11, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fcvtzs w13, d5 ; CHECK-NEXT: csel w11, w12, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov w12, v4.s[1] -; CHECK-NEXT: mov v0.b[8], v4.b[0] +; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: mov v0.b[8], v1.b[0] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s5, w11 -; CHECK-NEXT: fcvtzs w11, d6 ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov d6, v7.d[1] +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fcvtzs w11, d6 ; CHECK-NEXT: mov v0.b[9], w12 -; CHECK-NEXT: mov v5.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w13, w8, lt +; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fcvtzs w13, d6 ; CHECK-NEXT: csel w11, w11, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov v0.b[10], v5.b[0] -; CHECK-NEXT: mov w12, v5.s[1] +; CHECK-NEXT: mov v0.b[10], v1.b[0] +; CHECK-NEXT: mov w12, v1.s[1] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s6, w11 -; CHECK-NEXT: fcvtzs w11, d7 ; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fcvtzs w11, d7 ; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: mov v6.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w13, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: csel w8, w11, w8, lt ; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: mov v0.b[12], v6.b[0] -; CHECK-NEXT: mov w11, v6.s[1] +; CHECK-NEXT: mov v0.b[12], v1.b[0] +; CHECK-NEXT: mov w11, v1.s[1] ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: fmov s7, w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v7.s[1], w10 -; CHECK-NEXT: mov v0.b[14], v7.b[0] -; CHECK-NEXT: mov w8, v7.s[1] +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v0.b[14], v1.b[0] +; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f) @@ -3575,26 +3575,32 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: cmp w13, w9 ; CHECK-NEXT: csel w11, w13, w9, lt ; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: fmov s3, w12 ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w11, w11, w8, gt ; CHECK-NEXT: cmp w14, w9 ; CHECK-NEXT: csel w14, w14, w9, lt +; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w14, w14, w8, gt ; CHECK-NEXT: cmp w13, w9 ; CHECK-NEXT: csel w13, w13, w9, lt +; CHECK-NEXT: fmov s2, w14 ; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w13, w13, w8, gt ; CHECK-NEXT: cmp w15, w9 ; CHECK-NEXT: csel w15, w15, w9, lt +; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w16, w15, w8, gt ; CHECK-NEXT: cmp w17, w9 ; CHECK-NEXT: csel w15, w17, w9, lt +; CHECK-NEXT: fmov s1, w16 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w15, w15, w8, gt ; CHECK-NEXT: cmp w18, w9 ; CHECK-NEXT: csel w17, w18, w9, lt +; CHECK-NEXT: mov v1.s[1], w13 ; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w17, w17, w8, gt ; CHECK-NEXT: cmp w0, w9 @@ -3617,38 +3623,32 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: cmp w2, w9 ; CHECK-NEXT: fcvtzs w5, d0 ; CHECK-NEXT: csel w2, w2, w9, lt -; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: fmov s0, w17 ; CHECK-NEXT: mov v7.s[1], w18 ; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w2, w2, w8, gt ; CHECK-NEXT: cmp w3, w9 ; CHECK-NEXT: csel w3, w3, w9, lt -; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: fmov s6, w2 +; CHECK-NEXT: mov v0.s[1], w15 ; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov s2, w14 ; CHECK-NEXT: csel w3, w3, w8, gt ; CHECK-NEXT: cmp w4, w9 ; CHECK-NEXT: csel w4, w4, w9, lt ; CHECK-NEXT: mov v6.s[1], w0 ; CHECK-NEXT: cmn w4, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: csel w12, w4, w8, gt ; CHECK-NEXT: cmp w5, w9 -; CHECK-NEXT: fmov s1, w16 ; CHECK-NEXT: csel w10, w5, w9, lt ; CHECK-NEXT: fmov s5, w12 ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w10, w10, w8, gt ; CHECK-NEXT: cmp w6, w9 -; CHECK-NEXT: mov v1.s[1], w13 ; CHECK-NEXT: csel w9, w6, w9, lt ; CHECK-NEXT: mov v5.s[1], w3 -; CHECK-NEXT: fmov s0, w17 ; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w8, w9, w8, gt ; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: mov v0.s[1], w15 ; CHECK-NEXT: adrp x8, .LCPI85_0 ; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI85_0] ; CHECK-NEXT: mov v4.s[1], w10 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 16e04070b6543..62f5e0fe2dcaa 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2751,8 +2751,8 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w14, d1 ; CHECK-NEXT: fcvtzu w8, d4 -; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w10, d5 +; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w13, d3 ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: fcvtzu w15, d4 @@ -2760,29 +2760,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w11, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fmov s3, w9 ; CHECK-NEXT: csel w9, w10, w11, lo ; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: mov v3.s[1], w8 ; CHECK-NEXT: csel w8, w12, w11, lo ; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: csel w8, w13, w11, lo ; CHECK-NEXT: cmp w14, #255 -; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: csel w9, w14, w11, lo ; CHECK-NEXT: cmp w15, #255 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: csel w9, w15, w11, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: csel w8, w10, w11, lo -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b +; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI82_0] +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -2802,29 +2802,29 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: fcvtzu w10, d16 -; CHECK-NEXT: mov d16, v2.d[1] ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v2.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: mov w11, v0.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v3.d[1] +; CHECK-NEXT: fmov s16, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v3.d[1] ; CHECK-NEXT: mov v0.b[1], w11 -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v16.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d2 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v1.s[1] -; CHECK-NEXT: mov v0.b[2], v1.b[0] +; CHECK-NEXT: mov w11, v16.s[1] +; CHECK-NEXT: mov v0.b[2], v16.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: fcvtzu w10, d16 -; CHECK-NEXT: mov d16, v4.d[1] +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: mov d1, v4.d[1] ; CHECK-NEXT: mov v0.b[3], w11 ; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d3 @@ -2834,58 +2834,58 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: mov v0.b[4], v2.b[0] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v5.d[1] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v5.d[1] ; CHECK-NEXT: mov v0.b[5], w11 -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d4 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v3.s[1] -; CHECK-NEXT: mov v0.b[6], v3.b[0] +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: mov v0.b[6], v2.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: fcvtzu w10, d16 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: mov d1, v6.d[1] ; CHECK-NEXT: mov v0.b[7], w11 -; CHECK-NEXT: mov v4.s[1], w9 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: mov d5, v6.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov w11, v4.s[1] -; CHECK-NEXT: mov v0.b[8], v4.b[0] +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: mov v0.b[8], v2.b[0] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s16, w9 -; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: mov d5, v7.d[1] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v7.d[1] ; CHECK-NEXT: mov v0.b[9], w11 -; CHECK-NEXT: mov v16.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d6 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[10], v16.b[0] -; CHECK-NEXT: mov w11, v16.s[1] +; CHECK-NEXT: mov v0.b[10], v2.b[0] +; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s6, w10 +; CHECK-NEXT: fmov s2, w10 ; CHECK-NEXT: fcvtzu w10, d7 ; CHECK-NEXT: mov v0.b[11], w11 -; CHECK-NEXT: mov v6.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d5 +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov v0.b[12], v6.b[0] -; CHECK-NEXT: mov w11, v6.s[1] +; CHECK-NEXT: mov v0.b[12], v2.b[0] +; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w8, w10, w8, lo -; CHECK-NEXT: fmov s5, w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: mov v0.b[14], v5.b[0] -; CHECK-NEXT: mov w8, v5.s[1] +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: mov v0.b[14], v1.b[0] +; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f) @@ -2903,8 +2903,8 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w14, d1 ; CHECK-NEXT: fcvtzu w8, d4 -; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w11, d5 +; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w13, d3 ; CHECK-NEXT: cmp w8, w10 ; CHECK-NEXT: fcvtzu w15, d4 @@ -2912,29 +2912,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-NEXT: cmp w9, w10 ; CHECK-NEXT: csel w9, w9, w10, lo ; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fmov s3, w9 ; CHECK-NEXT: csel w9, w11, w10, lo ; CHECK-NEXT: cmp w12, w10 ; CHECK-NEXT: fcvtzu w11, d0 -; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: mov v3.s[1], w8 ; CHECK-NEXT: csel w8, w12, w10, lo ; CHECK-NEXT: cmp w13, w10 -; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: csel w8, w13, w10, lo ; CHECK-NEXT: cmp w14, w10 -; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: csel w9, w14, w10, lo ; CHECK-NEXT: cmp w15, w10 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: csel w9, w15, w10, lo ; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: csel w8, w11, w10, lo -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI84_0] +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -2973,53 +2973,53 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: fcvtzu w16, d0 ; CHECK-NEXT: csel w11, w11, w8, lo ; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: fmov s18, w11 ; CHECK-NEXT: mov v19.s[1], w13 ; CHECK-NEXT: csel w13, w17, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s18, w11 +; CHECK-NEXT: fcvtzu w17, d2 ; CHECK-NEXT: csel w11, w18, w8, lo ; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fcvtzu w17, d2 +; CHECK-NEXT: mov v18.s[1], w9 ; CHECK-NEXT: csel w12, w12, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w18, d6 -; CHECK-NEXT: mov v18.s[1], w9 +; CHECK-NEXT: fmov s17, w10 ; CHECK-NEXT: csel w9, w16, w8, lo ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s17, w10 -; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: fcvtzu w16, d5 -; CHECK-NEXT: fmov s23, w10 +; CHECK-NEXT: csel w10, w14, w8, lo +; CHECK-NEXT: fcvtzu w18, d6 ; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w14, d3 +; CHECK-NEXT: fmov s5, w10 ; CHECK-NEXT: csel w10, w17, w8, lo ; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: fcvtzu w14, d3 ; CHECK-NEXT: fcvtzu w17, d4 +; CHECK-NEXT: fmov s16, w12 ; CHECK-NEXT: mov v17.s[1], w13 -; CHECK-NEXT: mov v23.s[1], w9 +; CHECK-NEXT: mov v5.s[1], w9 ; CHECK-NEXT: csel w9, w15, w8, lo ; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s22, w9 +; CHECK-NEXT: fmov s4, w9 ; CHECK-NEXT: csel w9, w18, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s16, w12 -; CHECK-NEXT: mov v22.s[1], w10 +; CHECK-NEXT: mov v16.s[1], w11 +; CHECK-NEXT: mov v4.s[1], w10 ; CHECK-NEXT: csel w10, w16, w8, lo ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s21, w10 +; CHECK-NEXT: fmov s3, w10 ; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: cmp w17, w8 ; CHECK-NEXT: csel w8, w17, w8, lo -; CHECK-NEXT: mov v16.s[1], w11 -; CHECK-NEXT: mov v21.s[1], w9 -; CHECK-NEXT: fmov s20, w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: adrp x8, .LCPI85_0 +; CHECK-NEXT: mov v3.s[1], w9 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] -; CHECK-NEXT: mov v20.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f) ret <16 x i16> %x diff --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll index 6828fa9f1508c..d664421086fef 100644 --- a/llvm/test/CodeGen/AArch64/insert-subvector.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll @@ -47,11 +47,10 @@ define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) { define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) { ; CHECK-LABEL: insert_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %s2 @@ -146,11 +145,10 @@ define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) { define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) { ; CHECK-LABEL: insert_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %s2 @@ -272,7 +270,6 @@ define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) { define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) { ; CHECK-LABEL: load_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] @@ -493,7 +490,6 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) { define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0] diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 50c0c8b11e751..4907abc6e946e 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1349,18 +1349,14 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI92_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI92_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -1386,9 +1382,8 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zero: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI93_0 ; CHECK-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-GI-NEXT: adrp x8, .LCPI93_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI93_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret @@ -1422,9 +1417,8 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zeroswap: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q31_q0 -; CHECK-GI-NEXT: adrp x8, .LCPI94_0 ; CHECK-GI-NEXT: movi v31.2d, #0000000000000000 +; CHECK-GI-NEXT: adrp x8, .LCPI94_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI94_0] ; CHECK-GI-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-GI-NEXT: ret @@ -1466,9 +1460,7 @@ define <4 x i32> @vselect_equivalent_shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI96_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI96_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll index 3f590226c4715..8c88d3c33e07c 100644 --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -267,12 +267,8 @@ entry: define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: extract_4_v4i32_badindex: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll index de90024a4a257..1ed9e7cc5254d 100644 --- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll @@ -47,9 +47,7 @@ define <8 x i16> @v8i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: v8i16_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -82,9 +80,7 @@ define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: v16i8_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll index afcced5dcb9ab..d315c306aa37a 100644 --- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll @@ -137,9 +137,7 @@ define <8 x i16> @shuffle_widen_faili1(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shuffle_widen_faili1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -152,9 +150,7 @@ define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shuffle_widen_fail2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -167,9 +163,7 @@ define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shuffle_widen_fail3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir index 0e6c94c44712c..b29ab7727f65d 100644 --- a/llvm/test/CodeGen/AArch64/seqpairspill.mir +++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir @@ -7,11 +7,11 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX - ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8) + ; CHECK-NEXT: STPXi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s128) into %stack.0, align 8) ; CHECK: INLINEASM - ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0 :: (load (s128) from %stack.0, align 8) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] - ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] + ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]] %0 : xseqpairsclass = IMPLICIT_DEF %1 : xseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF @@ -27,11 +27,11 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW - ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4) + ; CHECK-NEXT: STPWi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s64) into %stack.0, align 4) ; CHECK: INLINEASM - ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0 :: (load (s64) from %stack.0, align 4) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] - ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] + ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]] %0 : wseqpairsclass = IMPLICIT_DEF %1 : wseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index fb571eff39fe5..4e49a05506784 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -21,12 +21,8 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -47,12 +43,8 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -101,10 +93,10 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: shuffle4_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: mov v2.d[1], v3.d[0] @@ -214,10 +206,10 @@ define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x ; CHECK-LABEL: shuffle4_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -282,10 +274,10 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 -; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b -; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b +; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b +; CHECK-NEXT: tbl v1.8b, { v0.16b }, v1.8b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> @@ -354,10 +346,10 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x ; CHECK-LABEL: shuffle4_v4i8_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -393,12 +385,8 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) { ; CHECK-LABEL: shuffle4_v4i16_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %a = trunc <4 x i16> %ae to <4 x i8> @@ -432,13 +420,13 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> % define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) { ; CHECK-LABEL: shuffle4_v4i32_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v4.4h, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: xtn v5.4h, v1.4s -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: xtn v6.4h, v2.4s -; CHECK-NEXT: xtn v7.4h, v3.4s -; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %a = trunc <4 x i32> %ae to <4 x i8> %b = trunc <4 x i32> %be to <4 x i8> @@ -470,11 +458,8 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> % define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) { ; CHECK-LABEL: shuffle3_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -504,9 +489,9 @@ define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: shuffle3_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d3, d2 +; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: mov v2.d[1], v1.d[0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b @@ -563,12 +548,12 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> ; CHECK-LABEL: insert4_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: mov v0.d[1], v2.d[0] -; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -632,16 +617,14 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: insert4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov v4.16b, v3.16b -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] -; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b +; CHECK-NEXT: tbl v31.16b, { v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-NEXT: ret %e1 = extractelement <8 x i8> %a, i32 4 @@ -705,6 +688,7 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2 ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: frintm v1.2d, v1.2d ; CHECK-NEXT: frintm v5.2d, v5.2d +; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: frintm v2.2d, v2.2d ; CHECK-NEXT: frintm v6.2d, v6.2d ; CHECK-NEXT: frintm v3.2d, v3.2d @@ -717,17 +701,16 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2 ; CHECK-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: xtn v16.2s, v0.2d -; CHECK-NEXT: xtn v20.2s, v4.2d -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: xtn v17.2s, v1.2d -; CHECK-NEXT: xtn v21.2s, v5.2d -; CHECK-NEXT: xtn v18.2s, v2.2d -; CHECK-NEXT: xtn v22.2s, v6.2d -; CHECK-NEXT: xtn v19.2s, v3.2d -; CHECK-NEXT: xtn v23.2s, v7.2d -; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b -; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v4.2s, v4.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v6.2s, v6.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-NEXT: tbl v2.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll index 41dd7f06712d2..00057ea3359b7 100644 --- a/llvm/test/CodeGen/AArch64/shuffles.ll +++ b/llvm/test/CodeGen/AArch64/shuffles.ll @@ -366,9 +366,7 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b) ; CHECKLE-LABEL: test_shuf9: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI13_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -378,10 +376,10 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI13_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI13_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -418,9 +416,7 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf11: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI15_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -430,10 +426,10 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI15_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI15_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -446,9 +442,7 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf12: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI16_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -458,10 +452,10 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI16_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI16_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -474,9 +468,7 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf13: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI17_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -486,10 +478,10 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI17_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI17_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -502,9 +494,7 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf14: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI18_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -514,10 +504,10 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI18_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI18_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -530,9 +520,7 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf15: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI19_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -542,10 +530,10 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI19_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI19_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index b1131f287fe9a..1f81a2e4bbb82 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -33,23 +33,12 @@ define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { } define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-SD-LABEL: shufflevector_v16i8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI1_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: shufflevector_v16i8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI1_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: shufflevector_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c } @@ -64,23 +53,12 @@ define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) { } define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: shufflevector_v8i16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI3_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: shufflevector_v8i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI3_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: shufflevector_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c } @@ -237,26 +215,25 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){ ; CHECK-SD-LABEL: shufflevector_v32i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 +; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 ; CHECK-SD-NEXT: adrp x9, .LCPI16_1 -; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1] -; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b -; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI16_1] +; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-SD-NEXT: mov v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v32i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 ; CHECK-GI-NEXT: adrp x9, .LCPI16_0 -; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] -; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b -; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI16_0] +; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-GI-NEXT: ret %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %c @@ -298,26 +275,25 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){ ; CHECK-SD-LABEL: shufflevector_v16i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 +; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: adrp x8, .LCPI18_0 ; CHECK-SD-NEXT: adrp x9, .LCPI18_1 -; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] -; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1] -; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b -; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI18_1] +; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-SD-NEXT: mov v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v16i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI18_1 ; CHECK-GI-NEXT: adrp x9, .LCPI18_0 -; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] -; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0] -; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b -; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI18_0] +; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-GI-NEXT: ret %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %c @@ -344,10 +320,8 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-GI-LABEL: shufflevector_v8i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI20_0 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0] -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -563,23 +537,12 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { } define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { -; CHECK-SD-LABEL: shufflevector_v7i16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI33_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: shufflevector_v7i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI33_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: shufflevector_v7i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI33_0 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: ret %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> ret <7 x i16> %c } @@ -594,9 +557,7 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-GI-LABEL: shufflevector_v3i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI34_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll index e7a6c0d6c549b..52a161ba78525 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll @@ -6,9 +6,7 @@ target triple = "aarch64-linux" define void @add_f16_vg1x2(i32 %slice, %zn0, %zn1) #0 { ; CHECK-LABEL: add_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: fadd za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -21,11 +19,7 @@ define void @add_f16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: add_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: fadd za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -41,9 +35,7 @@ define void @add_f16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #1 { ; CHECK-LABEL: sub_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: fsub za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -56,11 +48,7 @@ define void @sub_f16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: sub_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: fsub za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -76,9 +64,7 @@ define void @sub_f16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #2 { ; CHECK-LABEL: add_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: bfadd za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -91,11 +77,7 @@ define void @add_bf16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: add_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: bfadd za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -111,9 +93,7 @@ define void @add_bf16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #2 { ; CHECK-LABEL: sub_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: bfsub za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -126,11 +106,7 @@ define void @sub_bf16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: sub_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: bfsub za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll index ecaf8bccb71fb..402183ab12372 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll @@ -8,9 +8,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -27,9 +25,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -50,11 +46,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -75,11 +67,7 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -128,11 +112,7 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -155,15 +135,7 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -187,15 +159,7 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -223,9 +187,7 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -238,9 +200,7 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -253,9 +213,7 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -270,9 +228,7 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fadd za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -289,11 +245,7 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -310,11 +262,7 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -331,11 +279,7 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -352,11 +296,7 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: fadd za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll index 3a73ff7cdc29c..613fba4a73838 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll @@ -7,8 +7,6 @@ define @multi_vector_cvtn_x2_f16( %zn1, %zn2) { ; CHECK-LABEL: multi_vector_cvtn_x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fcvtn z0.h, { z0.s, z1.s } ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.fcvtn.x2.nxv4f32( %zn1, %zn2) @@ -22,8 +20,6 @@ define @multi_vector_cvtn_x2_f16( %zn1, define @multi_vector_bfcvtn_x2( %zn1, %zn2) { ; CHECK-LABEL: multi_vector_bfcvtn_x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfcvtn z0.h, { z0.s, z1.s } ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.bfcvtn.x2( %zn1, %zn2) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll index 401cdd0b9dfb7..07b10fdc8eeb2 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll @@ -6,9 +6,7 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -25,9 +23,7 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %z define void @multi_vector_add_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -44,11 +40,7 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, % define void @multi_vector_add_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_single_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -68,11 +60,7 @@ define void @multi_vector_add_single_vg1x4_s(i32 %slice, %z define void @multi_vector_add_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_single_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: ret @@ -94,9 +82,7 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, % define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -113,9 +99,7 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %z define void @multi_vector_sub_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -132,11 +116,7 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, % define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_single_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -156,11 +136,7 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %z define void @multi_vector_sub_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_single_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: ret @@ -182,11 +158,7 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, % define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -204,11 +176,7 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -244,15 +212,7 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -270,15 +230,7 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -320,11 +272,7 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -342,11 +290,7 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -364,15 +308,7 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -390,15 +326,7 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -418,9 +346,7 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, %zn0, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: ret @@ -437,9 +363,7 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, %zn0 define void @multi_vector_add_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: ret @@ -458,8 +382,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: ret @@ -495,11 +415,7 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, %zn0 define void @multi_vector_add_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_lane_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: ret @@ -521,8 +437,8 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: ret @@ -559,9 +473,7 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: ret @@ -578,11 +490,7 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: ret @@ -602,11 +510,7 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll index b4fd5a2272e7e..cd8d22441eaa2 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll @@ -26,18 +26,18 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, %unused, < define void @fdot_multi_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -71,18 +71,18 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, %unused, define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -99,9 +99,7 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -114,11 +112,7 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, define void @fdot_single_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -134,9 +128,7 @@ define void @fdot_single_za32_f16_vg1x4(i32 %slice, %unused, define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -149,11 +141,7 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -170,8 +158,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, %unused, %unused, %unused, ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: ret @@ -222,8 +210,8 @@ define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, %unused, ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x4: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z25.d, z2.d ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3] diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll index 99de6f832a3c9..f144e33793fe8 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll @@ -10,9 +10,7 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0h.b[w12, 14:15], { z0.b, z1.b } ; CHECK-NEXT: ret @@ -25,9 +23,7 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -40,9 +36,7 @@ define void @za_write_vg2_horiz_h(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -55,9 +49,7 @@ define void @za_write_vg2_horiz_f16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -70,9 +62,7 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -85,9 +75,7 @@ define void @za_write_vg2_horiz_s(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -100,9 +88,7 @@ define void @za_write_vg2_horiz_f32(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, %zn1, %zn2) @@ -112,9 +98,7 @@ define void @za_write_vg2_horiz_d(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, %zn1, %zn2) @@ -126,9 +110,7 @@ define void @za_write_vg2_horiz_f64(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0v.b[w12, 14:15], { z0.b, z1.b } ; CHECK-NEXT: ret @@ -141,9 +123,7 @@ define void @za_write_vg2_vert_b(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -156,9 +136,7 @@ define void @za_write_vg2_vert_h(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -171,9 +149,7 @@ define void @za_write_vg2_vert_f16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -186,9 +162,7 @@ define void @za_write_vg2_vert_bf16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -201,9 +175,7 @@ define void @za_write_vg2_vert_s(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -216,9 +188,7 @@ define void @za_write_vg2_vert_f32(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, %zn1, %zn2) @@ -228,9 +198,7 @@ define void @za_write_vg2_vert_d(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, %zn1, %zn2) @@ -246,11 +214,7 @@ define void @za_write_vg2_vert_f64(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.b[w12, 0:3], { z0.b - z3.b } ; CHECK-NEXT: mov za0h.b[w12, 12:15], { z0.b - z3.b } ; CHECK-NEXT: ret @@ -263,11 +227,7 @@ define void @za_write_vg4_horiz_b(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -280,11 +240,7 @@ define void @za_write_vg4_horiz_h(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -297,11 +253,7 @@ define void @za_write_vg4_horiz_f16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -314,11 +266,7 @@ define void @za_write_vg4_horiz_bf16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -328,11 +276,7 @@ define void @za_write_vg4_horiz_s(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -342,11 +286,7 @@ define void @za_write_vg4_horiz_f32(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -356,11 +296,7 @@ define void @za_write_vg4_horiz_d(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -372,11 +308,7 @@ define void @za_write_vg4_horiz_f64(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.b[w12, 0:3], { z0.b - z3.b } ; CHECK-NEXT: mov za0v.b[w12, 12:15], { z0.b - z3.b } ; CHECK-NEXT: ret @@ -389,11 +321,7 @@ define void @za_write_vg4_vert_b(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -406,11 +334,7 @@ define void @za_write_vg4_vert_h(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -423,11 +347,7 @@ define void @za_write_vg4_vert_f16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -440,11 +360,7 @@ define void @za_write_vg4_vert_bf16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -454,11 +370,7 @@ define void @za_write_vg4_vert_s(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -468,11 +380,7 @@ define void @za_write_vg4_vert_f32(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -482,11 +390,7 @@ define void @za_write_vg4_vert_d(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -500,9 +404,7 @@ define void @za_write_vg4_vert_f64(i32 %slice, %zn1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -515,9 +417,7 @@ define void @za_write_vg1x2_b(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -530,9 +430,7 @@ define void @za_write_vg1x2_h(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -545,9 +443,7 @@ define void @za_write_vg1x2_f16(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -560,9 +456,7 @@ define void @za_write_vg1x2_bf16(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -575,9 +469,7 @@ define void @za_write_vg1x2_s(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -590,9 +482,7 @@ define void @za_write_vg1x2_f32(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -605,9 +495,7 @@ define void @za_write_vg1x2_d(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -624,11 +512,7 @@ define void @za_write_vg1x2_f64(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -641,11 +525,7 @@ define void @za_write_vg1x4_b(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -658,11 +538,7 @@ define void @za_write_vg1x4_h(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -675,11 +551,7 @@ define void @za_write_vg1x4_f16(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -692,11 +564,7 @@ define void @za_write_vg1x4_bf16(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -709,11 +577,7 @@ define void @za_write_vg1x4_s(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -726,11 +590,7 @@ define void @za_write_vg1x4_f32(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -743,11 +603,7 @@ define void @za_write_vg1x4_d(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll index e154a4df86efe..3ce77cd8e0321 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll @@ -26,18 +26,18 @@ define void @udot_multi_za32_u16_vg1x2(i32 %slice, %unused, < define void @udot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -68,18 +68,18 @@ define void @udot_multi_za32_u8_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -110,18 +110,18 @@ define void @udot_multi_za64_u16_vg1x2(i32 %slice, %unused, < define void @udot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #1 { call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -152,18 +152,18 @@ define void @usdot_multi_za32_u8_vg1x2(i32 %slice, %unused, < define void @usdot_multi_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: usdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -197,18 +197,18 @@ define void @sdot_multi_za32_u16_vg1x2(i32 %slice, %unused, < define void @sdot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -239,18 +239,18 @@ define void @sdot_multi_za32_u8_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -281,18 +281,18 @@ define void @sdot_multi_za64_u16_vg1x2(i32 %slice, %unused, < define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #1 { call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -309,9 +309,7 @@ define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, < define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -324,11 +322,7 @@ define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, define void @udot_single_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -341,9 +335,7 @@ define void @udot_single_za32_u16_vg1x4(i32 %slice, %unused, define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -356,11 +348,7 @@ define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, < define void @udot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -373,9 +361,7 @@ define void @udot_single_za32_u8_vg1x4(i32 %slice, %unused, < define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -388,11 +374,7 @@ define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, define void @udot_single_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -405,9 +387,7 @@ define void @udot_single_za64_u16_vg1x4(i32 %slice, %unused, define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -420,11 +400,7 @@ define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, define void @usdot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -440,9 +416,7 @@ define void @usdot_single_za32_u8_vg1x4(i32 %slice, %unused, define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -455,11 +429,7 @@ define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, define void @sdot_single_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -472,9 +442,7 @@ define void @sdot_single_za32_u16_vg1x4(i32 %slice, %unused, define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -487,11 +455,7 @@ define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, < define void @sdot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -504,9 +468,7 @@ define void @sdot_single_za32_u8_vg1x4(i32 %slice, %unused, < define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -519,11 +481,7 @@ define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, define void @sdot_single_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -536,9 +494,7 @@ define void @sdot_single_za64_u16_vg1x4(i32 %slice, %unused, define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -551,11 +507,7 @@ define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, define void @sudot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -571,8 +523,8 @@ define void @udot_lane_za32_u16_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_lane_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[3] ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[3] ; CHECK-NEXT: ret @@ -605,8 +553,8 @@ define void @udot_lane_za32_u8_vg1x2(i32 %slice, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @udot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: udot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} + define void @udot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: ret @@ -654,8 +697,8 @@ define void @udot_lane_za64_u16_vg1x4(i32 %slice, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @usdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: usdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == Multi, indexed (signed) == @@ -710,8 +847,8 @@ define void @sdot_lane_za32_u16_vg1x2(i32 %slice, %unused, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @sdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: sdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} + define void @sdot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: ret @@ -793,8 +1025,8 @@ define void @sdot_lane_za64_u16_vg1x4(i32 %slice, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @sudot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: sudot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} -attributes #0 = { nounwind "target-features"="+sme2" } -attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" } +attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" } +attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" } ; == Multi, multi (unsigned) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll index 79db677853cb5..3616e074d408e 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll @@ -114,8 +114,6 @@ define { , } @multi_vec_max_single_x2_u64(< define { , } @multi_vec_max_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -316,10 +314,6 @@ define { , , , , , , } @multi_vec_max_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_max_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -511,10 +505,6 @@ define { , } @multi_vec_max_multi_x2_u64(, } @multi_vec_max_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -573,20 +563,20 @@ define { , } @multi_vec_max_multi_x2_ define { , , , } @multi_vec_max_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -598,20 +588,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -623,20 +613,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -648,20 +638,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -675,20 +665,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -700,20 +690,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -725,20 +715,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -750,20 +740,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -777,14 +767,6 @@ define { , , , , , , } @multi_vec_max_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_max_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -796,20 +778,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -821,20 +803,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -846,20 +828,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -873,8 +855,6 @@ define { , , , , } @multi_vec_maxnm_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -927,10 +907,6 @@ define { , } @multi_vec_maxnm_single define { , , , } @multi_vec_maxnm_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -998,10 +974,6 @@ define { , , , , } @multi_vec_maxnm_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_maxnm_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -1060,14 +1032,6 @@ define { , } @multi_vec_maxnm_x2_f64( define { , , , } @multi_vec_maxnm_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -1079,20 +1043,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1103,20 +1067,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1127,20 +1091,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll index e5c36d42fb135..58a0989f25d82 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll @@ -114,8 +114,6 @@ define { , } @multi_vec_min_single_x2_u64(< define { , } @multi_vec_min_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -316,10 +314,6 @@ define { , , , , , , } @multi_vec_min_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_min_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -511,10 +505,6 @@ define { , } @multi_vec_min_multi_x2_u64(, } @multi_vec_min_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_min_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -573,20 +563,20 @@ define { , } @multi_vec_min_multi_x2_ define { , , , } @multi_vec_min_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -598,20 +588,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -623,20 +613,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -648,20 +638,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -675,20 +665,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -700,20 +690,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -725,20 +715,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -750,20 +740,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -778,14 +768,6 @@ define { , , , , , , } @multi_vec_min_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_min_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -797,20 +779,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -822,20 +804,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -847,20 +829,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -874,8 +856,6 @@ define { , , , , } @multi_vec_minnm_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -928,10 +908,6 @@ define { , } @multi_vec_minnm_single define { , , , } @multi_vec_minnm_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -999,10 +975,6 @@ define { , , , , } @multi_vec_minnm_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_minnm_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -1061,14 +1033,6 @@ define { , } @multi_vec_minnm_x2_f64( define { , , , } @multi_vec_minnm_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -1080,20 +1044,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1104,20 +1068,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1128,20 +1092,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll index 346afc611eb75..e5e3da05edced 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll @@ -38,9 +38,7 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -53,9 +51,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -70,11 +66,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -87,11 +79,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -142,18 +130,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -164,18 +152,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -217,8 +205,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -329,9 +315,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -346,11 +330,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -363,11 +343,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -418,18 +394,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -440,18 +416,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -493,8 +469,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -605,9 +579,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -622,11 +594,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -639,11 +607,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -694,18 +658,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -716,18 +680,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -769,8 +733,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -881,9 +843,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -898,11 +858,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -915,11 +871,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -970,18 +922,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -992,18 +944,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -1045,8 +997,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -1133,11 +1083,7 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -1168,8 +1114,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -1237,11 +1181,7 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -1275,18 +1215,18 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -1315,8 +1255,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, %z define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -135,9 +133,7 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -150,9 +146,7 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, % define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -165,9 +159,7 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %z define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -184,9 +176,7 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %z define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -199,9 +189,7 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -214,9 +202,7 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, % define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -229,9 +215,7 @@ define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %z define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -248,11 +232,7 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %z define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -269,11 +249,8 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -290,11 +267,7 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, % define void @multi_vector_add_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -311,11 +284,7 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, %z define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -336,11 +305,7 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %z define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -357,11 +322,7 @@ define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -378,11 +339,7 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, % define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -399,11 +356,7 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %z define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -424,11 +377,7 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %z define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -443,11 +392,7 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -462,11 +407,7 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %z define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -481,11 +422,7 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -504,11 +441,7 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -523,11 +456,7 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -542,11 +471,7 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %z define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -561,11 +486,7 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -584,15 +505,7 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -610,15 +523,7 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -636,15 +541,7 @@ define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, %z define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -662,15 +559,7 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -692,15 +581,7 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -718,15 +599,7 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -744,15 +617,7 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %z define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -770,15 +635,7 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -912,9 +769,7 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, %zn, define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -929,9 +784,7 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -946,9 +799,7 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -963,9 +814,7 @@ define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -984,9 +833,7 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1001,9 +848,7 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1018,9 +863,7 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1035,9 +878,7 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1056,11 +897,7 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1077,11 +914,7 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1098,11 +931,7 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1119,11 +948,7 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1144,11 +969,7 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1165,11 +986,7 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1186,11 +1003,7 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1207,11 +1020,7 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll index 12a940ff03e29..b95a774e899c8 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll @@ -324,20 +324,20 @@ define { , } @multi_vec_rounding_shl_x2_s64 define { , , , } @multi_vec_rounding_shl_x4_s8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -348,20 +348,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -372,20 +372,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -396,20 +396,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, @@ -484,20 +484,20 @@ define { , } @multi_vec_rounding_uhl_x2_u64 define { , , , } @multi_vec_rounding_shl_x4_u8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -508,20 +508,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -532,20 +532,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -556,20 +556,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll index f41791e626f5f..07a5f7993a1cb 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll @@ -8,7 +8,6 @@ define @test_tileslice_no_add(i32 %idx) #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 ; CHECK-NEXT: ret entry: %read = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx) @@ -21,7 +20,6 @@ define @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2) ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: add w8, w0, w1 ; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 ; CHECK-NEXT: ret entry: %add = add i32 %idx1, %idx2 diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll index e71afe213d8a5..68ae92bc68f4b 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll @@ -196,20 +196,20 @@ define { , } @multi_vec_sat_double_mulh_mul define { , , , } @multi_vec_sat_double_mulh_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -221,20 +221,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -246,20 +246,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -271,20 +271,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll index da8c679d5a39a..46409a0a80b78 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll @@ -8,9 +8,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -27,9 +25,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -50,11 +46,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -75,11 +67,7 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -128,11 +112,7 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -155,15 +135,7 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -187,15 +159,7 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -225,9 +189,7 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -240,9 +202,7 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -255,9 +215,7 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -272,9 +230,7 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0 define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -291,11 +247,7 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn define void @multi_vector_sub_za_vg1x4_i32(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -314,11 +266,7 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice, define void @multi_vector_sub_za_vg1x4_i64(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -337,11 +285,7 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice, define void @multi_vector_sub_za_vg1x4_f32(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -360,11 +304,7 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice, define void @multi_vector_sub_za_vg1x4_f64(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll index b698b60007eb9..f552c9e604bdd 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+sme-i16i64 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +target triple="aarch64-linux-gnu" ; == FVDOT == -define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: fvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -22,12 +21,10 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, ; == BFVDOT == -define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: bfvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -40,12 +37,10 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: svdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -55,14 +50,10 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, % ret void } -define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_svdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: svdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -72,14 +63,10 @@ define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, % ret void } -define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #1 { ; CHECK-LABEL: test_svdot_lane_za64_vg1x4_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: svdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: svdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: ret @@ -89,15 +76,108 @@ define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, % ret void } +define void @svdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: svdot_form_2x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0] +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9] +; CHECK-NEXT: mov z2.d, z16.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0] +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @svdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: svdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == UVDOT == -define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -107,14 +187,10 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, % ret void } -define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -124,14 +200,10 @@ define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, % ret void } -define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #1 { ; CHECK-LABEL: test_uvdot_lane_za64_vg1x4_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uvdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: uvdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: ret @@ -141,17 +213,108 @@ define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, % ret void } +define void @uvdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: uvdot_form_2x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0] +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9] +; CHECK-NEXT: mov z2.d, z16.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0] +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @uvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: uvdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == SUVDOT == -define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_suvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: suvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -161,17 +324,80 @@ define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, ret void } +define void @suvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: suvdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == USVDOT == -define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_usvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: usvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -181,6 +407,76 @@ define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, ret void } +define void @usvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: usvdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} + +attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" } +attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" } ; == FVDOT == declare void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32, , , , i32) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll index fb169491b0c90..6895d1854e87d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -13,7 +13,7 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) v ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16 +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 ; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0] ; CHECK-NEXT: ret %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll index 8882fc9290386..0ecf1b1a98834 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -15,8 +15,6 @@ define void @st2b_i8_valid_imm( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -30,9 +28,7 @@ define void @st2b_i8_valid_imm( %v0, %v1, < define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 3, i64 0 @@ -46,9 +42,7 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #-18 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -18, i64 0 @@ -62,9 +56,7 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #16 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 16, i64 0 @@ -78,8 +70,6 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -16, i64 0 @@ -93,8 +83,6 @@ define void @st2b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 14, i64 0 @@ -112,8 +100,6 @@ define void @st2b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -127,8 +113,6 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -146,8 +130,6 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -161,8 +143,6 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -180,8 +160,6 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 8, i64 0 @@ -195,8 +173,6 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 10, i64 0 @@ -214,9 +190,6 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 3, i64 0 @@ -231,10 +204,7 @@ define void @st3b_i8_valid_imm( %v0, %v1, < define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -249,10 +219,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 5, i64 0 @@ -267,10 +234,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #-27 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -27, i64 0 @@ -285,10 +249,7 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #24 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 24, i64 0 @@ -303,9 +264,6 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -24, i64 0 @@ -320,9 +278,6 @@ define void @st3b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21, i64 0 @@ -341,9 +296,6 @@ define void @st3b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -358,9 +310,6 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #9, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 9, i64 0 @@ -379,9 +328,6 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 12, i64 0 @@ -396,9 +342,6 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #15, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 15, i64 0 @@ -417,9 +360,6 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #18, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 18, i64 0 @@ -434,9 +374,6 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -3, i64 0 @@ -455,10 +392,6 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -474,11 +407,7 @@ define void @st4b_i8_valid_imm( %v0, %v1, < define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 5, i64 0 @@ -494,11 +423,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #6 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -514,11 +439,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 7, i64 0 @@ -536,12 +457,8 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -32, i64 0 @@ -607,10 +516,6 @@ define void @st4b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28, i64 0 @@ -630,10 +535,6 @@ define void @st4b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 8, i64 0 @@ -649,10 +550,6 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 12, i64 0 @@ -672,10 +569,6 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 16, i64 0 @@ -691,10 +584,6 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #20, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 20, i64 0 @@ -714,10 +603,6 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 24, i64 0 @@ -733,10 +618,6 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28, i64 0 diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll index d6ee787a23f87..d7b7e59548003 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -9,8 +9,6 @@ define void @st2b_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -28,8 +26,6 @@ define void @st2b_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -43,8 +39,6 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -62,8 +56,6 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -77,8 +69,6 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -96,8 +86,6 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -111,8 +99,6 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset @@ -130,9 +116,6 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -151,9 +134,6 @@ define void @st3b_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -168,9 +148,6 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -189,9 +166,6 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -206,9 +180,6 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -227,9 +198,6 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -244,9 +212,6 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset @@ -265,10 +230,6 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -288,10 +249,6 @@ define void @st4b_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -307,10 +264,6 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -330,10 +283,6 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -349,10 +298,6 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -372,10 +317,6 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -391,10 +332,6 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index d07fd8785121b..e03d4379d0ee2 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -9,8 +9,6 @@ define void @st2b_i8( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv16i8( %v0, @@ -27,8 +25,6 @@ define void @st2b_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8i16( %v0, @@ -41,8 +37,6 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8f16( %v0, @@ -55,8 +49,6 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr) #0 { ; CHECK-LABEL: st2h_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8bf16( %v0, @@ -73,8 +65,6 @@ define void @st2h_bf16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv4i32( %v0, @@ -87,8 +77,6 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv4f32( %v0, @@ -105,8 +93,6 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2i64( %v0, @@ -119,8 +105,6 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2f64( %v0, @@ -133,8 +117,6 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_ptr: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2p0( %v0, @@ -151,9 +133,6 @@ define void @st2d_ptr( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -171,9 +150,6 @@ define void @st3b_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8i16( %v0, @@ -187,9 +163,6 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8f16( %v0, @@ -203,9 +176,6 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) #0 { ; CHECK-LABEL: st3h_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8bf16( %v0, @@ -223,9 +193,6 @@ define void @st3h_bf16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4i32( %v0, @@ -239,9 +206,6 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4f32( %v0, @@ -259,9 +223,6 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2i64( %v0, @@ -275,9 +236,6 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2f64( %v0, @@ -291,9 +249,6 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_ptr: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2p0( %v0, @@ -311,10 +266,6 @@ define void @st3d_ptr( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -333,10 +284,6 @@ define void @st4b_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8i16( %v0, @@ -351,10 +298,6 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8f16( %v0, @@ -369,10 +312,6 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) #0 { ; CHECK-LABEL: st4h_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8bf16( %v0, @@ -391,10 +330,6 @@ define void @st4h_bf16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4i32( %v0, @@ -409,10 +344,6 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4f32( %v0, @@ -431,10 +362,6 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2i64( %v0, @@ -449,10 +376,6 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2f64( %v0, @@ -467,10 +390,6 @@ define void @st4d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_ptr: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2p0( %v0, diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll index 47758893ce711..f6330c613de84 100644 --- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s %complex = type { { double, double } } @@ -10,11 +11,13 @@ declare double @llvm.aarch64.sve.faddv.nxv2f64(, %pred, ptr %inptr) { ; CHECK-LABEL: foo1: -; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1] -; CHECK-NEXT: faddv d2, p0, z0.d -; CHECK-NEXT: faddv d0, p0, z1.d -; CHECK-NEXT: mov v2.d[1], v0.d[0] -; CHECK-NEXT: str q2, [x0] +; CHECK: // %bb.0: +; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x1] +; CHECK-NEXT: faddv d0, p0, z0.d +; CHECK-NEXT: faddv d1, p0, z1.d +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1 %1 = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( %pred, ptr nonnull %inptr) %2 = extractvalue { , } %1, 0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index 66d544d0acbf5..4e52258e8b5df 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -21,9 +21,9 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind { ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov z2.b, z0.b[1] +; CHECK-NEXT: mov z1.b, z0.b[1] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: stp w8, w9, [sp, #8] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: st1b { z0.s }, p0, [x19] @@ -198,9 +198,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20] -; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3] +; CHECK-NEXT: ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3] ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: stp q0, q2, [x19] +; CHECK-NEXT: stp q0, q1, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll index b66e6d9013573..dd27097d8bdf7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll @@ -38,8 +38,6 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2 ; CHECK-LABEL: interleave_store_without_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret ; @@ -75,13 +73,12 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) ; CHECK-LABEL: interleave_store_legalization: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z2_z3 -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov x8, #8 // =0x8 +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: st2w { z4.s, z5.s }, p0, [x0] -; CHECK-NEXT: st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2] +; CHECK-NEXT: st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: interleave_store_legalization: diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll index 9fd1eb616c28c..b200eb3f23bf2 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll @@ -8,9 +8,8 @@ define @tbl2_b( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.b, { z1.b, z2.b }, z3.b +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.b, { z0.b, z1.b }, z3.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv16i8( %a, %b, @@ -21,9 +20,8 @@ define @tbl2_b( %a, %unu define @tbl2_h( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8i16( %a, %b, @@ -34,9 +32,8 @@ define @tbl2_h( %a, %unu define @tbl2_s( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv4i32( %a, %b, @@ -47,9 +44,8 @@ define @tbl2_s( %a, %unu define @tbl2_d( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv2i64( %a, %b, @@ -60,9 +56,8 @@ define @tbl2_d( %a, %unu define @tbl2_fh( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fh: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8f16( %a, %b, @@ -73,9 +68,8 @@ define @tbl2_fh( %a, define @tbl2_bf16( %a, %unused, %b, %c) #0 { ; CHECK-LABEL: tbl2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8bf16( %a, %b, @@ -86,9 +80,8 @@ define @tbl2_bf16( %a, @tbl2_fs( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fs: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv4f32( %a, %b, @@ -99,9 +92,8 @@ define @tbl2_fs( %a, @tbl2_fd( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fd: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv2f64( %a, %b, diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll index 7934f831a7e62..5eeca5fec16f1 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll @@ -15,8 +15,6 @@ declare @llvm.aarch64.sve.fclamp.nxv8bf16(, } @test_bfclamp_single_x2_f16( %a, %b, %c, %d){ ; CHECK-LABEL: test_bfclamp_single_x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( %a, %b, %c, %d) @@ -26,10 +24,6 @@ define { , } @test_bfclamp_single_x2_ define { , , , } @test_bfclamp_single_x4_f16( %a, %b, %c, %d, %e, %f){ ; CHECK-LABEL: test_bfclamp_single_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll index df6b34a3280a7..90a4927cfa5e9 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll @@ -33,8 +33,6 @@ define @test_fclamp_f64( %a, , } @test_fclamp_single_x2_f16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( %a, %b, %c, %d) @@ -44,8 +42,6 @@ define { , } @test_fclamp_single_x2_f16(< define { , } @test_fclamp_single_x2_f32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( %a, %b, %c, %d) @@ -55,8 +51,6 @@ define { , } @test_fclamp_single_x2_f32 define { , } @test_fclamp_single_x2_f64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( %a, %b, %c, %d) @@ -67,10 +61,6 @@ define { , } @test_fclamp_single_x2_f define { , , , } @test_fclamp_single_x4_f16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( %a, %b, %c, %d, %e, %f) @@ -80,10 +70,6 @@ define { , , , , , , } @test_fclamp_single_x4_f32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( %a, %b, %c, %d, %e, %f) @@ -93,10 +79,6 @@ define { , , , , , , } @test_fclamp_single_x4_f64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll index 8fe0694808c8e..57e1a1e100db0 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll @@ -7,8 +7,6 @@ define void @st2q_ss_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -21,8 +19,6 @@ define void @st2q_ss_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -36,8 +32,6 @@ define void @st2q_ss_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -51,8 +45,6 @@ define void @st2q_ss_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -66,8 +58,6 @@ define void @st2q_ss_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -81,8 +71,6 @@ define void @st2q_ss_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -96,8 +84,6 @@ define void @st2q_ss_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -111,8 +97,6 @@ define void @st2q_ss_f64( %v0, %v1, < define void @st2q_ss_bf16( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -127,8 +111,6 @@ define void @st2q_ss_bf16( %v0, %v1, define void @st2q_si_i8_off16( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2q_si_i8_off16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -16 @@ -142,8 +124,6 @@ define void @st2q_si_i8_off16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2q_si_i8_off14: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 14 @@ -157,8 +137,6 @@ define void @st2q_si_i8_off14( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -172,8 +150,6 @@ define void @st2q_si_i16( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -187,8 +163,6 @@ define void @st2q_si_i32( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -202,8 +176,6 @@ define void @st2q_si_i64( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -217,8 +189,6 @@ define void @st2q_si_f16( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -232,8 +202,6 @@ define void @st2q_si_f32( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep= getelementptr , ptr %base, i64 14 @@ -247,8 +215,6 @@ define void @st2q_si_f64( %v0, %v1, < define void @st2q_si_bf16( %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -266,9 +232,6 @@ define void @st2q_si_bf16( %v0, %v1, define void @st3q_ss_i8( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -283,9 +246,6 @@ define void @st3q_ss_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -300,9 +260,6 @@ define void @st3q_ss_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -317,9 +274,6 @@ define void @st3q_ss_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -334,9 +288,6 @@ define void @st3q_ss_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -351,9 +302,6 @@ define void @st3q_ss_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -368,9 +316,6 @@ define void @st3q_ss_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -385,9 +330,6 @@ define void @st3q_ss_f64( %v0, %v1, < define void @st3q_ss_bf16( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -402,9 +344,6 @@ define void @st3q_ss_bf16( %v0, %v1, define void @st3q_si_i8_off24( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i8_off24: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -24 @@ -419,9 +358,6 @@ define void @st3q_si_i8_off24( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i8_off21: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -436,9 +372,6 @@ define void @st3q_si_i8_off21( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -453,9 +386,6 @@ define void @st3q_si_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -470,9 +400,6 @@ define void @st3q_si_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -487,9 +414,6 @@ define void @st3q_si_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -504,9 +428,6 @@ define void @st3q_si_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -521,9 +442,6 @@ define void @st3q_si_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -538,9 +456,6 @@ define void @st3q_si_f64( %v0, %v1, < define void @st3q_si_bf16( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -558,10 +473,6 @@ define void @st3q_si_bf16( %v0, %v1, define void @st4q_ss_i8( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -577,10 +488,6 @@ define void @st4q_ss_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -596,10 +503,6 @@ define void @st4q_ss_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -615,10 +518,6 @@ define void @st4q_ss_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -634,10 +533,6 @@ define void @st4q_ss_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -653,10 +548,6 @@ define void @st4q_ss_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -672,10 +563,6 @@ define void @st4q_ss_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -691,10 +578,6 @@ define void @st4q_ss_f64( %v0, %v1, < define void @st4q_ss_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -710,10 +593,6 @@ define void @st4q_ss_bf16( %v0, %v1, define void @st4q_si_i8_off32( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i8_off32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -32 @@ -729,10 +608,6 @@ define void @st4q_si_i8_off32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i8_off28: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -748,10 +623,6 @@ define void @st4q_si_i8_off28( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -767,10 +638,6 @@ define void @st4q_si_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base1 = getelementptr , ptr %addr, i64 28 @@ -786,10 +653,6 @@ define void @st4q_si_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -805,10 +668,6 @@ define void @st4q_si_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -824,10 +683,6 @@ define void @st4q_si_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -843,10 +698,6 @@ define void @st4q_si_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -862,10 +713,6 @@ define void @st4q_si_f64( %v0, %v1, < define void @st4q_si_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll index 912d5d853aa8d..26316caad2bbc 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll @@ -42,8 +42,6 @@ define @test_sclamp_i64( %a, , } @test_sclamp_single_x2_i8( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( %a, %b, %c, %d) @@ -53,8 +51,6 @@ define { , } @test_sclamp_single_x2_i8(, } @test_sclamp_single_x2_i16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( %a, %b, %c, %d) @@ -64,8 +60,6 @@ define { , } @test_sclamp_single_x2_i16(, } @test_sclamp_single_x2_i32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( %a, %b, %c, %d) @@ -75,8 +69,6 @@ define { , } @test_sclamp_single_x2_i32(, } @test_sclamp_single_x2_i64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( %a, %b, %c, %d) @@ -86,10 +78,6 @@ define { , } @test_sclamp_single_x2_i64(, , , } @test_sclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) @@ -99,10 +87,6 @@ define { , , , , , , } @test_sclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) @@ -112,10 +96,6 @@ define { , , , , , , } @test_sclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) @@ -125,10 +105,6 @@ define { , , , , , , } @test_sclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll index 3a21eaead5f72..d64f06aaef885 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll @@ -8,18 +8,18 @@ define { , , , , , , , , , , , , , , , , , , , , , , , , %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -118,9 +118,9 @@ define void @st1_x2_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -136,9 +136,9 @@ define void @st1_x2_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -154,11 +154,11 @@ define void @st1_x4_i8( %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -274,11 +274,11 @@ define void @st1_x4_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -294,11 +294,11 @@ define void @st1_x4_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -316,9 +316,9 @@ define void @stnt1_x2_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -352,9 +352,9 @@ define void @stnt1_x2_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -370,9 +370,9 @@ define void @stnt1_x2_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -388,9 +388,9 @@ define void @stnt1_x2_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -406,9 +406,9 @@ define void @stnt1_x2_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -424,9 +424,9 @@ define void @stnt1_x2_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -442,9 +442,9 @@ define void @stnt1_x2_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -460,11 +460,11 @@ define void @stnt1_x4_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -500,11 +500,11 @@ define void @stnt1_x4_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -520,11 +520,11 @@ define void @stnt1_x4_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -540,11 +540,11 @@ define void @stnt1_x4_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -560,11 +560,11 @@ define void @stnt1_x4_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -580,11 +580,11 @@ define void @stnt1_x4_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -600,11 +600,11 @@ define void @stnt1_x4_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll index de1695162c98e..ca0bad16fe0e9 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll @@ -42,8 +42,6 @@ define @test_uclamp_i64( %a, , } @test_uclamp_single_x2_i8( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( %a, %b, %c, %d) @@ -53,8 +51,6 @@ define { , } @test_uclamp_single_x2_i8(, } @test_uclamp_single_x2_i16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( %a, %b, %c, %d) @@ -64,8 +60,6 @@ define { , } @test_uclamp_single_x2_i16(, } @test_uclamp_single_x2_i32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( %a, %b, %c, %d) @@ -75,8 +69,6 @@ define { , } @test_uclamp_single_x2_i32(, } @test_uclamp_single_x2_i64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( %a, %b, %c, %d) @@ -86,10 +78,6 @@ define { , } @test_uclamp_single_x2_i64(, , , } @test_uclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) @@ -99,10 +87,6 @@ define { , , , , , , } @test_uclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) @@ -112,10 +96,6 @@ define { , , , , , , } @test_uclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) @@ -125,10 +105,6 @@ define { , , , , , , } @test_uclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll index fe3ddbf747ace..741afc3a49a69 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll @@ -97,11 +97,11 @@ define { , , , , , , } @uzp_x4_f64( %unused, %zn1, %zn2, %zn3, %zn4) nounwind { ; CHECK-LABEL: uzp_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z5.d -; CHECK-NEXT: mov z26.d, z4.d -; CHECK-NEXT: mov z25.d, z3.d -; CHECK-NEXT: mov z24.d, z2.d -; CHECK-NEXT: uzp { z0.d - z3.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z7.d, z5.d +; CHECK-NEXT: mov z6.d, z4.d +; CHECK-NEXT: mov z5.d, z3.d +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: uzp { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( %zn1, %zn2, %zn3, %zn4) ret { , , , } %res @@ -204,11 +204,11 @@ define { , , , , , , } @zipq_x4_f64( %unused, %zn1, %zn2, %zn3, %zn4) nounwind { ; CHECK-LABEL: zipq_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z5.d -; CHECK-NEXT: mov z26.d, z4.d -; CHECK-NEXT: mov z25.d, z3.d -; CHECK-NEXT: mov z24.d, z2.d -; CHECK-NEXT: uzp { z0.q - z3.q }, { z24.q - z27.q } +; CHECK-NEXT: mov z7.d, z5.d +; CHECK-NEXT: mov z6.d, z4.d +; CHECK-NEXT: mov z5.d, z3.d +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: uzp { z0.q - z3.q }, { z4.q - z7.q } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( %zn1, %zn2, %zn3, %zn4) ret { , , , } %res diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll index ab70f57b48874..638849605a2cb 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll @@ -7,7 +7,6 @@ define @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -18,7 +17,6 @@ define @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -29,7 +27,6 @@ define @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -40,7 +37,6 @@ define @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -54,7 +50,6 @@ define @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -65,7 +60,6 @@ define @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -76,7 +70,6 @@ define @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -87,7 +80,6 @@ define @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -101,7 +93,6 @@ define @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -112,7 +103,6 @@ define @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -123,7 +113,6 @@ define @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -134,7 +123,6 @@ define @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -148,7 +136,6 @@ define @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -159,7 +146,6 @@ define @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -170,7 +156,6 @@ define @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -181,7 +166,6 @@ define @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -195,7 +179,6 @@ define @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -206,7 +189,6 @@ define @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -217,7 +199,6 @@ define @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -228,7 +209,6 @@ define @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -242,7 +222,6 @@ define @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -253,7 +232,6 @@ define @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -264,7 +242,6 @@ define @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -275,7 +252,6 @@ define @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -289,7 +265,6 @@ define @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -300,7 +275,6 @@ define @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -311,7 +285,6 @@ define @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -322,7 +295,6 @@ define @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -336,7 +308,6 @@ define @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -347,7 +318,6 @@ define @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -358,7 +328,6 @@ define @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -369,7 +338,6 @@ define @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 diff --git a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll index d3abc27a53dad..77415381709d1 100644 --- a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll +++ b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll @@ -9,6 +9,7 @@ define void @"func"(ptr swifterror %0) #0 { ; CHECK-NEXT: b {{\.?}}LBB0_2 ; CHECK-NEXT: {{\.?}}LBB0_1:{{.*}}%thirtythree ; CHECK-NEXT: {{.*}}=>This Inner Loop Header: Depth=1 +; CHECK-NEXT: {{.*}}implicit-def: $x0 ; CHECK-NEXT: b {{\.?}}LBB0_1 ; CHECK-NEXT: {{\.?}}LBB0_2:{{.*}}%thirtyeight ; CHECK-NEXT: b {{\.?}}LBB0_3 diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index 0ad9900865518..dd5ce449bb1d2 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -203,16 +203,17 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s ; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 -; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b -; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b -; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 -; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b -; CHECK-NEXT: bic v1.16b, v4.16b, v1.16b -; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: bit v1.16b, v0.16b, v3.16b +; CHECK-NEXT: mov v3.16b, v4.16b +; CHECK-NEXT: bsl v3.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v2.4s, v2.4s, #0.0 +; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b +; CHECK-NEXT: bic v2.16b, v3.16b, v2.16b ; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: trn1 v1.8b, v2.8b, v1.8b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: trn1 v1.8b, v1.8b, v2.8b ; CHECK-NEXT: str d1, [x0], #8 ; CHECK-NEXT: b.ne .LBB1_9 ; CHECK-NEXT: // %bb.10: // %middle.block @@ -352,21 +353,22 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v6.4s, v3.4s, v0.4s ; CHECK-NEXT: fcmgt v7.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmlt v16.4s, v2.4s, #0.0 -; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 -; CHECK-NEXT: bsl v5.16b, v0.16b, v2.16b -; CHECK-NEXT: bsl v6.16b, v0.16b, v3.16b -; CHECK-NEXT: bsl v7.16b, v0.16b, v4.16b -; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0 -; CHECK-NEXT: bic v3.16b, v5.16b, v16.16b -; CHECK-NEXT: bic v4.16b, v6.16b, v17.16b -; CHECK-NEXT: bic v2.16b, v7.16b, v2.16b +; CHECK-NEXT: bit v2.16b, v0.16b, v5.16b +; CHECK-NEXT: fcmlt v5.4s, v3.4s, #0.0 +; CHECK-NEXT: bit v3.16b, v0.16b, v6.16b +; CHECK-NEXT: mov v6.16b, v7.16b +; CHECK-NEXT: bsl v6.16b, v0.16b, v4.16b +; CHECK-NEXT: fcmlt v4.4s, v4.4s, #0.0 +; CHECK-NEXT: bic v2.16b, v2.16b, v16.16b +; CHECK-NEXT: bic v3.16b, v3.16b, v5.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: bic v4.16b, v6.16b, v4.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v4.4s, v4.4s -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v5.4h, v3.4s -; CHECK-NEXT: xtn v6.4h, v4.4s -; CHECK-NEXT: xtn v7.4h, v2.4s -; CHECK-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b }, v1.16b ; CHECK-NEXT: st1 { v2.s }[2], [x13] ; CHECK-NEXT: str d2, [x0], #12 ; CHECK-NEXT: b.ne .LBB2_4 @@ -605,26 +607,27 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s ; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0 -; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0 -; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0 -; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b -; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b -; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b -; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b -; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0 -; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b -; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b -; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b -; CHECK-NEXT: bic v2.16b, v17.16b, v2.16b +; CHECK-NEXT: bit v2.16b, v0.16b, v6.16b +; CHECK-NEXT: fcmlt v6.4s, v3.4s, #0.0 +; CHECK-NEXT: bit v3.16b, v0.16b, v7.16b +; CHECK-NEXT: fcmlt v7.4s, v4.4s, #0.0 +; CHECK-NEXT: bit v4.16b, v0.16b, v16.16b +; CHECK-NEXT: mov v16.16b, v17.16b +; CHECK-NEXT: bsl v16.16b, v0.16b, v5.16b +; CHECK-NEXT: fcmlt v5.4s, v5.4s, #0.0 +; CHECK-NEXT: bic v2.16b, v2.16b, v18.16b +; CHECK-NEXT: bic v3.16b, v3.16b, v6.16b +; CHECK-NEXT: bic v4.16b, v4.16b, v7.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: bic v5.16b, v16.16b, v5.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v4.4s, v4.4s ; CHECK-NEXT: fcvtzs v5.4s, v5.4s -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v16.4h, v3.4s -; CHECK-NEXT: xtn v17.4h, v4.4s -; CHECK-NEXT: xtn v18.4h, v5.4s -; CHECK-NEXT: xtn v19.4h, v2.4s -; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: xtn v5.4h, v5.4s +; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b ; CHECK-NEXT: str q2, [x0], #16 ; CHECK-NEXT: b.ne .LBB3_9 ; CHECK-NEXT: // %bb.10: // %middle.block diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index c4a58ba12dc6b..575a4b2e6e0fb 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -74,8 +74,8 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 @@ -363,21 +363,21 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 ; CHECK-BE-NEXT: add x13, x9, #64 +; CHECK-BE-NEXT: add x10, x9, #112 +; CHECK-BE-NEXT: add x11, x9, #96 ; CHECK-BE-NEXT: add x12, x9, #80 ; CHECK-BE-NEXT: add x14, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: ld1 { v16.16b }, [x13] -; CHECK-BE-NEXT: add x11, x9, #96 ; CHECK-BE-NEXT: add x13, x9, #32 +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x14] ; CHECK-BE-NEXT: ld1 { v17.16b }, [x12] -; CHECK-BE-NEXT: add x10, x9, #112 -; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x13] ; CHECK-BE-NEXT: ld1 { v18.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] -; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 ; CHECK-BE-NEXT: ld1 { v19.16b }, [x10] +; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 ; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b @@ -510,8 +510,8 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index 3685e9cf85bd6..e453d61832522 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -10,9 +10,9 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 -; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s -; CHECK-NEXT: str q2, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB0_1 @@ -50,10 +50,10 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 -; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s -; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s -; CHECK-NEXT: str q3, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB1_1 @@ -97,11 +97,11 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s -; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s -; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -149,9 +149,9 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2, ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 ; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10] -; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s -; CHECK-NEXT: str q4, [x2], #16 +; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v3.4s +; CHECK-NEXT: str q0, [x2], #16 ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -190,9 +190,9 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB4_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 -; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s -; CHECK-NEXT: str q2, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB4_1 @@ -229,10 +229,10 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB5_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 -; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s -; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s -; CHECK-NEXT: str q3, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB5_1 @@ -274,11 +274,11 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s -; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s -; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: b.ne .LBB6_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -369,16 +369,16 @@ define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) { ; CHECK: .Lfunc_begin8: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: ldp q0, q2, [x0] -; CHECK-NEXT: ldp q3, q4, [x0, #64] -; CHECK-NEXT: ldp q5, q6, [x0, #32] -; CHECK-NEXT: ldp q7, q16, [x0, #96] -; CHECK-NEXT: mov v0.h[5], v2.h[4] -; CHECK-NEXT: zip1 v2.8h, v3.8h, v4.8h -; CHECK-NEXT: zip1 v3.8h, v5.8h, v6.8h -; CHECK-NEXT: mov v7.h[5], v16.h[4] -; CHECK-NEXT: mov v0.s[1], v2.s[0] -; CHECK-NEXT: uzp1 v1.4s, v3.4s, v7.4s +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q2, q3, [x0, #64] +; CHECK-NEXT: ldp q4, q5, [x0, #32] +; CHECK-NEXT: ldp q6, q7, [x0, #96] +; CHECK-NEXT: mov v0.h[5], v1.h[4] +; CHECK-NEXT: zip1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: zip1 v2.8h, v4.8h, v5.8h +; CHECK-NEXT: mov v6.h[5], v7.h[4] +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v6.4s ; CHECK-NEXT: zip2 v2.4s, v0.4s, v1.4s ; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x0] ; CHECK-NEXT: str q2, [x0, #64] @@ -424,23 +424,23 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef % ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: ldr q2, [x2] ; CHECK-NEXT: ldr q3, [x4] ; CHECK-NEXT: ldr q4, [x5] -; CHECK-NEXT: ldr q2, [x2] ; CHECK-NEXT: ldr q5, [x3] ; CHECK-NEXT: trn1 v16.8h, v0.8h, v1.8h ; CHECK-NEXT: trn2 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ldr q6, [x6] ; CHECK-NEXT: ldr q7, [x7] ; CHECK-NEXT: trn1 v17.8h, v3.8h, v4.8h -; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h ; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h +; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h ; CHECK-NEXT: trn2 v2.8h, v2.8h, v5.8h ; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h ; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h ; CHECK-NEXT: trn1 v4.4s, v16.4s, v17.4s -; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s ; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s +; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s ; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: trn1 v5.4s, v18.4s, v19.4s ; CHECK-NEXT: trn1 v7.4s, v2.4s, v3.4s @@ -668,11 +668,11 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ext v3.16b, v0.16b, v1.16b, #12 -; CHECK-NEXT: ext v6.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v4.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: zip2 v3.4s, v0.4s, v3.4s +; CHECK-NEXT: zip2 v4.4s, v1.4s, v4.4s ; CHECK-NEXT: mov v3.s[0], v0.s[0] ; CHECK-NEXT: ext v0.16b, v2.16b, v0.16b, #12 -; CHECK-NEXT: zip2 v4.4s, v1.4s, v6.4s ; CHECK-NEXT: mov v4.s[0], v1.s[0] ; CHECK-NEXT: zip2 v5.4s, v2.4s, v0.4s ; CHECK-NEXT: mov v5.s[0], v2.s[0]