diff --git a/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll b/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
index 20cf792ce9c3f..56f24770659ba 100644
--- a/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
@@ -1,8 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s
 
 define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: qadds:
-; CHECK: sqadd s0, s0, s1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd s0, s0, s1
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <4 x i32> %b, i32 0
   %vecext1 = extractelement <4 x i32> %c, i32 0
   %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
@@ -11,7 +15,10 @@ define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 
 define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: qaddd:
-; CHECK: sqadd d0, d0, d1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqadd d0, d0, d1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vecext1 = extractelement <2 x i64> %c, i32 0
   %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
@@ -20,7 +27,10 @@ define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 
 define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: uqadds:
-; CHECK: uqadd s0, s0, s1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd s0, s0, s1
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <4 x i32> %b, i32 0
   %vecext1 = extractelement <4 x i32> %c, i32 0
   %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
@@ -29,7 +39,10 @@ define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 
 define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: uqaddd:
-; CHECK: uqadd d0, d0, d1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqadd d0, d0, d1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vecext1 = extractelement <2 x i64> %c, i32 0
   %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
@@ -43,7 +56,10 @@ declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone
 
 define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: qsubs:
-; CHECK: sqsub s0, s0, s1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub s0, s0, s1
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <4 x i32> %b, i32 0
   %vecext1 = extractelement <4 x i32> %c, i32 0
   %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
@@ -52,7 +68,10 @@ define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 
 define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: qsubd:
-; CHECK: sqsub d0, d0, d1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqsub d0, d0, d1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vecext1 = extractelement <2 x i64> %c, i32 0
   %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
@@ -61,7 +80,10 @@ define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 
 define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: uqsubs:
-; CHECK: uqsub s0, s0, s1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub s0, s0, s1
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <4 x i32> %b, i32 0
   %vecext1 = extractelement <4 x i32> %c, i32 0
   %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
@@ -70,7 +92,10 @@ define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 
 define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: uqsubd:
-; CHECK: uqsub d0, d0, d1
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqsub d0, d0, d1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vecext1 = extractelement <2 x i64> %c, i32 0
   %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
@@ -84,8 +109,10 @@ declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone
 
 define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
 ; CHECK-LABEL: qabss:
-; CHECK: sqabs s0, s0
-; CHECK: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqabs s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <4 x i32> %b, i32 0
   %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind
   ret i32 %vqabs.i
@@ -93,8 +120,10 @@ define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
 
 define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
 ; CHECK-LABEL: qabsd:
-; CHECK: sqabs d0, d0
-; CHECK: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqabs d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind
   ret i64 %vqabs.i
@@ -102,8 +131,10 @@ define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
 
 define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
 ; CHECK-LABEL: qnegs:
-; CHECK: sqneg s0, s0
-; CHECK: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqneg s0, s0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <4 x i32> %b, i32 0
   %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind
   ret i32 %vqneg.i
@@ -111,8 +142,10 @@ define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
 
 define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
 ; CHECK-LABEL: qnegd:
-; CHECK: sqneg d0, d0
-; CHECK: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqneg d0, d0
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind
   ret i64 %vqneg.i
@@ -126,7 +159,10 @@ declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone
 
 define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
 ; CHECK-LABEL: vqmovund:
-; CHECK: sqxtun s0, d0
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtun s0, d0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
   ret i32 %vqmovun.i
@@ -134,7 +170,10 @@ define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
 
 define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
 ; CHECK-LABEL: vqmovnd_s:
-; CHECK: sqxtn s0, d0
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtn s0, d0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
   ret i32 %vqmovn.i
@@ -142,12 +181,75 @@ define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
 
 define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
 ; CHECK-LABEL: vqmovnd_u:
-; CHECK: uqxtn s0, d0
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtn s0, d0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
   %vecext = extractelement <2 x i64> %b, i32 0
   %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
   ret i32 %vqmovn.i
 }
 
+define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
+; CHECK-LABEL: uqxtn_ext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, v3.d[1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    uqxtn s0, d0
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %e1 = extractelement <2 x i64> %e, i64 1
+  %r = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %e1)
+  ret i32 %r
+}
+
+define <4 x i32> @sqxtn_ins(<4 x i32> noundef %a, i64 %c) {
+; CHECK-LABEL: sqxtn_ins:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d1, x0
+; CHECK-NEXT:    sqxtn s1, d1
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    ret
+entry:
+  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %c)
+  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
+; CHECK-LABEL: sqxtun_insext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, v1.d[1]
+; CHECK-NEXT:    fmov d1, x8
+; CHECK-NEXT:    sqxtun s1, d1
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    ret
+entry:
+  %c = extractelement <2 x i64> %e, i64 1
+  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %c)
+  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @saddluse(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
+; CHECK-LABEL: saddluse:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    saddlv d1, v1.4s
+; CHECK-NEXT:    sqxtn s1, d1
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    ret
+entry:
+  %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %b)
+  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vaddlvq_s32.i)
+  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 1
+  ret <4 x i32> %vecins
+}
+
 declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
 declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
 declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone
+declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/neon-extract.ll b/llvm/test/CodeGen/AArch64/neon-extract.ll
index 0cac894246422..26c4ec4d4bdb2 100644
--- a/llvm/test/CodeGen/AArch64/neon-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extract.ll
@@ -1,8 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: test_vext_s8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %vext
@@ -10,7 +13,9 @@ entry:
 
 define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vext_s16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i16> %vext
@@ -18,7 +23,9 @@ entry:
 
 define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vext_s32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %vext
@@ -26,7 +33,8 @@ entry:
 
 define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: test_vext_s64:
-; CHECK-NOT: ext {{v[0-9]+}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
   ret <1 x i64> %vext
@@ -34,7 +42,9 @@ entry:
 
 define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test_vextq_s8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   ret <16 x i8> %vext
@@ -42,7 +52,9 @@ entry:
 
 define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vextq_s16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
@@ -50,7 +62,9 @@ entry:
 
 define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vextq_s32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i32> %vext
@@ -58,7 +72,9 @@ entry:
 
 define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vextq_s64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %vext
@@ -66,7 +82,9 @@ entry:
 
 define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: test_vext_u8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %vext
@@ -74,7 +92,9 @@ entry:
 
 define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vext_u16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i16> %vext
@@ -82,7 +102,9 @@ entry:
 
 define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vext_u32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %vext
@@ -90,6 +112,8 @@ entry:
 
 define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) {
 ; CHECK-LABEL: test_vext_u64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
   ret <1 x i64> %vext
@@ -97,7 +121,9 @@ entry:
 
 define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test_vextq_u8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   ret <16 x i8> %vext
@@ -105,7 +131,9 @@ entry:
 
 define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vextq_u16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
@@ -113,7 +141,9 @@ entry:
 
 define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vextq_u32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i32> %vext
@@ -121,7 +151,9 @@ entry:
 
 define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vextq_u64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %vext
@@ -129,7 +161,9 @@ entry:
 
 define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vext_f32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %vext
@@ -137,7 +171,8 @@ entry:
 
 define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) {
 ; CHECK-LABEL: test_vext_f64:
-; CHECK-NOT: ext {{v[0-9]+}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> <i32 0>
   ret <1 x double> %vext
@@ -145,7 +180,9 @@ entry:
 
 define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: test_vextq_f32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x float> %vext
@@ -153,7 +190,9 @@ entry:
 
 define <2 x double> @test_vextq_f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: test_vextq_f64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x double> %vext
@@ -161,7 +200,9 @@ entry:
 
 define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: test_vext_p8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %vext
@@ -169,7 +210,9 @@ entry:
 
 define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vext_p16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v1.8b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i16> %vext
@@ -177,7 +220,9 @@ entry:
 
 define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test_vextq_p8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   ret <16 x i8> %vext
@@ -185,7 +230,9 @@ entry:
 
 define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vextq_p16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
@@ -193,7 +240,9 @@ entry:
 
 define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) {
 ; CHECK-LABEL: test_undef_vext_s8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #2
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 10, i32 10, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef>
   ret <8 x i8> %vext
@@ -201,7 +250,9 @@ entry:
 
 define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) {
 ; CHECK-LABEL: test_undef_vextq_s8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 undef, i32 undef, i32 20, i32 20>
   ret <16 x i8> %vext
@@ -209,7 +260,10 @@ entry:
 
 define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
 ; CHECK-LABEL: test_undef_vext_s16:
-; CHECK: dup v{{[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.2s, v0.s[1]
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
   ret <4 x i16> %vext
@@ -217,7 +271,9 @@ entry:
 
 define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) {
 ; CHECK-LABEL: test_undef_vextq_s16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
+; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %vext
diff --git a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
new file mode 100644
index 0000000000000..d4aeba5870a0f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <4 x i32> @test_vins_v4i32(<4 x i32> %a, float %b) {
+; CHECK-LABEL: test_vins_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast float %b to i32
+  %d = insertelement <4 x i32> %a, i32 %c, i32 3
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @test_vins_v4i32_0(<4 x i32> %a, float %b) {
+; CHECK-LABEL: test_vins_v4i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    mov v0.s[0], w8
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast float %b to i32
+  %d = insertelement <4 x i32> %a, i32 %c, i32 0
+  ret <4 x i32> %d
+}
+
+define <2 x i32> @test_vins_v2i32(<2 x i32> %a, float %b) {
+; CHECK-LABEL: test_vins_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast float %b to i32
+  %d = insertelement <2 x i32> %a, i32 %c, i32 1
+  ret <2 x i32> %d
+}
+
+define <2 x i32> @test_vins_v2i32_0(<2 x i32> %a, float %b) {
+; CHECK-LABEL: test_vins_v2i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov v0.s[0], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast float %b to i32
+  %d = insertelement <2 x i32> %a, i32 %c, i32 0
+  ret <2 x i32> %d
+}
+
+define <2 x i64> @test_vins_v2i64(<2 x i64> %a, double %b) {
+; CHECK-LABEL: test_vins_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov x8, d1
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast double %b to i64
+  %d = insertelement <2 x i64> %a, i64 %c, i32 1
+  ret <2 x i64> %d
+}
+
+define <2 x i64> @test_vins_v2i64_0(<2 x i64> %a, double %b) {
+; CHECK-LABEL: test_vins_v2i64_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov x8, d1
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast double %b to i64
+  %d = insertelement <2 x i64> %a, i64 %c, i32 0
+  ret <2 x i64> %d
+}
+
+define <1 x i64> @test_vins_v1i64(<1 x i64> %a, double %b) {
+; CHECK-LABEL: test_vins_v1i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
+entry:
+  %c = bitcast double %b to i64
+  %d = insertelement <1 x i64> %a, i64 %c, i32 0
+  ret <1 x i64> %d
+}
+
+
+define float @test_vext_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_vext_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <4 x i32> %a, i32 3
+  %c = bitcast i32 %b to float
+  ret float %c
+}
+
+define float @test_vext_v4i32_0(<4 x i32> %a) {
+; CHECK-LABEL: test_vext_v4i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <4 x i32> %a, i32 0
+  %c = bitcast i32 %b to float
+  ret float %c
+}
+
+define float @test_vext_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_vext_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <2 x i32> %a, i32 1
+  %c = bitcast i32 %b to float
+  ret float %c
+}
+
+define float @test_vext_v2i32_0(<2 x i32> %a) {
+; CHECK-LABEL: test_vext_v2i32_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <2 x i32> %a, i32 0
+  %c = bitcast i32 %b to float
+  ret float %c
+}
+
+define double @test_vext_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_vext_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <2 x i64> %a, i32 1
+  %c = bitcast i64 %b to double
+  ret double %c
+}
+
+define double @test_vext_v2i64_0(<2 x i64> %a) {
+; CHECK-LABEL: test_vext_v2i64_0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <2 x i64> %a, i32 0
+  %c = bitcast i64 %b to double
+  ret double %c
+}
+
+define double @test_vext_v1i64(<1 x i64> %a) {
+; CHECK-LABEL: test_vext_v1i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+entry:
+  %b = extractelement <1 x i64> %a, i32 0
+  %c = bitcast i64 %b to double
+  ret double %c
+}