diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 8a68af41cedfcc..70b92b4283ce16 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -763,8 +763,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         // to be the same size as the dest.
         if (DstTy != SrcTy)
           return false;
-        return llvm::is_contained({v2s32, v4s32, v2s64, v2p0, v16s8, v8s16},
-                                  DstTy);
+        return llvm::is_contained(
+            {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
       })
       // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
       // just want those lowered into G_BUILD_VECTOR
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index feeef91bce19a0..09389bda94bc06 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -720,9 +720,13 @@ bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
   case 4:
     if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
+    else if (ScalarSize == 16)
+      Opc = AArch64::G_DUPLANE16;
    break;
  case 8:
-    if (ScalarSize == 16)
+    if (ScalarSize == 8)
+      Opc = AArch64::G_DUPLANE8;
+    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
@@ -752,13 +756,10 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
   Register DupSrc = MI.getOperand(1).getReg();
   // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
   // To do this, we can use a G_CONCAT_VECTORS to do the widening.
-  if (SrcTy == LLT::fixed_vector(2, LLT::scalar(32))) {
-    assert(MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 2 &&
-           "Unexpected dest elements");
+  if (SrcTy.getSizeInBits() == 64) {
     auto Undef = B.buildUndef(SrcTy);
-    DupSrc = B.buildConcatVectors(
-                 SrcTy.changeElementCount(ElementCount::getFixed(4)),
-                 {Src1Reg, Undef.getReg(0)})
+    DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
+                                  {Src1Reg, Undef.getReg(0)})
                  .getReg(0);
   }
   B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
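Taken together, the two C++ changes above extend GlobalISel's DUP-lane handling from the single <2 x s32> special case to all 64-bit shuffle sources: the legalizer now also treats same-size <4 x s16> and <8 x s8> shuffles as legal, matchDupLane maps the new element-count/scalar-size combinations to G_DUPLANE8/G_DUPLANE16, and applyDupLane widens any 64-bit source to 128 bits by concatenating with undef. A minimal IR sketch of a shuffle this now catches, in the spirit of the existing vduplane16 test (illustrative only; the function name is invented):

define <4 x i16> @dup_lane1_v4i16(<4 x i16> %a) {
  ; A 64-bit lane splat: with this patch GlobalISel should lower it via
  ; G_CONCAT_VECTORS + G_DUPLANE16 to a single dup.4h rather than falling back.
  %r = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i16> %r
}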
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 75e6aa1f19bfd0..90dbd618919e2b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -1,13 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:      warning: Instruction selection used fallback path for v_shuffledup8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v_shuffledup16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4f16
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
 ; CHECK-LABEL: v_dup8:
@@ -417,25 +410,47 @@ entry:
 
 ; Also test the DUP path in the PerfectShuffle generator.
 define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
-; CHECK-LABEL: test_perfectshuffle_dupext_v4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    trn1.4h v0, v0, v0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    mov.s v0[1], v1[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    trn1.4h v0, v0, v0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    mov.s v0[1], v1[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI33_0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
   ret <4 x i16> %r
 }
 
 define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
-; CHECK-LABEL: test_perfectshuffle_dupext_v4f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    trn1.4h v0, v0, v0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    mov.s v0[1], v1[0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4f16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    trn1.4h v0, v0, v0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    mov.s v0[1], v1[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
   ret <4 x half> %r
 }
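Both PerfectShuffle tests now go through GlobalISel without the fallback, but the two selectors diverge: SelectionDAG still consults its PerfectShuffle tables (trn1 plus a lane move), while GlobalISel lowers the residual mask to a tbl.16b driven by an index vector from the constant pool. A sketch of what that index vector must contain, assuming a straightforward byte-wise expansion of the element mask <0, 0, 4, 5> over the concatenated q-register (the actual .LCPI33_0 payload is not shown in the diff, and the global name here is invented):

; Each i16 lane of the mask expands to two little-endian byte indices into
; the combined {%a, %b} register, so <0,0,4,5> becomes the bytes below.
@LCPI33_0.sketch = private constant <8 x i8> <i8 0, i8 1, i8 0, i8 1, i8 8, i8 9, i8 10, i8 11>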
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 0081a28ab10fda..dd9ccd771c7a1d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -1,13 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:      warning: Instruction selection used fallback path for test_vrev64D8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev64D16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev32D8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev32D16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev16D8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev64D8_undef
+; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define i32 @test_rev_w(i32 %a) nounwind {
 ; CHECK-LABEL: test_rev_w:
@@ -303,22 +296,42 @@ define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
 }
 
 define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
-; CHECK-LABEL: test_vrev32D8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    rev32.8b v0, v0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_vrev32D8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    rev32.8b v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vrev32D8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
+; CHECK-GI-NEXT:    mov.d v0[1], v0[0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
   %tmp1 = load <8 x i8>, ptr %A
   %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
-; CHECK-LABEL: test_vrev32D16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    rev32.4h v0, v0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_vrev32D16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    rev32.4h v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vrev32D16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI20_0
+; CHECK-GI-NEXT:    mov.d v0[1], v0[0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   ret <4 x i16> %tmp2
@@ -363,11 +376,21 @@ define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
 }
 
 define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
-; CHECK-LABEL: test_vrev16D8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    rev16.8b v0, v0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_vrev16D8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldr d0, [x0]
+; CHECK-SD-NEXT:    rev16.8b v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vrev16D8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr d0, [x0]
+; CHECK-GI-NEXT:    adrp x8, .LCPI23_0
+; CHECK-GI-NEXT:    mov.d v0[1], v0[0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
   %tmp1 = load <8 x i8>, ptr %A
   %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x i8> %tmp2
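The arm64-rev.ll changes tell the same story: these 64-bit shuffles no longer hit the fallback path, though GlobalISel currently spends an adrp/ldr/tbl.16b sequence where SelectionDAG folds the whole mask into a single rev32/rev16. For reference, a standalone sketch of the test_vrev32D8 pattern with its mask written out (illustrative only; the function name is invented, and the mask follows from rev32's semantics of reversing the bytes within each 32-bit word):

define <8 x i8> @rev32_bytes_sketch(<8 x i8> %v) {
  ; Reverse the four bytes inside each 32-bit word, matching rev32.8b.
  %r = shufflevector <8 x i8> %v, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i8> %r
}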