Skip to content

Commit

Permalink
[AArch64][GISel] Expand G_DUP and G_DUPLANE to v8s8 and v4s16
Browse files Browse the repository at this point in the history
This fills in the gaps with v8s8 and v4s16 vectors for G_DUP and G_DUPLANE,
using the existing code, which is generalized to more types.
  • Loading branch information
davemgreen committed Aug 4, 2023
1 parent 19d7ab1 commit bbe945b
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 54 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// to be the same size as the dest.
if (DstTy != SrcTy)
return false;
return llvm::is_contained({v2s32, v4s32, v2s64, v2p0, v16s8, v8s16},
DstTy);
return llvm::is_contained(
{v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
})
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
// just want those lowered into G_BUILD_VECTOR
Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -720,9 +720,13 @@ bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
case 4:
if (ScalarSize == 32)
Opc = AArch64::G_DUPLANE32;
else if (ScalarSize == 16)
Opc = AArch64::G_DUPLANE16;
break;
case 8:
if (ScalarSize == 16)
if (ScalarSize == 8)
Opc = AArch64::G_DUPLANE8;
else if (ScalarSize == 16)
Opc = AArch64::G_DUPLANE16;
break;
case 16:
Expand Down Expand Up @@ -752,13 +756,10 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
Register DupSrc = MI.getOperand(1).getReg();
// For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
// To do this, we can use a G_CONCAT_VECTORS to do the widening.
if (SrcTy == LLT::fixed_vector(2, LLT::scalar(32))) {
assert(MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 2 &&
"Unexpected dest elements");
if (SrcTy.getSizeInBits() == 64) {
auto Undef = B.buildUndef(SrcTy);
DupSrc = B.buildConcatVectors(
SrcTy.changeElementCount(ElementCount::getFixed(4)),
{Src1Reg, Undef.getReg(0)})
DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
{Src1Reg, Undef.getReg(0)})
.getReg(0);
}
B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
Expand Down
59 changes: 37 additions & 22 deletions llvm/test/CodeGen/AArch64/arm64-dup.ll
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for v_shuffledup8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v_shuffledup16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vduplane16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_perfectshuffle_dupext_v4f16
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define <8 x i8> @v_dup8(i8 %A) nounwind {
; CHECK-LABEL: v_dup8:
Expand Down Expand Up @@ -417,25 +410,47 @@ entry:
; Also test the DUP path in the PerfectShuffle generator.

define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
; CHECK-LABEL: test_perfectshuffle_dupext_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1.4h v0, v0, v0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov.s v0[1], v1[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: trn1.4h v0, v0, v0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: mov.s v0[1], v1[0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI33_0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
ret <4 x i16> %r
}

define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
; CHECK-LABEL: test_perfectshuffle_dupext_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1.4h v0, v0, v0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov.s v0[1], v1[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4f16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: trn1.4h v0, v0, v0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: mov.s v0[1], v1[0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
ret <4 x half> %r
}
Expand Down
69 changes: 46 additions & 23 deletions llvm/test/CodeGen/AArch64/arm64-rev.ll
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for test_vrev64D8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev64D16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev32D8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev32D16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev16D8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vrev64D8_undef
; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define i32 @test_rev_w(i32 %a) nounwind {
; CHECK-LABEL: test_rev_w:
Expand Down Expand Up @@ -303,22 +296,42 @@ define <4 x float> @test_vrev64Qf(ptr %A) nounwind {
}

define <8 x i8> @test_vrev32D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32D8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev32.8b v0, v0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: test_vrev32D8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: rev32.8b v0, v0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_vrev32D8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI19_0
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i8> %tmp2
}

define <4 x i16> @test_vrev32D16(ptr %A) nounwind {
; CHECK-LABEL: test_vrev32D16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev32.4h v0, v0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: test_vrev32D16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: rev32.4h v0, v0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_vrev32D16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%tmp1 = load <4 x i16>, ptr %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i16> %tmp2
Expand Down Expand Up @@ -363,11 +376,21 @@ define <8 x i16> @test_vrev32Q16(ptr %A) nounwind {
}

define <8 x i8> @test_vrev16D8(ptr %A) nounwind {
; CHECK-LABEL: test_vrev16D8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: rev16.8b v0, v0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: test_vrev16D8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: rev16.8b v0, v0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_vrev16D8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NEXT: mov.d v0[1], v0[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%tmp1 = load <8 x i8>, ptr %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i8> %tmp2
Expand Down

0 comments on commit bbe945b

Please sign in to comment.