-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Full reverse shuffles. #119083
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: A full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and v16s8 types to match SDAG. Other types should be handled by perfect shuffles. Full diff: https://github.com/llvm/llvm-project/pull/119083.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1b1d81fcd07a2b..3d2507fc1f25c4 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -131,6 +131,13 @@ def ext: GICombineRule <
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
>;
+def fullrev: GICombineRule < // Combine rule rooted at a G_SHUFFLE_VECTOR; delegates to matchFullRev / applyFullRev.
+ (defs root:$root, shuffle_matchdata:$matchinfo), // NOTE(review): $matchinfo is declared but not referenced by the match/apply code below - confirm it is needed.
+ (match (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
+ [{ return matchFullRev(*${root}, MRI); }]),
+ (apply [{ applyFullRev(*${root}, MRI); }])
+>;
+
def insertelt_nonconst: GICombineRule <
(defs root:$root, shuffle_matchdata:$matchinfo),
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +170,7 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
+def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
form_duplane, shuf_to_ins]>;
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 56d70ffdece713..244c0686750837 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -405,6 +405,28 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MI.eraseFromParent();
}
+bool matchFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) { // Returns true when MI is a full-reverse shuffle of a 16x8-bit or 8x16-bit vector.
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ Register V1 = MI.getOperand(1).getReg(); // Only the first source operand is inspected.
+ auto Mask = MI.getOperand(3).getShuffleMask();
+ return (DstTy == LLT::fixed_vector(16, 8) || // Only these two result types are accepted.
+ DstTy == LLT::fixed_vector(8, 16)) &&
+ DstTy == MRI.getType(V1) && // The source must have the same type as the result.
+ ShuffleVectorInst::isReverseMask(Mask, Mask.size()); // The mask must be a reverse mask across all Mask.size() elements.
+}
+
+void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) { // Replaces MI with a G_REV64 followed by a G_EXT, then erases MI.
+ MachineIRBuilder MIRBuilder(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8); // 32-bit scalar constant 8, passed as the last G_EXT source operand.
+ auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {MRI.getType(Dst)}, {Src}); // G_REV64 of Src, with the result typed like Dst.
+ MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst}); // G_EXT takes Rev as both vector inputs and defines Dst.
+ MI.eraseFromParent(); // The original shuffle is removed once its replacement is built.
+}
+
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 0ce92a20fb3a17..a3bc2b58d708f3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -407,11 +407,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
@@ -460,11 +459,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI15_0
-; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index 89838391956f29..7b0e0cfe04c010 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -60,19 +60,11 @@ entry:
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-SD-LABEL: v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI4_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i16> %V128
@@ -112,19 +104,11 @@ entry:
}
define <16 x i8> @v16i8(<16 x i8> %a) {
-; CHECK-SD-LABEL: v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.16b, v0.16b
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.16b, v0.16b
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <16 x i8> %V128
@@ -203,19 +187,11 @@ entry:
}
define <8 x half> @v8f16(<8 x half> %a) {
-; CHECK-SD-LABEL: v8f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev64 v0.8h, v0.8h
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ret
entry:
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x half> %V128
|
| @@ -405,6 +405,28 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { | |||
| MI.eraseFromParent(); | |||
| } | |||
|
|
|||
| bool matchFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) { | |||
| assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GShuffleVector *Shuffle = cast<GShuffleVector>(&MI);
| def fullrev: GICombineRule < | ||
| (defs root:$root, shuffle_matchdata:$matchinfo), | ||
| (match (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root, | ||
| [{ return matchFullRev(*${root}, MRI); }]), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Predicate seems like it can go in the generic combiner, other than the type restrictions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the types should be OK - v4i32 are either handled here or by perfect shuffles and the other types should be handled by other combines as far as I can tell.
A full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and v16s8 types to match SDAG. Other types should be handled by perfect shuffles.
dda0d62 to
eb4bc5e
Compare
A full shuffle reverse needs to use EXT+REV64. This adds handling for v8s16 and v16s8 types to match SDAG. Other types should be handled by perfect shuffles.