-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Improve legalization of G_PTR_ADD #91763
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: David Green (davemgreen) ChangesThe testing we have for vector ptradd was a bit lacking. In adding tests this patch found a couple of issues mostly with the way v3 vectors of ptrs were sometimes legalized via i64, and with non-i64 additions. It does not attempt to fix the issue with mergevalues from returning vector ptrs. Full diff: https://github.com/llvm/llvm-project/pull/91763.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6a76ad7f5db74..7fb09f768a71b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -69,8 +69,9 @@ getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
unsigned EltSize = OrigTy.getScalarSizeInBits();
if (LeftoverSize % EltSize != 0)
return {-1, -1};
- LeftoverTy = LLT::scalarOrVector(
- ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
+ LeftoverTy =
+ LLT::scalarOrVector(ElementCount::getFixed(LeftoverSize / EltSize),
+ OrigTy.getElementType());
} else {
LeftoverTy = LLT::scalar(LeftoverSize);
}
@@ -212,7 +213,7 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
appendVectorElts(AllElts, PartRegs[i]);
Register Leftover = PartRegs[PartRegs.size() - 1];
- if (MRI.getType(Leftover).isScalar())
+ if (MRI.getType(Leftover).isScalar() || MRI.getType(Leftover).isPointer())
AllElts.push_back(Leftover);
else
appendVectorElts(AllElts, Leftover);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d4aac94d24f12..9772f67bf9f30 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -177,9 +177,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_PTR_ADD)
.legalFor({{p0, s64}, {v2p0, v2s64}})
- .clampScalar(1, s64, s64)
- .clampNumElements(0, v2p0, v2p0)
- .clampNumElements(1, v2s64, v2s64);
+ .clampScalarOrElt(1, s64, s64)
+ .clampNumElements(0, v2p0, v2p0);
getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
diff --git a/llvm/test/CodeGen/AArch64/ptradd.ll b/llvm/test/CodeGen/AArch64/ptradd.ll
new file mode 100644
index 0000000000000..9b46eca79f23e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptradd.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-none-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; Note: these tests use stores instead of returns as the return handling for
+; vector ptrs is currently sometimes create invalid merge values.
+
+define void @vector_gep_i32(ptr %b, i32 %off, ptr %p) {
+; CHECK-LABEL: vector_gep_i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, w1, sxtw
+; CHECK-NEXT: str x8, [x2]
+; CHECK-NEXT: ret
+entry:
+ %g = getelementptr i8, ptr %b, i32 %off
+ store ptr %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_i64(ptr %b, i64 %off, ptr %p) {
+; CHECK-LABEL: vector_gep_i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: str x8, [x2]
+; CHECK-NEXT: ret
+entry:
+ %g = getelementptr i8, ptr %b, i64 %off
+ store ptr %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v1i32(<1 x ptr> %b, <1 x i32> %off, ptr %p) {
+; CHECK-SD-LABEL: vector_gep_v1i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: shl d1, d1, #32
+; CHECK-SD-NEXT: ssra d0, d1, #32
+; CHECK-SD-NEXT: str d0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_gep_v1i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov w8, s1
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: add x8, x9, w8, sxtw
+; CHECK-GI-NEXT: str x8, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %g = getelementptr i8, <1 x ptr> %b, <1 x i32> %off
+ store <1 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v2i32(<2 x ptr> %b, <2 x i32> %off, ptr %p) {
+; CHECK-LABEL: vector_gep_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: saddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %g = getelementptr i8, <2 x ptr> %b, <2 x i32> %off
+ store <2 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v3i32(<3 x ptr> %b, <3 x i32> %off, ptr %p) {
+; CHECK-SD-LABEL: vector_gep_v3i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: saddw2 v2.2d, v2.2d, v3.4s
+; CHECK-SD-NEXT: str d2, [x0, #16]
+; CHECK-SD-NEXT: saddw v0.2d, v0.2d, v3.2s
+; CHECK-SD-NEXT: str q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_gep_v3i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: smov x8, v3.s[0]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: smov x9, v3.s[1]
+; CHECK-GI-NEXT: mov s3, v3.s[2]
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: fmov d1, x8
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: fmov w9, s3
+; CHECK-GI-NEXT: add x8, x8, w9, sxtw
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: str x8, [x0, #16]
+; CHECK-GI-NEXT: str q0, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %g = getelementptr i8, <3 x ptr> %b, <3 x i32> %off
+ store <3 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v4i32(<4 x ptr> %b, <4 x i32> %off, ptr %p) {
+; CHECK-SD-LABEL: vector_gep_v4i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: saddw2 v1.2d, v1.2d, v2.4s
+; CHECK-SD-NEXT: saddw v0.2d, v0.2d, v2.2s
+; CHECK-SD-NEXT: stp q0, q1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_gep_v4i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: saddw v0.2d, v0.2d, v2.2s
+; CHECK-GI-NEXT: saddw2 v1.2d, v1.2d, v2.4s
+; CHECK-GI-NEXT: stp q0, q1, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %g = getelementptr i8, <4 x ptr> %b, <4 x i32> %off
+ store <4 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v1i64(<1 x ptr> %b, <1 x i64> %off, ptr %p) {
+; CHECK-SD-LABEL: vector_gep_v1i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: add d0, d0, d1
+; CHECK-SD-NEXT: str d0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_gep_v1i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: add x8, x8, x9
+; CHECK-GI-NEXT: str x8, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %g = getelementptr i8, <1 x ptr> %b, <1 x i64> %off
+ store <1 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v2i64(<2 x ptr> %b, <2 x i64> %off, ptr %p) {
+; CHECK-LABEL: vector_gep_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %g = getelementptr i8, <2 x ptr> %b, <2 x i64> %off
+ store <2 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v3i64(<3 x ptr> %b, <3 x i64> %off, ptr %p) {
+; CHECK-SD-LABEL: vector_gep_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: add d1, d2, d5
+; CHECK-SD-NEXT: str d1, [x0, #16]
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-SD-NEXT: str q0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_gep_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fmov x9, d5
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT: add x8, x8, x9
+; CHECK-GI-NEXT: str x8, [x0, #16]
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v3.2d
+; CHECK-GI-NEXT: str q0, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %g = getelementptr i8, <3 x ptr> %b, <3 x i64> %off
+ store <3 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v4i64(<4 x ptr> %b, <4 x i64> %off, ptr %p) {
+; CHECK-SD-LABEL: vector_gep_v4i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: stp q0, q1, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: vector_gep_v4i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: stp q0, q1, [x0]
+; CHECK-GI-NEXT: ret
+entry:
+ %g = getelementptr i8, <4 x ptr> %b, <4 x i64> %off
+ store <4 x ptr> %g, ptr %p
+ ret void
+}
+
+define void @vector_gep_v4i128(<2 x ptr> %b, <2 x i128> %off, ptr %p) {
+; CHECK-LABEL: vector_gep_v4i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d1, x0
+; CHECK-NEXT: mov v1.d[1], x2
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: str q0, [x4]
+; CHECK-NEXT: ret
+entry:
+ %g = getelementptr i8, <2 x ptr> %b, <2 x i128> %off
+ store <2 x ptr> %g, ptr %p
+ ret void
+}
|
Thanks! |
The testing we have for vector ptradd was a bit lacking. In adding tests this patch found a couple of issues mostly with the way v3 vectors of ptrs were sometimes legalized via i64, and with non-i64 additions.
The testing we have for vector ptradd was a bit lacking. In adding tests this patch found a couple of issues mostly with the way v3 vectors of ptrs were sometimes legalized via i64, and with non-i64 additions. It does not attempt to fix the issue with mergevalues from returning vector ptrs.
The testing we have for vector ptradd was a bit lacking. In adding tests this patch found a couple of issues mostly with the way v3 vectors of ptrs were sometimes legalized via i64, and with non-i64 additions. It does not attempt to fix the issue with mergevalues from returning vector ptrs.
The testing we have for vector ptradd was a bit lacking. In adding tests this patch found a couple of issues mostly with the way v3 vectors of ptrs were sometimes legalized via i64, and with non-i64 additions. It does not attempt to fix the issue with mergevalues from returning vector ptrs.