-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BasicAA] Scalable offset with scalable typesize. #80818
Conversation
@llvm/pr-subscribers-llvm-analysis Author: David Green (davemgreen) ChangesThis patch adds a simple alias analysis check for accesses that are scalable Full diff: https://github.com/llvm/llvm-project/pull/80818.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 19c4393add6ab..4898dae3dbe08 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1170,6 +1170,25 @@ AliasResult BasicAAResult::aliasGEP(
}
}
+ // VScale Alias Analysis - Given one scalable offset between accesses and a
+ // scalable typesize, we can divide each side by vscale, treating both values
+ // as a constant. We prove that Offset/vscale >= TypeSize/vscale.
+ if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.Offset.isZero() &&
+ PatternMatch::match(DecompGEP1.VarIndices[0].Val.V,
+ PatternMatch::m_VScale())) {
+ const VariableGEPIndex &ScalableVar = DecompGEP1.VarIndices[0];
+ APInt Scale =
+ ScalableVar.IsNegated ? -ScalableVar.Scale : ScalableVar.Scale;
+ LocationSize VLeftSize = Scale.isNegative() ? V1Size : V2Size;
+
+ // Note that we do not check that the typesize is scalable, as vscale >= 1
+ // so noalias still holds so long as the dependency distance is at least as
+ // big as the typesize.
+ if (VLeftSize.hasValue() &&
+ Scale.uge(VLeftSize.getValue().getKnownMinValue()))
+ return AliasResult::NoAlias;
+ }
+
// Bail on analysing scalable LocationSize
if (V1Size.isScalable() || V2Size.isScalable())
return AliasResult::MayAlias;
diff --git a/llvm/test/Analysis/BasicAA/vscale.ll b/llvm/test/Analysis/BasicAA/vscale.ll
index 3fff435463a6d..a8ed93be90ef4 100644
--- a/llvm/test/Analysis/BasicAA/vscale.ll
+++ b/llvm/test/Analysis/BasicAA/vscale.ll
@@ -309,6 +309,174 @@ define void @v1v2types(ptr %p) vscale_range(1,16) {
ret void
}
+; VScale intrinsic offset tests
+
+; CHECK-LABEL: vscale_neg_notscalable
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @vscale_neg_notscalable(ptr %p) {
+ %v = call i64 @llvm.vscale.i64()
+ %vp = mul i64 %v, 16
+ %vm = mul i64 %v, -16
+ %vm16 = getelementptr i8, ptr %p, i64 %vm
+ %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+ %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+ %m16pv16 = getelementptr i8, ptr %m16, i64 %vp
+ load <4 x i32>, ptr %p
+ load <4 x i32>, ptr %vm16
+ load <4 x i32>, ptr %m16
+ load <4 x i32>, ptr %vm16m16
+ load <4 x i32>, ptr %m16pv16
+ ret void
+}
+
+; CHECK-LABEL: vscale_neg_scalable
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @vscale_neg_scalable(ptr %p) {
+ %v = call i64 @llvm.vscale.i64()
+ %vp = mul i64 %v, 16
+ %vm = mul i64 %v, -16
+ %vm16 = getelementptr i8, ptr %p, i64 %vm
+ %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+ %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 -1
+ %m16pv16 = getelementptr i8, ptr %m16, i64 %vp
+ load <vscale x 4 x i32>, ptr %p
+ load <vscale x 4 x i32>, ptr %vm16
+ load <vscale x 4 x i32>, ptr %m16
+ load <vscale x 4 x i32>, ptr %vm16m16
+ load <vscale x 4 x i32>, ptr %m16pv16
+ ret void
+}
+
+; CHECK-LABEL: vscale_pos_notscalable
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %p, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16pv16, <4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16pv16, <4 x i32>* %vm16m16
+define void @vscale_pos_notscalable(ptr %p) {
+ %v = call i64 @llvm.vscale.i64()
+ %vp = mul i64 %v, 16
+ %vm = mul i64 %v, -16
+ %vm16 = getelementptr i8, ptr %p, i64 %vp
+ %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+ %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+ %m16pv16 = getelementptr i8, ptr %m16, i64 %vm
+ load <4 x i32>, ptr %p
+ load <4 x i32>, ptr %vm16
+ load <4 x i32>, ptr %m16
+ load <4 x i32>, ptr %vm16m16
+ load <4 x i32>, ptr %m16pv16
+ ret void
+}
+
+; CHECK-LABEL: vscale_pos_scalable
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16m16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %m16pv16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %m16pv16, <vscale x 4 x i32>* %vm16m16
+define void @vscale_pos_scalable(ptr %p) {
+ %v = call i64 @llvm.vscale.i64()
+ %vp = mul i64 %v, 16
+ %vm = mul i64 %v, -16
+ %vm16 = getelementptr i8, ptr %p, i64 %vp
+ %m16 = getelementptr <4 x i32>, ptr %p, i64 1
+ %vm16m16 = getelementptr <4 x i32>, ptr %vm16, i64 1
+ %m16pv16 = getelementptr i8, ptr %m16, i64 %vm
+ load <vscale x 4 x i32>, ptr %p
+ load <vscale x 4 x i32>, ptr %vm16
+ load <vscale x 4 x i32>, ptr %m16
+ load <vscale x 4 x i32>, ptr %vm16m16
+ load <vscale x 4 x i32>, ptr %m16pv16
+ ret void
+}
+
+; CHECK-LABEL: vscale_v1v2types
+; CHECK-DAG: MustAlias: <4 x i32>* %p, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <4 x i32>* %vm16
+; CHECK-DAG: MustAlias: <4 x i32>* %vm16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %p
+; CHECK-DAG: NoAlias: <4 x i32>* %m16, <4 x i32>* %p
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %vm16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <4 x i32>* %vm16
+; CHECK-DAG: MustAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %m16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %vp16
+; CHECK-DAG: NoAlias: <4 x i32>* %p, <vscale x 4 x i32>* %vp16
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %vm16, <vscale x 4 x i32>* %vp16
+; CHECK-DAG: NoAlias: <4 x i32>* %vm16, <vscale x 4 x i32>* %vp16
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %m16, <vscale x 4 x i32>* %vp16
+; CHECK-DAG: MayAlias: <4 x i32>* %m16, <vscale x 4 x i32>* %vp16
+define void @vscale_v1v2types(ptr %p) {
+ %v = call i64 @llvm.vscale.i64()
+ %vp = mul i64 %v, 16
+ %vm = mul i64 %v, -16
+ %vp16 = getelementptr i8, ptr %p, i64 %vp
+ %vm16 = getelementptr i8, ptr %p, i64 %vm
+ %m16 = getelementptr <4 x i32>, ptr %p, i64 -1
+ load <vscale x 4 x i32>, ptr %p
+ load <4 x i32>, ptr %p
+ load <vscale x 4 x i32>, ptr %vm16
+ load <4 x i32>, ptr %vm16
+ load <vscale x 4 x i32>, ptr %m16
+ load <4 x i32>, ptr %m16
+ load <vscale x 4 x i32>, ptr %vp16
+ ret void
+}
+
+; CHECK-LABEL: twovscales
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp162
+; CHECK-DAG: NoAlias: <vscale x 4 x i32>* %vp161, <vscale x 4 x i32>* %vp161b
+; CHECK-DAG: MayAlias: <vscale x 4 x i32>* %vp161b, <vscale x 4 x i32>* %vp162
+define void @twovscales(ptr %p) {
+ %v1 = call i64 @llvm.vscale.i64()
+ %v2 = call i64 @llvm.vscale.i64()
+ %vp1 = mul i64 %v1, 16
+ %vp2 = mul i64 %v2, 16
+ %vp3 = mul i64 %v1, 17
+ %vp161 = getelementptr i8, ptr %p, i64 %vp1
+ %vp162 = getelementptr i8, ptr %p, i64 %vp2
+ %vp161b = getelementptr i8, ptr %vp161, i64 %vp3
+ load <vscale x 4 x i32>, ptr %vp161
+ load <vscale x 4 x i32>, ptr %vp162
+ load <vscale x 4 x i32>, ptr %vp161b
+ ret void
+}
+
; getelementptr recursion
; CHECK-LABEL: gep_recursion_level_1
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We essentially divide each side by vscale and are left needing to check that the offset >= typesize.
Is that correct without the IsNSW flag?
Hmm. Yeah maybe. I was thinking of offsets similar to typesizes, and it would be a strange access than might wrap around depending on the vscale. I'll add a IsNSW check if you think that's better - it looks like a few cases in the tests get a little worse, but we might be able to improve it in the future with vscale_range to show the extreme would not overflow. |
4004e4d
to
b1a2dbe
Compare
Added a check for |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
if (DecompGEP1.VarIndices.size() == 1 && DecompGEP1.VarIndices[0].IsNSW && | ||
DecompGEP1.Offset.isZero() && | ||
PatternMatch::match(DecompGEP1.VarIndices[0].Val.V, | ||
PatternMatch::m_VScale())) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we also need DecompGEP1.VarIndices[0].Val.TruncBits == 0
.
This patch adds a simple alias analysis check for accesses that are scalable with a offset between them that is also trivially scalable (there are no other constant/variable offsets). We essentially divide each side by vscale and are left needing to check that the offset >= typesize.
b1a2dbe
to
399eee3
Compare
This extends llvm#80818 when IsNSW is lost (possibly due to looking through multiple GEPs), to check the vscale_range for an access that will not overflow even with the maximum range.
This is a fix for llvm#80818, as pointed out in llvm#81144 it should be checking the abs of Scale. The added test changes from NoAlias to MayAlias.
This extends llvm#80818 when IsNSW is lost (possibly due to looking through multiple GEPs), to check the vscale_range for an access that will not overflow even with the maximum range.
This extends llvm#80818 when IsNSW is lost (possibly due to looking through multiple GEPs), to check the vscale_range for an access that will not overflow even with the maximum range.
This extends #80818 when IsNSW is lost (possibly due to looking through multiple GEPs), to check the vscale_range for an access that will not overflow even with the maximum range.
This patch adds a simple alias analysis check for accesses that are scalable
with a offset between them that is also trivially scalable (there are no other
constant/variable offsets). We essentially divide each side by vscale and are
left needing to check that the offset >= typesize.