-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Handle more scalable geps in EmitGEPOffset #71699
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: David Green (davemgreen) Changes: Following up on #71565, this makes scalable splats in EmitGEPOffset use the ElementCount as opposed to assuming it is fixed width, and attempts to handle scalable offsets with vector geps by splatting the vscale to each vector lane. This meant performing the mul by scale separately from CreateVScale. Full diff: https://github.com/llvm/llvm-project/pull/71699.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/Local.cpp b/llvm/lib/Analysis/Local.cpp
index ded6007663845e0..080b461faa53481 100644
--- a/llvm/lib/Analysis/Local.cpp
+++ b/llvm/lib/Analysis/Local.cpp
@@ -66,16 +66,26 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
// Splat the index if needed.
if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
Op = Builder->CreateVectorSplat(
- cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
+ cast<VectorType>(IntIdxTy)->getElementCount(), Op);
// Convert to correct type.
if (Op->getType() != IntIdxTy)
Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
if (Size != 1 || TSize.isScalable()) {
// We'll let instcombine(mul) convert this to a shl if possible.
- auto *ScaleC = ConstantInt::get(IntIdxTy, Size);
- Value *Scale =
- !TSize.isScalable() ? ScaleC : Builder->CreateVScale(ScaleC);
+ Value *Scale = ConstantInt::get(IntIdxTy, Size);
+ if (TSize.isScalable()) {
+ Value *VScale;
+ if (IntIdxTy->isVectorTy()) {
+ VScale = Builder->CreateVScale(ConstantInt::get(
+ cast<VectorType>(IntIdxTy)->getElementType(), 1));
+ VScale = Builder->CreateVectorSplat(
+ cast<VectorType>(IntIdxTy)->getElementCount(), VScale);
+ } else {
+ VScale = Builder->CreateVScale(ConstantInt::get(IntIdxTy, 1));
+ }
+ Scale = Builder->CreateMul(VScale, Scale);
+ }
Op = Builder->CreateMul(Op, Scale, GEP->getName() + ".idx", false /*NUW*/,
isInBounds /*NSW*/);
}
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 752dd6f6877dd58..b7ee5bba6eef7b0 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -233,6 +233,59 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x ptr> %P) nounwind {
ret <2 x i1> %C
}
+define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
+; CHECK-LABEL: @test13_fixed_fixed(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
+; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], <i64 4, i64 4>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X
+ %B = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> %y
+ %C = icmp eq <2 x ptr> %A, %B
+ ret <2 x i1> %C
+}
+
+define <2 x i1> @test13_fixed_scalable(i64 %X, ptr %P, <2 x i64> %y) nounwind {
+; CHECK-LABEL: @test13_fixed_scalable(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
+; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[DOTSPLATINSERT1]], <i64 4, i64 0>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[B_IDX:%.*]] = mul nsw <2 x i64> [[TMP4]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %A = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X
+ %B = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <2 x i64> %y
+ %C = icmp eq <2 x ptr> %A, %B
+ ret <2 x i1> %C
+}
+
+define <vscale x 2 x i1> @test13_scalable_scalable(i64 %X, ptr %P, <vscale x 2 x i64> %y) nounwind {
+; CHECK-LABEL: @test13_scalable_scalable(
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[X:%.*]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw <vscale x 2 x i64> [[DOTSPLAT]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shl <vscale x 2 x i64> [[DOTSPLAT2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[B_IDX:%.*]] = mul nsw <vscale x 2 x i64> [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq <vscale x 2 x i64> [[A_IDX]], [[B_IDX]]
+; CHECK-NEXT: ret <vscale x 2 x i1> [[C]]
+;
+ %A = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <vscale x 2 x i64> zeroinitializer, i64 %X
+ %B = getelementptr inbounds <vscale x 2 x i64>, ptr %P, <vscale x 2 x i64> %y
+ %C = icmp eq <vscale x 2 x ptr> %A, %B
+ ret <vscale x 2 x i1> %C
+}
+
; This is a test of icmp + shl nuw in disguise - 4611... is 0x3fff...
define <2 x i1> @test13_vector3(i64 %X, <2 x ptr> %P) nounwind {
; CHECK-LABEL: @test13_vector3(
|
llvm/lib/Analysis/Local.cpp
Outdated
VScale = Builder->CreateVScale(ConstantInt::get( | ||
cast<VectorType>(IntIdxTy)->getElementType(), 1)); | ||
VScale = Builder->CreateVectorSplat( | ||
cast<VectorType>(IntIdxTy)->getElementCount(), VScale); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we instead move the above splat creation to after the multiply? I.e. calculate on scalars and then splat?
23f73b3
to
b6bab71
Compare
It appears that the `& PtrSizeMask` can be removed without altering any of the tests or any of the tests I tried across AArch64/Arm. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Following up on llvm#71565, this makes scalable splats in EmitGEPOffset use the ElementCount as opposed to assuming it is fixed width, and attempts to handle scalable offsets with vector geps by splatting the vscale to each vector lane. It appears that the `& PtrSizeMask` can be removed without altering any of the tests or any of the tests I tried across AArch64/Arm.
b6bab71
to
cce0982
Compare
I think this is okay. I confirmed that the APInt uint64_t ctor will implicitly truncate the value, rather than asserting. As such, an explicit mask should not be necessary. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
llvm/lib/Analysis/Local.cpp
Outdated
if (Constant *OpC = dyn_cast<Constant>(Op)) { | ||
if (OpC->isZeroValue()) | ||
continue; | ||
|
||
// Handle a struct index, which adds its field offset to the pointer. | ||
if (StructType *STy = GTI.getStructTypeOrNull()) { | ||
uint64_t OpValue = OpC->getUniqueInteger().getZExtValue(); | ||
Size = DL.getStructLayout(STy)->getElementOffset(OpValue); | ||
unsigned Size = DL.getStructLayout(STy)->getElementOffset(OpValue); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unsigned Size = DL.getStructLayout(STy)->getElementOffset(OpValue); | |
uint64_t Size = DL.getStructLayout(STy)->getElementOffset(OpValue); |
Following up on llvm#71565, this makes scalable splats in EmitGEPOffset use the ElementCount as opposed to assuming it is fixed width, and attempts to handle scalable offsets with vector geps by splatting the vscale to each vector lane.
Following up on #71565, this makes scalable splats in EmitGEPOffset use the ElementCount as opposed to assuming it is fixed width, and attempts to handle scalable offsets with vector geps by splatting the vscale to each vector lane. This meant performing the mul by scale separately from CreateVScale.