Skip to content

Commit

Permalink
[SLP] Check with target before vectorizing GEP Indices.
Browse files Browse the repository at this point in the history
The target hook prefersVectorizedAddressing() already exists to ask the
target whether address computations should be vectorized, so it seems that
SLPVectorizer should consult this hook as well.

Reviewed By: ABataev, RKSimon

Differential Revision: https://reviews.llvm.org/D144128
  • Loading branch information
JonPsson committed Feb 23, 2023
1 parent 8347ca7 commit 1387a13
Show file tree
Hide file tree
Showing 10 changed files with 775 additions and 397 deletions.
25 changes: 15 additions & 10 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -5191,6 +5191,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Depth](ArrayRef<Value *> VL) {
if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2)
return false;
if (S.getOpcode() == Instruction::GetElementPtr &&
!TTI->prefersVectorizedAddressing())
return true;
if (VectorizableTree.size() < MinTreeSize)
return false;
if (Depth >= RecursionMaxDepth - 1)
Expand Down Expand Up @@ -11873,21 +11876,23 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
if (!isValidElementType(SI->getValueOperand()->getType()))
continue;
Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
continue;
}

// Ignore getelementptr instructions that have more than one index, a
// constant index, or a pointer operand that doesn't point to a scalar
// type.
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
auto Idx = GEP->idx_begin()->get();
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
continue;
if (!isValidElementType(Idx->getType()))
continue;
if (GEP->getType()->isVectorTy())
continue;
GEPs[GEP->getPointerOperand()].push_back(GEP);
}
if (TTI->prefersVectorizedAddressing())
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
auto Idx = GEP->idx_begin()->get();
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
continue;
if (!isValidElementType(Idx->getType()))
continue;
if (GEP->getType()->isVectorTy())
continue;
GEPs[GEP->getPointerOperand()].push_back(GEP);
}
}
}

Expand Down
51 changes: 28 additions & 23 deletions llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
Expand Up @@ -12,18 +12,21 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]]
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S1]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S2]]
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S3]]
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -58,23 +61,25 @@ define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[C0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
; CHECK-NEXT: [[A0:%.*]] = add i64 [[S0]], [[C0:%.*]]
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A0]]
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP7]]
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
; CHECK-NEXT: [[A1:%.*]] = add i64 [[S1]], [[C1:%.*]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A1]]
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP8]]
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
; CHECK-NEXT: [[A2:%.*]] = add i64 [[S2]], [[C2:%.*]]
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A2]]
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP9]]
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
; CHECK-NEXT: [[A3:%.*]] = add i64 [[S3]], [[C3:%.*]]
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A3]]
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
; CHECK-NEXT: ret void
Expand Down

0 comments on commit 1387a13

Please sign in to comment.