Skip to content

Commit

Permalink
[SLP]Fix PR85082: PHI node has multiple entries.
Browse files Browse the repository at this point in the history
For an externally used scalar, the cast created from the extractelement must
be recorded and reused, not just the original extractelement instruction.
  • Loading branch information
alexey-bataev committed Mar 13, 2024
1 parent b966b22 commit 4dd186a
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 10 deletions.
30 changes: 20 additions & 10 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12539,7 +12539,9 @@ Value *BoUpSLP::vectorizeTree(
DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
// Maps extract Scalar to the corresponding extractelement instruction in the
// basic block. Only one extractelement per block should be emitted.
DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
DenseMap<Value *,
DenseMap<BasicBlock *, std::pair<Instruction *, Instruction *>>>
ScalarToEEs;
SmallDenseSet<Value *, 4> UsedInserts;
DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
Expand Down Expand Up @@ -12568,18 +12570,23 @@ Value *BoUpSLP::vectorizeTree(
auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
if (Scalar->getType() != Vec->getType()) {
Value *Ex = nullptr;
Value *ExV = nullptr;
auto It = ScalarToEEs.find(Scalar);
if (It != ScalarToEEs.end()) {
// No need to emit many extracts, just move the only one in the
// current block.
auto EEIt = It->second.find(Builder.GetInsertBlock());
if (EEIt != It->second.end()) {
Instruction *I = EEIt->second;
Instruction *I = EEIt->second.first;
if (Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
Builder.GetInsertPoint()->comesBefore(I))
Builder.GetInsertPoint()->comesBefore(I)) {
I->moveBefore(*Builder.GetInsertPoint()->getParent(),
Builder.GetInsertPoint());
if (auto *CI = EEIt->second.second)
CI->moveAfter(I);
}
Ex = I;
ExV = EEIt->second.second ? EEIt->second.second : Ex;
}
}
if (!Ex) {
Expand All @@ -12592,21 +12599,24 @@ Value *BoUpSLP::vectorizeTree(
} else {
Ex = Builder.CreateExtractElement(Vec, Lane);
}
// If necessary, sign-extend or zero-extend ScalarRoot
// to the larger type.
ExV = Ex;
if (Scalar->getType() != Ex->getType())
ExV = Builder.CreateIntCast(Ex, Scalar->getType(),
MinBWs.find(E)->second.second);
if (auto *I = dyn_cast<Instruction>(Ex))
ScalarToEEs[Scalar].try_emplace(Builder.GetInsertBlock(), I);
ScalarToEEs[Scalar].try_emplace(
Builder.GetInsertBlock(),
std::make_pair(I, cast<Instruction>(ExV)));
}
// The then branch of the previous if may produce constants, since 0
// operand might be a constant.
if (auto *ExI = dyn_cast<Instruction>(Ex)) {
GatherShuffleExtractSeq.insert(ExI);
CSEBlocks.insert(ExI->getParent());
}
// If necessary, sign-extend or zero-extend ScalarRoot
// to the larger type.
if (Scalar->getType() != Ex->getType())
return Builder.CreateIntCast(Ex, Scalar->getType(),
MinBWs.find(E)->second.second);
return Ex;
return ExV;
}
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; Regression test for PR85082 ("PHI node has multiple entries").
; %zext is used by a phi in %bb4 and also by %phi8 in %bb7. %phi8 has two
; incoming entries, both from %bb3. The autogenerated assertions below expect
; one extractelement + zext pair in bb3, with that single zext feeding both
; incoming entries of PHI8.
define void @test(i32 %arg) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i32 [[ARG:%.*]]) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: switch i32 0, label [[BB10:%.*]] [
; CHECK-NEXT: i32 0, label [[BB9:%.*]]
; CHECK-NEXT: i32 11, label [[BB9]]
; CHECK-NEXT: i32 1, label [[BB4:%.*]]
; CHECK-NEXT: ]
; CHECK: bb3:
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: switch i32 0, label [[BB10]] [
; CHECK-NEXT: i32 18, label [[BB7:%.*]]
; CHECK-NEXT: i32 1, label [[BB7]]
; CHECK-NEXT: i32 0, label [[BB10]]
; CHECK-NEXT: ]
; CHECK: bb4:
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP0]], [[BB2]] ]
; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr null, i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr null, i64 [[TMP6]]
; CHECK-NEXT: ret void
; CHECK: bb7:
; CHECK-NEXT: [[PHI8:%.*]] = phi i64 [ [[TMP2]], [[BB3:%.*]] ], [ [[TMP2]], [[BB3]] ]
; CHECK-NEXT: br label [[BB9]]
; CHECK: bb9:
; CHECK-NEXT: ret void
; CHECK: bb10:
; CHECK-NEXT: ret void
;
bb:
; The two zexts below are the scalars that feed %phi and %phi5 in %bb4.
%zext = zext i32 %arg to i64
%zext1 = zext i32 0 to i64
br label %bb2

bb2:
switch i32 0, label %bb10 [
i32 0, label %bb9
i32 11, label %bb9
i32 1, label %bb4
]

bb3:
; Two switch cases (18 and 1) target %bb7, so %bb7's phi gets two incoming
; entries from this block.
switch i32 0, label %bb10 [
i32 18, label %bb7
i32 1, label %bb7
i32 0, label %bb10
]

bb4:
%phi = phi i64 [ %zext, %bb2 ]
%phi5 = phi i64 [ %zext1, %bb2 ]
%getelementptr = getelementptr i32, ptr null, i64 %phi
%getelementptr6 = getelementptr i32, ptr null, i64 %phi5
ret void

bb7:
; External use of %zext outside %bb4: both incoming entries name %bb3 and use
; the same value %zext.
%phi8 = phi i64 [ %zext, %bb3 ], [ %zext, %bb3 ]
br label %bb9

bb9:
ret void

bb10:
ret void
}

0 comments on commit 4dd186a

Please sign in to comment.