Skip to content

Commit

Permalink
[SLP]Fix handling of -slp-vectorize-hor-store for values with many uses.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexey-bataev committed Nov 1, 2023
1 parent 68da743 commit c28b7eb
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 11 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15822,8 +15822,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// to investigate if we can safely turn on slp-vectorize-hor-store
// instead to allow lookup for reduction chains in all non-vectorized
// stores (need to check side effects and compile time).
- TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) &&
- SI->getValueOperand()->hasOneUse();
+ TryToVectorizeRoot |= (I == Stores.end() || I->second.size() == 1) &&
+ SI->getValueOperand()->hasOneUse();
}
if (TryToVectorizeRoot) {
for (auto *V : it->operand_values()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,10 @@ define void @test(ptr noalias %pl, ptr noalias %res, ptr noalias %p2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr noalias [[PL:%.*]], ptr noalias [[RES:%.*]], ptr noalias [[P2:%.*]]) {
; CHECK-NEXT: entry:
- ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @arr_i32, align 16
- ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 1), align 4
- ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
- ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 2), align 8
- ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
- ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr_i32, i64 0, i64 3), align 4
- ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
- ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[P2]], align 16
- ; CHECK-NEXT: store i32 [[ADD_2]], ptr [[RES]], align 16
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @arr_i32, align 16
+ ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
+ ; CHECK-NEXT: store i32 [[TMP1]], ptr [[P2]], align 16
+ ; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES]], align 16
; CHECK-NEXT: ret void
;
entry:
Expand Down

0 comments on commit c28b7eb

Please sign in to comment.