Skip to content

Commit

Permalink
[SLP]Reorder counters for same values, if the root node is reordered.
Browse files Browse the repository at this point in the history
The counters for repeated scalars are stored in the natural (original) order,
but the original scalars may be reordered during SLP graph reordering, in
which case that original order no longer applies. The scalars as they appear
after the reordering, not the original ones, must be used to emit correct code
for the same-value counters.
  • Loading branch information
alexey-bataev committed Apr 3, 2023
1 parent b15a946 commit c166000
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
11 changes: 9 additions & 2 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,12 @@ class BoUpSLP {
!VectorizableTree.front()->UserTreeIndices.empty();
}

/// Gives access to the scalars of the root node, i.e. the \c Scalars list of
/// the first entry in \c VectorizableTree. The tree must be non-empty; this is
/// checked with an assertion.
ArrayRef<Value *> getRootNodeScalars() const {
assert(!VectorizableTree.empty() && "No graph to get the first node from");
const auto &RootEntry = VectorizableTree.front();
return RootEntry->Scalars;
}

/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
/// ExternallyUsedValues contains additional list of external uses to handle
Expand Down Expand Up @@ -13355,8 +13361,9 @@ class HorizontalReduction {

// Emit code to correctly handle reused reduced values, if required.
if (OptReusedScalars && !SameScaleFactor) {
VectorizedRoot = emitReusedOps(VectorizedRoot, Builder, VL,
SameValuesCounter, TrackedToOrig);
VectorizedRoot =
emitReusedOps(VectorizedRoot, Builder, V.getRootNodeScalars(),
SameValuesCounter, TrackedToOrig);
}

Value *ReducedSubTree =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ define i32 @test() {
; CHECK-NEXT: [[SQ:%.*]] = alloca [64 x i32], i32 0, align 16
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [64 x i32], ptr [[SQ]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 3, i32 2, i32 2, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 2, i32 3, i32 2, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand Down

0 comments on commit c166000

Please sign in to comment.