Skip to content

Commit

Permalink
[LV] Re-use already computed runtime VF in fixFixedOrderRecurrence.
Browse files Browse the repository at this point in the history
This was suggested as an independent cleanup in D147472.

This removes a redundant runtime VF computation when using scalable
vectors.
  • Loading branch information
fhahn committed Apr 10, 2023
1 parent 954befe commit f9d0b35
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 10 deletions.
8 changes: 4 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3861,13 +3861,14 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
Value *Incoming = State.get(PreviousDef, UF - 1);
auto *ExtractForScalar = Incoming;
auto *IdxTy = Builder.getInt32Ty();
+ Value *RuntimeVF = nullptr;
if (VF.isVector()) {
auto *One = ConstantInt::get(IdxTy, 1);
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
+ RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- ExtractForScalar = Builder.CreateExtractElement(ExtractForScalar, LastIdx,
- "vector.recur.extract");
+ ExtractForScalar =
+ Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
}

auto RecurSplice = cast<VPInstruction>(*PhiR->user_begin());
Expand All @@ -3888,7 +3889,6 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
// LoopMiddleBlock, when the scalar loop is not run at all.
Value *ExtractForPhiUsedOutsideLoop = nullptr;
if (VF.isVector()) {
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
auto *Idx = Builder.CreateSub(RuntimeVF, ConstantInt::get(IdxTy, 2));
ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
Incoming, Idx, "vector.recur.extract.for.phi");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF1: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4
; CHECK-VF4UF1: %[[SUB2:.*]] = sub i32 %[[MUL2]], 1
; CHECK-VF4UF1: %[[VEC_RECUR_EXT:.*]] = extractelement <vscale x 4 x i32> %[[LOAD]], i32 %[[SUB2]]
- ; CHECK-VF4UF1: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32()
- ; CHECK-VF4UF1: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4
- ; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL3]], 2
+ ; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2
; CHECK-VF4UF1: %[[VEC_RECUR_FOR_PHI:.*]] = extractelement <vscale x 4 x i32> %[[LOAD]], i32 %[[SUB3]]
entry:
br label %for.preheader
Expand Down Expand Up @@ -216,9 +214,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; CHECK-VF4UF2: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4
; CHECK-VF4UF2: %[[SUB2:.*]] = sub i32 %[[MUL2]], 1
; CHECK-VF4UF2: %vector.recur.extract = extractelement <vscale x 4 x i32> %[[ADD2]], i32 %[[SUB2]]
- ; CHECK-VF4UF2: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32()
- ; CHECK-VF4UF2: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4
- ; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL3]], 2
+ ; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2
; CHECK-VF4UF2: %vector.recur.extract.for.phi = extractelement <vscale x 4 x i32> %[[ADD2]], i32 %[[SUB3]]
entry:
br label %for.body
Expand Down

0 comments on commit f9d0b35

Please sign in to comment.