Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,10 @@ class TargetTransformInfo {
/// otherwise scalar epilogue loop.
LLVM_ABI bool preferEpilogueVectorization() const;

/// \returns True if the loop vectorizer should discard any VFs where the
/// maximum register pressure exceeds getNumberOfRegisters.
LLVM_ABI bool shouldConsiderVectorizationRegPressure() const;

/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,8 @@ class TargetTransformInfoImplBase {

virtual bool preferEpilogueVectorization() const { return true; }

/// Default implementation of the register-pressure hook: returns false, so a
/// target has to override it to opt in.
virtual bool shouldConsiderVectorizationRegPressure() const { return false; }

virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
return true;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const {
return TTIImpl->preferEpilogueVectorization();
}

// Forwards the query to the underlying TTIImpl and returns its answer
// unchanged.
bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const {
return TTIImpl->shouldConsiderVectorizationRegPressure();
}

TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
return false;
}

/// RISC-V opts in to the register-pressure hook: this override returns true
/// unconditionally.
bool shouldConsiderVectorizationRegPressure() const override { return true; }

InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));

// Hidden flag, false by default. When it is given explicitly on the command
// line, shouldConsiderRegPressureForVF returns its value before consulting the
// TTI hook, so it can force the behaviour on or off for any target.
static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));

// Likelihood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
Expand Down Expand Up @@ -3693,6 +3697,14 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {

bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
ElementCount VF) {
if (ConsiderRegPressure.getNumOccurrences())
return ConsiderRegPressure;

// TODO: We should eventually consider register pressure for all targets. The
// TTI hook is temporary whilst target-specific issues are being fixed.
if (TTI.shouldConsiderVectorizationRegPressure())
return true;

if (!useMaxBandwidth(VF.isScalable()
? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector))
Expand Down
11 changes: 4 additions & 7 deletions llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s

; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A secondary benefit to always computing register pressure is that we don't need to fall back to scalar epilogues to get the debug output anymore and can use the default EVL tail folding.

EVL tail folding doesn't support fixed-length VFs so I've updated the check lines to be scalable.

; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
; and remove.
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s

define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
; CHECK: LV(REG): Found max usage: 2 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK: LV(REG): VF = vscale x 4
; CHECK-NEXT: LV(REG): Found max usage: 2 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
Expand Down
21 changes: 10 additions & 11 deletions llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN

; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
; and remove.
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN

define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
; ZVFH: LV(REG): Found max usage: 2 item
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; ZVFH-LABEL: add
; ZVFH: LV(REG): VF = vscale x 4
; ZVFH-NEXT: LV(REG): Found max usage: 2 item
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; ZVFHMIN: LV(REG): Found max usage: 2 item
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; ZVFHMIN-LABEL: add
; ZVFHMIN: LV(REG): VF = vscale x 4
; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
Expand Down
Loading