Skip to content

Commit

Permalink
[LV] Reland "Update logic for calculating register usage due to invar…
Browse files Browse the repository at this point in the history
…iants"

Previously, while calculating register usage due to invariants, it was assumed that an invariant would always be part of widening
instructions. This resulted in calculating vector register types for vectors which can't be legalized (check the newly added test for more details).

An invariant might not always need a vector register. For example, an invariant might just be used for the iteration check.

This patch checks if the invariant is part of any widening instruction and considers register usage accordingly. Fixes issue 60493

Differential Revision: https://reviews.llvm.org/D143422

(cherry picked from commit 4f9a544)
  • Loading branch information
sushgokh authored and tstellar committed Mar 4, 2023
1 parent f5f4825 commit c915974
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 14 deletions.
15 changes: 10 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5953,7 +5953,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
// Saves the list of values that are used in the loop but are defined outside
// the loop (not including non-instruction values such as arguments and
// constants).
SmallPtrSet<Value *, 8> LoopInvariants;
SmallPtrSet<Instruction *, 8> LoopInvariants;

for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
Expand Down Expand Up @@ -6079,11 +6079,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
for (auto *Inst : LoopInvariants) {
// FIXME: The target might use more than one register for the type
// even in the scalar case.
unsigned Usage =
VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
bool IsScalar = all_of(Inst->users(), [&](User *U) {
auto *I = cast<Instruction>(U);
return TheLoop != LI->getLoopFor(I->getParent()) ||
isScalarAfterVectorization(I, VFs[i]);
});

ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
unsigned ClassID =
TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
Invariant[ClassID] += Usage;
TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
}

LLVM_DEBUG({
Expand Down
41 changes: 41 additions & 0 deletions llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; REQUIRES: asserts

; RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -debug-only=loop-vectorize -disable-output <%s 2>&1 | FileCheck %s

; Invariant register usage calculation should take into account whether the
; invariant would be used in widened instructions. Only in such cases would a
; vector register be required for holding the invariant. For all other cases,
; such as the one below (where the usage of %0 in the loop doesn't require a
; vector register), a general purpose register suffices.
; Check that the test below doesn't crash while calculating register usage for
; invariant %0.

; Constant byte string; the loop's source pointer %b starts here.
@string = internal unnamed_addr constant [5 x i8] c"abcd\00", align 1
; Test function for invariant register-usage accounting. Both %0 (an i128
; loaded from %z) and %a (%z + 1) are defined in L.entry, outside the loop,
; and are used inside %loopbody.
define void @get_invariant_reg_usage(ptr %z) {
; Expected loop-vectorize debug output for VF = vscale x 16.
; CHECK: LV: Checking a loop in 'get_invariant_reg_usage'
; CHECK: LV(REG): VF = vscale x 16
; CHECK: LV(REG): Found max usage: 1 item
; CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
; CHECK: LV(REG): Found invariant usage: 2 item
; CHECK: LV(REG): RegisterClass: Generic::VectorRC, 8 registers
; CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers

L.entry:
; Loop-invariant i128 value; inside the loop it only feeds the %len_input phi.
%0 = load i128, ptr %z, align 16
; Skip the loop entirely when the loaded value is less than 1.
%1 = icmp slt i128 %0, 1
; Loop-invariant destination address for the store in %loopbody.
%a = getelementptr i8, ptr %z, i64 1
br i1 %1, label %return, label %loopbody

loopbody: ;preds = %L.entry, %loopbody
; %b advances by one byte per iteration, starting at @string.
%b = phi ptr [ %2, %loopbody ], [ @string, %L.entry ]
; %len_input counts down from %0; the loop exits once %len reaches 0.
%len_input = phi i128 [ %len, %loopbody ], [ %0, %L.entry ]
%len = add nsw i128 %len_input, -1
%2 = getelementptr i8, ptr %b, i64 1
; Copy the current source byte to the fixed address %a.
%3 = load i8, ptr %b, align 1
store i8 %3, ptr %a, align 4
%.not = icmp eq i128 %len, 0
br i1 %.not, label %return, label %loopbody

return: ;preds = %loopbody, %L.entry
ret void
}
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

;CHECK-PWR9: LV(REG): VF = 1
;CHECK-PWR9: LV(REG): Found max usage: 2 item
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,22 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL2: LV(REG): Found max usage: 2 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL4: LV(REG): Found max usage: 2 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-LMUL8: LV(REG): Found max usage: 2 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers

entry:
%conv = zext i32 %size to i64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class
; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class
; CHECK-NEXT: LV: Loop cost is 23
Expand Down Expand Up @@ -234,7 +234,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class
; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class
; CHECK-NEXT: LV: Loop cost is 23
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers

define i32 @test_g(ptr nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
entry:
Expand Down Expand Up @@ -68,7 +68,7 @@ for.end: ; preds = %for.end.loopexit, %
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers

define i32 @test(ptr nocapture readonly %a, i32 %n) local_unnamed_addr {
entry:
Expand Down

0 comments on commit c915974

Please sign in to comment.