Skip to content

Commit

Permalink
LoopVectorize: guard appending InstsToScalarize; fix bug (#88720)
Browse files Browse the repository at this point in the history
While collecting instructions to scalarize, LoopVectorize incorrectly
also adds instructions that will already be scalar after vectorization.
If an instruction satisfies isScalarAfterVectorization() for the given
VF, it must not be appended to InstsToScalarize. Add this extra guard,
which fixes a crash.

Fixes #55096.
  • Loading branch information
artagnon committed Apr 18, 2024
1 parent 61f4001 commit 63d8058
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 9 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -5814,7 +5814,8 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// invalid scalarization costs.
// Do not apply discount logic if hacked cost is needed
// for emulated masked memrefs.
if (!VF.isScalable() && !useEmulatedMaskMemRefHack(&I, VF) &&
if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() &&
!useEmulatedMaskMemRefHack(&I, VF) &&
computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
// Remember that BB will remain after vectorization.
Expand Down
56 changes: 48 additions & 8 deletions llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
@@ -1,13 +1,48 @@
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s

; REQUIRES: asserts
; XFAIL: *

target triple = "x86_64-apple-macosx"

; CHECK: vector.body
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -mtriple=x86_64-apple-macosx -passes=loop-vectorize,simplifycfg,dce -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s

define void @test_pr55096(i64 %c, ptr %p) {
; CHECK-LABEL: define void @test_pr55096(
; CHECK-SAME: i64 [[C:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 122, i64 123>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i16
; CHECK-NEXT: [[TMP0:%.*]] = mul i16 [[DOTCAST]], 2008
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 6229, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], <i1 true, i1 true>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP4]], 2008
; CHECK-NEXT: [[TMP6:%.*]] = udiv i16 4943, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP6]]
; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.if2:
; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 2008
; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[TMP10]], 2008
; CHECK-NEXT: [[TMP12:%.*]] = udiv i16 4943, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP12]]
; CHECK-NEXT: store i16 0, ptr [[TMP13]], align 2
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue3:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br label %loop.header

Expand All @@ -32,3 +67,8 @@ loop.latch:
exit:
ret void
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
;.

0 comments on commit 63d8058

Please sign in to comment.