-
Notifications
You must be signed in to change notification settings - Fork 11.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[VPlan] Relax over-aggressive assertion in VPTransformState::get().
There are cases where a vector value has some users that demand the the single scalar value only (NeedsScalar), while other users demand the vector value (see attached test cases). In those cases, the NeedsScalar users should only demand the first lane. Fixes #91883.
- Loading branch information
Showing
2 changed files
with
227 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
226 changes: 226 additions & 0 deletions
226
llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -S %s | FileCheck %s | ||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" | ||
|
||
; Test cases based on https://github.com/llvm/llvm-project/issues/91883. | ||
define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { | ||
; CHECK-LABEL: define void @iv.4_used_as_vector_and_first_lane( | ||
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
; CHECK: vector.ph: | ||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] | ||
; CHECK: vector.body: | ||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 | ||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 | ||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 | ||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] | ||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]] | ||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]] | ||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]] | ||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 | ||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 | ||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 | ||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 | ||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 | ||
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 | ||
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 | ||
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 | ||
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0 | ||
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 1 | ||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP13]], i32 0 | ||
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 1 | ||
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP14]], i32 0 | ||
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP24]], 1 | ||
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP15]], i32 0 | ||
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 | ||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP21]] | ||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] | ||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP25]] | ||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] | ||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[TMP28]], i32 0 | ||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i32 4 | ||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i32 8 | ||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i32 12 | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP12]], ptr [[TMP32]], i32 4, <4 x i1> [[TMP16]]) | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP13]], ptr [[TMP33]], i32 4, <4 x i1> [[TMP17]]) | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr [[TMP34]], i32 4, <4 x i1> [[TMP18]]) | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP15]], ptr [[TMP35]], i32 4, <4 x i1> [[TMP19]]) | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 | ||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4> | ||
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 | ||
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] | ||
; CHECK: middle.block: | ||
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] | ||
; CHECK: scalar.ph: | ||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] | ||
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] | ||
; CHECK: loop.header: | ||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] | ||
; CHECK-NEXT: [[G_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] | ||
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[G_SRC]], align 8 | ||
; CHECK-NEXT: [[IV_4:%.*]] = add nuw nsw i64 [[IV]], 4 | ||
; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[L]], 128 | ||
; CHECK-NEXT: br i1 [[C]], label [[LOOP_THEN:%.*]], label [[LOOP_LATCH]] | ||
; CHECK: loop.then: | ||
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV_4]], 1 | ||
; CHECK-NEXT: [[G_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OR]] | ||
; CHECK-NEXT: store i64 [[IV_4]], ptr [[G_DST]], align 4 | ||
; CHECK-NEXT: br label [[LOOP_LATCH]] | ||
; CHECK: loop.latch: | ||
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 | ||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 32 | ||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] | ||
; CHECK: exit: | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
br label %loop.header | ||
|
||
loop.header: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] | ||
%g.src = getelementptr inbounds i64, ptr %src, i64 %iv | ||
%l = load i64, ptr %g.src | ||
%iv.4 = add nuw nsw i64 %iv, 4 | ||
%c = icmp ule i64 %l, 128 | ||
br i1 %c, label %loop.then, label %loop.latch | ||
|
||
loop.then: | ||
%or = or disjoint i64 %iv.4, 1 | ||
%g.dst = getelementptr inbounds i64, ptr %dst, i64 %or | ||
store i64 %iv.4, ptr %g.dst, align 4 | ||
br label %loop.latch | ||
|
||
loop.latch: | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
%exitcond = icmp eq i64 %iv.next, 32 | ||
br i1 %exitcond, label %exit, label %loop.header | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { | ||
; CHECK-LABEL: define void @iv.4_used_as_first_lane( | ||
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
; CHECK: vector.ph: | ||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] | ||
; CHECK: vector.body: | ||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | ||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 | ||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 | ||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 | ||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 | ||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] | ||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]] | ||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]] | ||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]] | ||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 | ||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 | ||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 | ||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 | ||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 | ||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 | ||
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 | ||
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 | ||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP0]], 4 | ||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP1]], 4 | ||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP2]], 4 | ||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP3]], 4 | ||
; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], <i64 128, i64 128, i64 128, i64 128> | ||
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP12]], 1 | ||
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP13]], 1 | ||
; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP14]], 1 | ||
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 | ||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP20]] | ||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP21]] | ||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP22]] | ||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]] | ||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0 | ||
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i32 4 | ||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i32 8 | ||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i32 12 | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD]], ptr [[TMP28]], i32 4, <4 x i1> [[TMP16]]) | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD1]], ptr [[TMP29]], i32 4, <4 x i1> [[TMP17]]) | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD2]], ptr [[TMP30]], i32 4, <4 x i1> [[TMP18]]) | ||
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD3]], ptr [[TMP31]], i32 4, <4 x i1> [[TMP19]]) | ||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 | ||
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 | ||
; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
; CHECK: middle.block: | ||
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] | ||
; CHECK: scalar.ph: | ||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] | ||
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] | ||
; CHECK: loop.header: | ||
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] | ||
; CHECK-NEXT: [[G_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] | ||
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[G_SRC]], align 8 | ||
; CHECK-NEXT: [[IV_4:%.*]] = add nuw nsw i64 [[IV]], 4 | ||
; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[L]], 128 | ||
; CHECK-NEXT: br i1 [[C]], label [[LOOP_THEN:%.*]], label [[LOOP_LATCH]] | ||
; CHECK: loop.then: | ||
; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV_4]], 1 | ||
; CHECK-NEXT: [[G_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OR]] | ||
; CHECK-NEXT: store i64 [[L]], ptr [[G_DST]], align 4 | ||
; CHECK-NEXT: br label [[LOOP_LATCH]] | ||
; CHECK: loop.latch: | ||
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 | ||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 32 | ||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] | ||
; CHECK: exit: | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
br label %loop.header | ||
|
||
loop.header: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] | ||
%g.src = getelementptr inbounds i64, ptr %src, i64 %iv | ||
%l = load i64, ptr %g.src | ||
%iv.4 = add nuw nsw i64 %iv, 4 | ||
%c = icmp ule i64 %l, 128 | ||
br i1 %c, label %loop.then, label %loop.latch | ||
|
||
loop.then: | ||
%or = or disjoint i64 %iv.4, 1 | ||
%g.dst = getelementptr inbounds i64, ptr %dst, i64 %or | ||
store i64 %l, ptr %g.dst, align 4 | ||
br label %loop.latch | ||
|
||
loop.latch: | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
%exitcond = icmp eq i64 %iv.next, 32 | ||
br i1 %exitcond, label %exit, label %loop.header | ||
|
||
exit: | ||
ret void | ||
} | ||
;. | ||
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} | ||
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} | ||
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} | ||
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} | ||
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} | ||
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} | ||
;. |