Skip to content

Commit 3c34e9d

Browse files
committed
[LV] Don't trigger legacy/vplan assert when forcing costs
When forcing an instruction cost of 1, for example, the VPlan cost model will treat an entire load interleave group as being a cost of 1, whereas the legacy cost model will treat each load in the group as having a cost of 1. I don't believe it makes any sense to trigger the assert for matching legacy and vplan cost models when forcing an instruction cost. Given that the reason for having the option to force an instruction cost is to encourage greater testing of a PR, frequently triggering the assert would simply deter people from doing so.
1 parent fe6e178 commit 3c34e9d

File tree

2 files changed

+171
-0
lines changed

2 files changed

+171
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7047,7 +7047,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70477047
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
70487048
// with early exits and plans with additional VPlan simplifications. The
70497049
// legacy cost model doesn't properly model costs for such loops.
7050+
// NOTE: If the user has forced a target instruction cost this assert is very
7051+
// likely to trigger because the VPlan recipes don't map 1:1 with the scalar
7052+
// instructions that the legacy cost model is based on. One example of this is
7053+
// for interleave groups - VPlan will use the forced cost for the whole group,
7054+
// whereas the legacy cost model will use it for each load.
70507055
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7056+
ForceTargetInstructionCost.getNumOccurrences() > 0 ||
70517057
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
70527058
CostCtx, OrigLoop,
70537059
BestFactor.Width) ||

llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,170 @@ for.end:
419419
ret void
420420
}
421421

422+
%struct.foo = type { i16, i16, i16, i16 }
423+
424+
define void @mismatched_interleave_group_costs(ptr noalias %dst, ptr noalias readonly %src1, ptr noalias readonly %src2, i32 %n) #1 {
425+
; CHECK-LABEL: define void @mismatched_interleave_group_costs(
426+
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias readonly [[SRC1:%.*]], ptr noalias readonly [[SRC2:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
427+
; CHECK-NEXT: [[ENTRY:.*]]:
428+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
429+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 8
430+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
431+
; CHECK: [[VECTOR_PH]]:
432+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 8
433+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
434+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
435+
; CHECK: [[VECTOR_BODY]]:
436+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
437+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO:%.*]], ptr [[SRC1]], i64 [[INDEX]]
438+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
439+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i16> [[WIDE_VEC]], <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
440+
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i16> [[WIDE_VEC]], <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
441+
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <32 x i16> [[WIDE_VEC]], <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
442+
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i16> [[WIDE_VEC]], <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
443+
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[STRIDED_VEC]] to <8 x i32>
444+
; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <8 x i32> [[TMP1]], splat (i32 1)
445+
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[STRIDED_VEC1]] to <8 x i32>
446+
; CHECK-NEXT: [[TMP4:%.*]] = shl nsw <8 x i32> [[TMP3]], splat (i32 3)
447+
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[STRIDED_VEC2]] to <8 x i32>
448+
; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[STRIDED_VEC3]] to <8 x i32>
449+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[SRC2]], i64 [[INDEX]]
450+
; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <32 x i16>, ptr [[TMP7]], align 2
451+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i16> [[WIDE_VEC4]], <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
452+
; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <32 x i16> [[WIDE_VEC4]], <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
453+
; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <32 x i16> [[WIDE_VEC4]], <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
454+
; CHECK-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <32 x i16> [[WIDE_VEC4]], <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
455+
; CHECK-NEXT: [[TMP8:%.*]] = sext <8 x i16> [[STRIDED_VEC5]] to <8 x i32>
456+
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i32> [[TMP8]], splat (i32 3)
457+
; CHECK-NEXT: [[TMP10:%.*]] = sext <8 x i16> [[STRIDED_VEC6]] to <8 x i32>
458+
; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i16> [[STRIDED_VEC7]] to <8 x i32>
459+
; CHECK-NEXT: [[TMP12:%.*]] = mul nsw <8 x i32> [[TMP11]], splat (i32 10)
460+
; CHECK-NEXT: [[TMP13:%.*]] = sext <8 x i16> [[STRIDED_VEC8]] to <8 x i32>
461+
; CHECK-NEXT: [[TMP14:%.*]] = mul nsw <8 x i32> [[TMP13]], splat (i32 -5)
462+
; CHECK-NEXT: [[TMP15:%.*]] = add nsw <8 x i32> [[TMP10]], [[TMP5]]
463+
; CHECK-NEXT: [[TMP16:%.*]] = mul nsw <8 x i32> [[TMP15]], splat (i32 9)
464+
; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP4]], [[TMP2]]
465+
; CHECK-NEXT: [[TMP18:%.*]] = shl nsw <8 x i32> [[TMP6]], splat (i32 1)
466+
; CHECK-NEXT: [[TMP19:%.*]] = sub nsw <8 x i32> [[TMP17]], [[TMP18]]
467+
; CHECK-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[TMP19]], [[TMP9]]
468+
; CHECK-NEXT: [[TMP21:%.*]] = add nsw <8 x i32> [[TMP20]], [[TMP12]]
469+
; CHECK-NEXT: [[TMP22:%.*]] = add nsw <8 x i32> [[TMP21]], [[TMP16]]
470+
; CHECK-NEXT: [[TMP23:%.*]] = add nsw <8 x i32> [[TMP22]], [[TMP14]]
471+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i64 [[INDEX]]
472+
; CHECK-NEXT: store <8 x i32> [[TMP23]], ptr [[TMP24]], align 4
473+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
474+
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
475+
; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
476+
; CHECK: [[MIDDLE_BLOCK]]:
477+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
478+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
479+
; CHECK: [[SCALAR_PH]]:
480+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
481+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
482+
; CHECK: [[FOR_BODY]]:
483+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
484+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[SRC1]], i64 [[INDVARS_IV]]
485+
; CHECK-NEXT: [[TMP26:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
486+
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP26]] to i32
487+
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[CONV]], 1
488+
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 2
489+
; CHECK-NEXT: [[TMP27:%.*]] = load i16, ptr [[B]], align 2
490+
; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
491+
; CHECK-NEXT: [[MUL4:%.*]] = shl nsw i32 [[CONV3]], 3
492+
; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4
493+
; CHECK-NEXT: [[TMP28:%.*]] = load i16, ptr [[C]], align 2
494+
; CHECK-NEXT: [[CONV7:%.*]] = sext i16 [[TMP28]] to i32
495+
; CHECK-NEXT: [[D:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 6
496+
; CHECK-NEXT: [[TMP29:%.*]] = load i16, ptr [[D]], align 2
497+
; CHECK-NEXT: [[CONV12:%.*]] = sext i16 [[TMP29]] to i32
498+
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[SRC2]], i64 [[INDVARS_IV]]
499+
; CHECK-NEXT: [[TMP30:%.*]] = load i16, ptr [[ARRAYIDX16]], align 2
500+
; CHECK-NEXT: [[CONV18:%.*]] = sext i16 [[TMP30]] to i32
501+
; CHECK-NEXT: [[MUL19:%.*]] = mul nsw i32 [[CONV18]], 3
502+
; CHECK-NEXT: [[B23:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX16]], i64 2
503+
; CHECK-NEXT: [[TMP31:%.*]] = load i16, ptr [[B23]], align 2
504+
; CHECK-NEXT: [[CONV24:%.*]] = sext i16 [[TMP31]] to i32
505+
; CHECK-NEXT: [[C29:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX16]], i64 4
506+
; CHECK-NEXT: [[TMP32:%.*]] = load i16, ptr [[C29]], align 2
507+
; CHECK-NEXT: [[CONV30:%.*]] = sext i16 [[TMP32]] to i32
508+
; CHECK-NEXT: [[MUL31:%.*]] = mul nsw i32 [[CONV30]], 10
509+
; CHECK-NEXT: [[D35:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX16]], i64 6
510+
; CHECK-NEXT: [[TMP33:%.*]] = load i16, ptr [[D35]], align 2
511+
; CHECK-NEXT: [[CONV36:%.*]] = sext i16 [[TMP33]] to i32
512+
; CHECK-NEXT: [[MUL37:%.*]] = mul nsw i32 [[CONV36]], -5
513+
; CHECK-NEXT: [[REASS_ADD:%.*]] = add nsw i32 [[CONV24]], [[CONV7]]
514+
; CHECK-NEXT: [[REASS_MUL:%.*]] = mul nsw i32 [[REASS_ADD]], 9
515+
; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[MUL4]], [[MUL]]
516+
; CHECK-NEXT: [[TMP34:%.*]] = shl nsw i32 [[CONV12]], 1
517+
; CHECK-NEXT: [[ADD14:%.*]] = sub nsw i32 [[ADD9]], [[TMP34]]
518+
; CHECK-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD14]], [[MUL19]]
519+
; CHECK-NEXT: [[ADD26:%.*]] = add nsw i32 [[ADD20]], [[MUL31]]
520+
; CHECK-NEXT: [[ADD32:%.*]] = add nsw i32 [[ADD26]], [[REASS_MUL]]
521+
; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD32]], [[MUL37]]
522+
; CHECK-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i64 [[INDVARS_IV]]
523+
; CHECK-NEXT: store i32 [[ADD38]], ptr [[ARRAYIDX40]], align 4
524+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
525+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
526+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
527+
; CHECK: [[EXIT]]:
528+
; CHECK-NEXT: ret void
529+
;
530+
entry:
531+
%wide.trip.count = zext nneg i32 %n to i64
532+
br label %for.body
533+
534+
for.body:
535+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
536+
%arrayidx = getelementptr inbounds nuw %struct.foo, ptr %src1, i64 %indvars.iv
537+
%0 = load i16, ptr %arrayidx, align 2
538+
%conv = sext i16 %0 to i32
539+
%mul = shl nsw i32 %conv, 1
540+
%b = getelementptr inbounds nuw i8, ptr %arrayidx, i64 2
541+
%1 = load i16, ptr %b, align 2
542+
%conv3 = sext i16 %1 to i32
543+
%mul4 = shl nsw i32 %conv3, 3
544+
%c = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4
545+
%2 = load i16, ptr %c, align 2
546+
%conv7 = sext i16 %2 to i32
547+
%d = getelementptr inbounds nuw i8, ptr %arrayidx, i64 6
548+
%3 = load i16, ptr %d, align 2
549+
%conv12 = sext i16 %3 to i32
550+
%arrayidx16 = getelementptr inbounds nuw %struct.foo, ptr %src2, i64 %indvars.iv
551+
%4 = load i16, ptr %arrayidx16, align 2
552+
%conv18 = sext i16 %4 to i32
553+
%mul19 = mul nsw i32 %conv18, 3
554+
%b23 = getelementptr inbounds nuw i8, ptr %arrayidx16, i64 2
555+
%5 = load i16, ptr %b23, align 2
556+
%conv24 = sext i16 %5 to i32
557+
%c29 = getelementptr inbounds nuw i8, ptr %arrayidx16, i64 4
558+
%6 = load i16, ptr %c29, align 2
559+
%conv30 = sext i16 %6 to i32
560+
%mul31 = mul nsw i32 %conv30, 10
561+
%d35 = getelementptr inbounds nuw i8, ptr %arrayidx16, i64 6
562+
%7 = load i16, ptr %d35, align 2
563+
%conv36 = sext i16 %7 to i32
564+
%mul37 = mul nsw i32 %conv36, -5
565+
%reass.add = add nsw i32 %conv24, %conv7
566+
%reass.mul = mul nsw i32 %reass.add, 9
567+
%add9 = add nsw i32 %mul4, %mul
568+
%8 = shl nsw i32 %conv12, 1
569+
%add14 = sub nsw i32 %add9, %8
570+
%add20 = add nsw i32 %add14, %mul19
571+
%add26 = add nsw i32 %add20, %mul31
572+
%add32 = add nsw i32 %add26, %reass.mul
573+
%add38 = add nsw i32 %add32, %mul37
574+
%arrayidx40 = getelementptr inbounds nuw i32, ptr %dst, i64 %indvars.iv
575+
store i32 %add38, ptr %arrayidx40, align 4
576+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
577+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
578+
br i1 %exitcond.not, label %exit, label %for.body
579+
580+
exit:
581+
ret void
582+
}
583+
422584
attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
585+
attributes #1 = { vscale_range(1,16) "target-cpu"="neoverse-v1" }
423586

424587
declare void @llvm.assume(i1 noundef)
425588
declare i64 @llvm.umin.i64(i64, i64)
@@ -450,4 +613,6 @@ declare i64 @llvm.umin.i64(i64, i64)
450613
; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]]}
451614
; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
452615
; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
616+
; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
617+
; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
453618
;.

0 commit comments

Comments
 (0)