Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9283,6 +9283,14 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
// one exists.
TotalCost += calculateEarlyExitCost(CostCtx, Plan, VF.Width);

// If the expected trip count is less than the VF, the vector loop will only
// execute a single iteration. Then the middle block is executed the same
// number of times as the vector region.
// TODO: Extend logic to always account for the cost of the middle block.
auto ExpectedTC = getSmallBestKnownTC(PSE, L);
if (ExpectedTC && ElementCount::isKnownLE(*ExpectedTC, VF.Width))
TotalCost += Plan.getMiddleBlock()->cost(VF.Width, CostCtx);

// When interleaving only scalar and vector cost will be equal, which in turn
// would lead to a divide by 0. Fall back to hard threshold.
if (VF.Width.isScalar()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -569,53 +569,36 @@ define double @test_load_used_by_other_load_scev_low_trip_count(ptr %ptr.a, ptr
; I64-NEXT: [[ENTRY:.*]]:
; I64-NEXT: br label %[[OUTER_LOOP:.*]]
; I64: [[OUTER_LOOP_LOOPEXIT:.*]]:
; I64-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ [[RESULT:%.*]], %[[INNER_LOOP:.*]] ]
; I64-NEXT: br label %[[OUTER_LOOP]]
; I64: [[OUTER_LOOP]]:
; I64-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP29:%.*]], %[[OUTER_LOOP_LOOPEXIT]] ]
; I64-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[RESULT_LCSSA]], %[[OUTER_LOOP_LOOPEXIT]] ]
; I64-NEXT: [[COND:%.*]] = call i1 @cond()
; I64-NEXT: br i1 [[COND]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
; I64: [[INNER_LOOP_PREHEADER]]:
; I64-NEXT: br label %[[VECTOR_PH:.*]]
; I64: [[VECTOR_PH]]:
; I64-NEXT: br label %[[VECTOR_BODY:.*]]
; I64: [[VECTOR_BODY]]:
; I64-NEXT: [[TMP0:%.*]] = add i64 0, 1
; I64-NEXT: [[TMP1:%.*]] = add i64 1, 1
; I64-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP0]]
; I64-NEXT: br label %[[INNER_LOOP]]
; I64: [[INNER_LOOP]]:
; I64-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 0, %[[INNER_LOOP_PREHEADER]] ]
; I64-NEXT: [[ACCUM_INNER:%.*]] = phi double [ [[MUL1:%.*]], %[[INNER_LOOP]] ], [ [[ACCUM]], %[[INNER_LOOP_PREHEADER]] ]
; I64-NEXT: [[TMP1:%.*]] = add i64 [[IV]], 1
; I64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP1]]
; I64-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP0]]
; I64-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP1]]
; I64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
; I64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
; I64-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]]
; I64-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]]
; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8
; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
; I64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; I64-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer
; I64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8
; I64-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], 0.000000e+00
; I64-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
; I64-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8
; I64-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8
; I64-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
; I64-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1
; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer
; I64-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0
; I64-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer
; I64-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> <i32 1, i32 2>
; I64-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP17]], zeroinitializer
; I64-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], zeroinitializer
; I64-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], splat (double 1.000000e+00)
; I64-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP8]], align 8
; I64-NEXT: [[MUL1]] = fmul double [[ADD1]], 0.000000e+00
; I64-NEXT: [[MUL2:%.*]] = fmul double [[TMP15]], 0.000000e+00
; I64-NEXT: [[ADD2:%.*]] = fadd double [[MUL2]], 0.000000e+00
; I64-NEXT: [[ADD3:%.*]] = fadd double [[ADD2]], 1.000000e+00
; I64-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP9]], align 8
; I64-NEXT: [[TMP25:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0
; I64-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[TMP24]], i32 1
; I64-NEXT: [[TMP27:%.*]] = fdiv <2 x double> [[TMP26]], [[TMP22]]
; I64-NEXT: [[TMP28:%.*]] = fsub <2 x double> [[TMP19]], [[TMP27]]
; I64-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; I64: [[MIDDLE_BLOCK]]:
; I64-NEXT: [[TMP29]] = extractelement <2 x double> [[TMP28]], i32 1
; I64-NEXT: br label %[[OUTER_LOOP_LOOPEXIT]]
; I64-NEXT: [[DIV:%.*]] = fdiv double [[TMP24]], [[ADD3]]
; I64-NEXT: [[RESULT]] = fsub double [[ACCUM_INNER]], [[DIV]]
; I64-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; I64-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
; I64-NEXT: br i1 [[EXITCOND]], label %[[OUTER_LOOP_LOOPEXIT]], label %[[INNER_LOOP]]
; I64: [[EXIT]]:
; I64-NEXT: ret double [[ACCUM]]
;
Expand Down