From a2a75242af145ce35dce6d7de6ccd59c9ee967a9 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Fri, 14 Nov 2025 20:19:49 +0000
Subject: [PATCH] [VPlan] Skip applying InstsToScalarize with forced instr
 costs.

ForceTargetInstructionCost in the legacy cost model overrides any costs
from InstsToScalarize. Match this behavior in the VPlan-based cost
model.

This fixes a crash with -force-target-instr-cost for the added test
case.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 21 ++---
 .../AArch64/force-target-instruction-cost.ll  | 78 +++++++++++++++++++
 2 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..b51d754b93a9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6923,16 +6923,17 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     });
     Cost += ForcedCost;
   }
-  for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
-    if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
-      continue;
-    CostCtx.SkipCostComputation.insert(Scalarized);
-    LLVM_DEBUG({
-      dbgs() << "Cost of " << ScalarCost << " for VF " << VF
-             << ": profitable to scalarize " << *Scalarized << "\n";
-    });
-    Cost += ScalarCost;
-  }
+  if (!ForceTargetInstructionCost.getNumOccurrences())
+    for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
+      if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
+        continue;
+      CostCtx.SkipCostComputation.insert(Scalarized);
+      LLVM_DEBUG({
+        dbgs() << "Cost of " << ScalarCost << " for VF " << VF
+               << ": profitable to scalarize " << *Scalarized << "\n";
+      });
+      Cost += ScalarCost;
+    }
 
   return Cost;
 }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 29bbd015eed1f..f8368c445b349 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -380,6 +380,84 @@ for.end:
   ret void
 }
 
+define void @forced_scalar_instr(ptr %gep.dst) {
+; COMMON-LABEL: define void @forced_scalar_instr(
+; COMMON-SAME: ptr [[GEP_DST:%.*]]) {
+; COMMON-NEXT:  [[ENTRY:.*:]]
+; COMMON-NEXT:    br label %[[VECTOR_PH:.*]]
+; COMMON:       [[VECTOR_PH]]:
+; COMMON-NEXT:    br label %[[VECTOR_BODY:.*]]
+; COMMON:       [[VECTOR_BODY]]:
+; COMMON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; COMMON-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; COMMON-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
+; COMMON-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
+; COMMON-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; COMMON-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; COMMON:       [[PRED_STORE_IF]]:
+; COMMON-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; COMMON-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 0
+; COMMON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
+; COMMON-NEXT:    [[TMP6:%.*]] = or i32 [[TMP4]], 1
+; COMMON-NEXT:    store i32 [[TMP6]], ptr [[TMP5]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; COMMON:       [[PRED_STORE_CONTINUE]]:
+; COMMON-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; COMMON-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; COMMON:       [[PRED_STORE_IF1]]:
+; COMMON-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; COMMON-NEXT:    [[TMP9:%.*]] = add i32 [[TMP0]], 1
+; COMMON-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP8]]
+; COMMON-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], 1
+; COMMON-NEXT:    store i32 [[TMP11]], ptr [[TMP10]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; COMMON:       [[PRED_STORE_CONTINUE2]]:
+; COMMON-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; COMMON-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; COMMON:       [[PRED_STORE_IF3]]:
+; COMMON-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 2
+; COMMON-NEXT:    [[TMP14:%.*]] = add i32 [[TMP0]], 2
+; COMMON-NEXT:    [[TMP15:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP13]]
+; COMMON-NEXT:    [[TMP16:%.*]] = or i32 [[TMP14]], 1
+; COMMON-NEXT:    store i32 [[TMP16]], ptr [[TMP15]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; COMMON:       [[PRED_STORE_CONTINUE4]]:
+; COMMON-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; COMMON-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
+; COMMON:       [[PRED_STORE_IF5]]:
+; COMMON-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
+; COMMON-NEXT:    [[TMP19:%.*]] = add i32 [[TMP0]], 3
+; COMMON-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP18]]
+; COMMON-NEXT:    [[TMP21:%.*]] = or i32 [[TMP19]], 1
+; COMMON-NEXT:    store i32 [[TMP21]], ptr [[TMP20]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; COMMON:       [[PRED_STORE_CONTINUE6]]:
+; COMMON-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; COMMON-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; COMMON-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
+; COMMON-NEXT:    br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; COMMON:       [[MIDDLE_BLOCK]]:
+; COMMON-NEXT:    br label %[[EXIT:.*]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr i32, ptr %gep.dst, i64 %iv
+  %t = trunc i64 %iv to i32
+  %o = or i32 %t, 1
+  store i32 %o, ptr %gep, align 4
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 4
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
 
 declare void @llvm.assume(i1 noundef)
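For reference, before this change the new test case would crash the vectorizer when a forced instruction cost is supplied; an invocation along the lines below should exercise it. This is a sketch only: the test file's actual RUN lines and COMMON prefix wiring are outside this hunk, and the forced cost value of 1 is an assumption.

  # Assumed reproduction command; the committed test's RUN lines may differ.
  opt -passes=loop-vectorize -force-target-instr-cost=1 -S \
      llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll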