Skip to content

Conversation

@fhahn
Copy link
Contributor

@fhahn fhahn commented Nov 16, 2025

In the legacy cost model, ForceTargetInstructionCost overrides any costs from InstsToScalarize. Match that behavior in the VPlan-based cost model by skipping the InstsToScalarize costs when the option is set. This fixes a crash with -force-target-instr-cost for the added test case.

In the legacy cost model, ForceTargetInstructionCost overrides any costs
from InstsToScalarize. Match that behavior in the VPlan-based cost model
by skipping the InstsToScalarize costs when the option is set. This
fixes a crash with -force-target-instr-cost for the added test case.
@llvmbot
Copy link
Member

llvmbot commented Nov 16, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

Changes

ForceTargetInstructionCost in the legacy cost model overrides any costs from InstsToScalarize. Match the behavior in the VPlan-based cost model. This fixes a crash with -force-target-instr-cost for the added test case.


Full diff: https://github.com/llvm/llvm-project/pull/168269.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+11-10)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll (+78)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..b51d754b93a9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6923,16 +6923,17 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     });
     Cost += ForcedCost;
   }
-  for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
-    if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
-      continue;
-    CostCtx.SkipCostComputation.insert(Scalarized);
-    LLVM_DEBUG({
-      dbgs() << "Cost of " << ScalarCost << " for VF " << VF
-             << ": profitable to scalarize " << *Scalarized << "\n";
-    });
-    Cost += ScalarCost;
-  }
+  if (!ForceTargetInstructionCost.getNumOccurrences())
+    for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
+      if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
+        continue;
+      CostCtx.SkipCostComputation.insert(Scalarized);
+      LLVM_DEBUG({
+        dbgs() << "Cost of " << ScalarCost << " for VF " << VF
+               << ": profitable to scalarize " << *Scalarized << "\n";
+      });
+      Cost += ScalarCost;
+    }
 
   return Cost;
 }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 29bbd015eed1f..f8368c445b349 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -380,6 +380,84 @@ for.end:
   ret void
 }
 
+define void @forced_scalar_instr(ptr %gep.dst) {
+; COMMON-LABEL: define void @forced_scalar_instr(
+; COMMON-SAME: ptr [[GEP_DST:%.*]]) {
+; COMMON-NEXT:  [[ENTRY:.*:]]
+; COMMON-NEXT:    br label %[[VECTOR_PH:.*]]
+; COMMON:       [[VECTOR_PH]]:
+; COMMON-NEXT:    br label %[[VECTOR_BODY:.*]]
+; COMMON:       [[VECTOR_BODY]]:
+; COMMON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; COMMON-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; COMMON-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
+; COMMON-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
+; COMMON-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; COMMON-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; COMMON:       [[PRED_STORE_IF]]:
+; COMMON-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; COMMON-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 0
+; COMMON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
+; COMMON-NEXT:    [[TMP6:%.*]] = or i32 [[TMP4]], 1
+; COMMON-NEXT:    store i32 [[TMP6]], ptr [[TMP5]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; COMMON:       [[PRED_STORE_CONTINUE]]:
+; COMMON-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; COMMON-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; COMMON:       [[PRED_STORE_IF1]]:
+; COMMON-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; COMMON-NEXT:    [[TMP9:%.*]] = add i32 [[TMP0]], 1
+; COMMON-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP8]]
+; COMMON-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], 1
+; COMMON-NEXT:    store i32 [[TMP11]], ptr [[TMP10]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; COMMON:       [[PRED_STORE_CONTINUE2]]:
+; COMMON-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; COMMON-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; COMMON:       [[PRED_STORE_IF3]]:
+; COMMON-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 2
+; COMMON-NEXT:    [[TMP14:%.*]] = add i32 [[TMP0]], 2
+; COMMON-NEXT:    [[TMP15:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP13]]
+; COMMON-NEXT:    [[TMP16:%.*]] = or i32 [[TMP14]], 1
+; COMMON-NEXT:    store i32 [[TMP16]], ptr [[TMP15]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; COMMON:       [[PRED_STORE_CONTINUE4]]:
+; COMMON-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; COMMON-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
+; COMMON:       [[PRED_STORE_IF5]]:
+; COMMON-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
+; COMMON-NEXT:    [[TMP19:%.*]] = add i32 [[TMP0]], 3
+; COMMON-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP18]]
+; COMMON-NEXT:    [[TMP21:%.*]] = or i32 [[TMP19]], 1
+; COMMON-NEXT:    store i32 [[TMP21]], ptr [[TMP20]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; COMMON:       [[PRED_STORE_CONTINUE6]]:
+; COMMON-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; COMMON-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; COMMON-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
+; COMMON-NEXT:    br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; COMMON:       [[MIDDLE_BLOCK]]:
+; COMMON-NEXT:    br label %[[EXIT:.*]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr i32, ptr %gep.dst, i64 %iv
+  %t = trunc i64 %iv to i32
+  %o = or i32 %t, 1
+  store i32 %o, ptr %gep, align 4
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 4
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
 
 declare void @llvm.assume(i1 noundef)

@llvmbot
Copy link
Member

llvmbot commented Nov 16, 2025

@llvm/pr-subscribers-vectorizers

Author: Florian Hahn (fhahn)

Changes

ForceTargetInstructionCost in the legacy cost model overrides any costs from InstsToScalarize. Match the behavior in the VPlan-based cost model. This fixes a crash with -force-target-instr-cost for the added test case.


Full diff: https://github.com/llvm/llvm-project/pull/168269.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+11-10)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll (+78)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..b51d754b93a9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6923,16 +6923,17 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
     });
     Cost += ForcedCost;
   }
-  for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
-    if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
-      continue;
-    CostCtx.SkipCostComputation.insert(Scalarized);
-    LLVM_DEBUG({
-      dbgs() << "Cost of " << ScalarCost << " for VF " << VF
-             << ": profitable to scalarize " << *Scalarized << "\n";
-    });
-    Cost += ScalarCost;
-  }
+  if (!ForceTargetInstructionCost.getNumOccurrences())
+    for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
+      if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
+        continue;
+      CostCtx.SkipCostComputation.insert(Scalarized);
+      LLVM_DEBUG({
+        dbgs() << "Cost of " << ScalarCost << " for VF " << VF
+               << ": profitable to scalarize " << *Scalarized << "\n";
+      });
+      Cost += ScalarCost;
+    }
 
   return Cost;
 }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 29bbd015eed1f..f8368c445b349 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -380,6 +380,84 @@ for.end:
   ret void
 }
 
+define void @forced_scalar_instr(ptr %gep.dst) {
+; COMMON-LABEL: define void @forced_scalar_instr(
+; COMMON-SAME: ptr [[GEP_DST:%.*]]) {
+; COMMON-NEXT:  [[ENTRY:.*:]]
+; COMMON-NEXT:    br label %[[VECTOR_PH:.*]]
+; COMMON:       [[VECTOR_PH]]:
+; COMMON-NEXT:    br label %[[VECTOR_BODY:.*]]
+; COMMON:       [[VECTOR_BODY]]:
+; COMMON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
+; COMMON-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
+; COMMON-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
+; COMMON-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4)
+; COMMON-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; COMMON-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; COMMON:       [[PRED_STORE_IF]]:
+; COMMON-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; COMMON-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 0
+; COMMON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP3]]
+; COMMON-NEXT:    [[TMP6:%.*]] = or i32 [[TMP4]], 1
+; COMMON-NEXT:    store i32 [[TMP6]], ptr [[TMP5]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; COMMON:       [[PRED_STORE_CONTINUE]]:
+; COMMON-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; COMMON-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; COMMON:       [[PRED_STORE_IF1]]:
+; COMMON-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; COMMON-NEXT:    [[TMP9:%.*]] = add i32 [[TMP0]], 1
+; COMMON-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP8]]
+; COMMON-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], 1
+; COMMON-NEXT:    store i32 [[TMP11]], ptr [[TMP10]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
+; COMMON:       [[PRED_STORE_CONTINUE2]]:
+; COMMON-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; COMMON-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; COMMON:       [[PRED_STORE_IF3]]:
+; COMMON-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 2
+; COMMON-NEXT:    [[TMP14:%.*]] = add i32 [[TMP0]], 2
+; COMMON-NEXT:    [[TMP15:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP13]]
+; COMMON-NEXT:    [[TMP16:%.*]] = or i32 [[TMP14]], 1
+; COMMON-NEXT:    store i32 [[TMP16]], ptr [[TMP15]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
+; COMMON:       [[PRED_STORE_CONTINUE4]]:
+; COMMON-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; COMMON-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
+; COMMON:       [[PRED_STORE_IF5]]:
+; COMMON-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
+; COMMON-NEXT:    [[TMP19:%.*]] = add i32 [[TMP0]], 3
+; COMMON-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[GEP_DST]], i64 [[TMP18]]
+; COMMON-NEXT:    [[TMP21:%.*]] = or i32 [[TMP19]], 1
+; COMMON-NEXT:    store i32 [[TMP21]], ptr [[TMP20]], align 4
+; COMMON-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
+; COMMON:       [[PRED_STORE_CONTINUE6]]:
+; COMMON-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; COMMON-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; COMMON-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
+; COMMON-NEXT:    br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; COMMON:       [[MIDDLE_BLOCK]]:
+; COMMON-NEXT:    br label %[[EXIT:.*]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr i32, ptr %gep.dst, i64 %iv
+  %t = trunc i64 %iv to i32
+  %o = or i32 %t, 1
+  store i32 %o, ptr %gep, align 4
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 4
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
 attributes #0 = { "target-features"="+neon,+sve" vscale_range(1,16) }
 
 declare void @llvm.assume(i1 noundef)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants