Skip to content

Commit

Permalink
[CostModel][X86] Add CostKinds handling for SSE FCMP_ONE/FCMP_UEQ predicates
Browse files Browse the repository at this point in the history

These require special handling to account for their expansion in lowering.

I'm trying very hard not to have to add predicate-specific costs, but it might be inevitable.
  • Loading branch information
RKSimon committed Sep 6, 2022
1 parent 3a90af1 commit 83552e8
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 81 deletions.
13 changes: 12 additions & 1 deletion llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2867,7 +2867,6 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Pred == CmpInst::BAD_FCMP_PREDICATE))
Pred = cast<CmpInst>(I)->getPredicate();

// TODO: Handle pre-AVX FCMP_ONE/FCMP_UEQ slow cases.
switch (Pred) {
case CmpInst::Predicate::ICMP_NE:
// xor(cmpeq(x,y),-1)
Expand Down Expand Up @@ -2896,6 +2895,18 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
ExtraCost = 3;
}
break;
case CmpInst::Predicate::FCMP_ONE:
case CmpInst::Predicate::FCMP_UEQ:
// Without AVX we need to expand FCMP_ONE/FCMP_UEQ cases.
// Use FCMP_UEQ expansion - FCMP_ONE should be the same.
if (CondTy && !ST->hasAVX())
return getCmpSelInstrCost(Opcode, ValTy, CondTy,
CmpInst::Predicate::FCMP_UNO, CostKind) +
getCmpSelInstrCost(Opcode, ValTy, CondTy,
CmpInst::Predicate::FCMP_OEQ, CostKind) +
getArithmeticInstrCost(Instruction::Or, CondTy, CostKind);

break;
case CmpInst::Predicate::BAD_ICMP_PREDICATE:
case CmpInst::Predicate::BAD_FCMP_PREDICATE:
// Assume worst case scenario and add the maximum extra cost.
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/Analysis/CostModel/X86/fcmp-codesize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,15 @@ define i32 @cmp_float_oeq(i32 %arg) {
define i32 @cmp_float_one(i32 %arg) {
; SSE-LABEL: 'cmp_float_one'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_one'
Expand Down Expand Up @@ -401,15 +401,15 @@ define i32 @cmp_float_olt(i32 %arg) {
define i32 @cmp_float_ueq(i32 %arg) {
; SSE-LABEL: 'cmp_float_ueq'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_ueq'
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/Analysis/CostModel/X86/fcmp-latency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,15 @@ define i32 @cmp_float_oeq(i32 %arg) {
define i32 @cmp_float_one(i32 %arg) {
; SSE-LABEL: 'cmp_float_one'
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fcmp one float undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fcmp one double undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX1-LABEL: 'cmp_float_one'
Expand Down Expand Up @@ -492,15 +492,15 @@ define i32 @cmp_float_olt(i32 %arg) {
define i32 @cmp_float_ueq(i32 %arg) {
; SSE-LABEL: 'cmp_float_ueq'
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = fcmp ueq float undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F64 = fcmp ueq double undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX1-LABEL: 'cmp_float_ueq'
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/Analysis/CostModel/X86/fcmp-sizelatency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,15 @@ define i32 @cmp_float_oeq(i32 %arg) {
define i32 @cmp_float_one(i32 %arg) {
; SSE-LABEL: 'cmp_float_one'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp one float undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp one <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp one <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp one <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp one <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp one double undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp one <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp one <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp one <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp one <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_one'
Expand Down Expand Up @@ -401,15 +401,15 @@ define i32 @cmp_float_olt(i32 %arg) {
define i32 @cmp_float_ueq(i32 %arg) {
; SSE-LABEL: 'cmp_float_ueq'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fcmp ueq float undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = fcmp ueq <2 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fcmp ueq <4 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fcmp ueq <8 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16F32 = fcmp ueq <16 x float> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fcmp ueq double undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fcmp ueq <2 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fcmp ueq <4 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8F64 = fcmp ueq <8 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16F64 = fcmp ueq <16 x double> undef, undef
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'cmp_float_ueq'
Expand Down
Loading

0 comments on commit 83552e8

Please sign in to comment.