Skip to content

Commit

Permalink
[CostModel][X86] Account for older SSE targets with slow fp->int conversions
Browse files Browse the repository at this point in the history

Both the conversion cost and the xmm->gpr transfer cost tend to be much higher on early SSE targets.
  • Loading branch information
RKSimon committed Jul 8, 2021
1 parent c34b0ab commit 8ef67fa
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 49 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Expand Up @@ -2076,9 +2076,15 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 22 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 4 },

{ ISD::FP_TO_SINT, MVT::i32, MVT::f32, 1 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 1 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 1 },
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },

{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 1 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
Expand Down Expand Up @@ -2115,6 +2121,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 18 },

{ ISD::FP_TO_SINT, MVT::i32, MVT::f32, 4 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 4 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 4 },
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
Expand All @@ -2123,7 +2133,9 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },

{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
Expand Down
90 changes: 60 additions & 30 deletions llvm/test/Analysis/CostModel/X86/fptosi.ll
@@ -1,21 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
;
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1

define i32 @fptosi_double_i64(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptosi double undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i64'
Expand Down Expand Up @@ -61,12 +61,19 @@ define i32 @fptosi_double_i64(i32 %arg) {
}

define i32 @fptosi_double_i32(i32 %arg) {
; SSE-LABEL: 'fptosi_double_i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
; SSE2-LABEL: 'fptosi_double_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi double undef to i32
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptosi_double_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
Expand All @@ -88,6 +95,13 @@ define i32 @fptosi_double_i32(i32 %arg) {
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptosi double undef to i32
%V2I32 = fptosi <2 x double> undef to <2 x i32>
Expand All @@ -98,7 +112,7 @@ define i32 @fptosi_double_i32(i32 %arg) {

define i32 @fptosi_double_i16(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi double undef to i16
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
Expand Down Expand Up @@ -148,7 +162,7 @@ define i32 @fptosi_double_i16(i32 %arg) {

define i32 @fptosi_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi double undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
Expand Down Expand Up @@ -198,11 +212,11 @@ define i32 @fptosi_double_i8(i32 %arg) {

define i32 @fptosi_float_i64(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptosi float undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i64'
Expand Down Expand Up @@ -254,13 +268,21 @@ define i32 @fptosi_float_i64(i32 %arg) {
}

define i32 @fptosi_float_i32(i32 %arg) {
; SSE-LABEL: 'fptosi_float_i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
; SSE2-LABEL: 'fptosi_float_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi float undef to i32
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptosi_float_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
Expand All @@ -285,6 +307,14 @@ define i32 @fptosi_float_i32(i32 %arg) {
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptosi float undef to i32
%V2I32 = fptosi <2 x float> undef to <2 x i32>
Expand All @@ -296,7 +326,7 @@ define i32 @fptosi_float_i32(i32 %arg) {

define i32 @fptosi_float_i16(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi float undef to i16
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
Expand Down Expand Up @@ -353,7 +383,7 @@ define i32 @fptosi_float_i16(i32 %arg) {

define i32 @fptosi_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi float undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
Expand Down

0 comments on commit 8ef67fa

Please sign in to comment.