Skip to content

Commit

Permalink
[X86][SSE] Update the cost table for integer-integer conversions on S…
Browse files Browse the repository at this point in the history
…SE2/SSE4.1.

Previously in the conversion cost table there are no entries for integer-integer
conversions on SSE2. This will result in imprecise costs for certain vectorized
operations. This patch adds those entries for SSE2 and SSE4.1. The cost numbers
are counted from the result of running llc on the new test case in this patch.


Differential revision: http://reviews.llvm.org/D15132

llvm-svn: 255315
  • Loading branch information
Cong Hou committed Dec 11, 2015
1 parent 533ff00 commit 59898d8
Show file tree
Hide file tree
Showing 3 changed files with 435 additions and 5 deletions.
81 changes: 79 additions & 2 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Expand Up @@ -528,6 +528,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");

// FIXME: Need a better design of the cost table to handle non-simple types of
// potential massive combinations (elem_num x src_type x dst_type).

static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
Expand Down Expand Up @@ -705,7 +708,38 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
};

static const TypeConversionCostTblEntry SSE2ConvTbl[] = {
static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 },

{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
};

static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
// These are somewhat magic numbers justified by looking at the output of
// Intel's IACA, running some kernels and making sure when we take
// legalization into account the throughput will be overestimated.
Expand All @@ -726,13 +760,42 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },

{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },

{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 14 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 7 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 31 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
};

std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);

if (ST->hasSSE2() && !ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConvTbl, ISD,
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
LTDest.second, LTSrc.second))
return LTSrc.first * Entry->Cost;
}
Expand Down Expand Up @@ -770,6 +833,20 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return Entry->Cost;
}

if (ST->hasSSE41()) {
if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return Entry->Cost;
}

if (ST->hasSSE2()) {
if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return Entry->Cost;
}

return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Analysis/CostModel/X86/sitofp.ll
Expand Up @@ -248,13 +248,13 @@ define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
; SSE2: cost of 20 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv2i64v2double
; AVX1: cost of 4 {{.*}} sitofp
; AVX1: cost of 20 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv2i64v2double
; AVX2: cost of 4 {{.*}} sitofp
; AVX2: cost of 20 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv2i64v2double
; AVX512F: cost of 4 {{.*}} sitofp
; AVX512F: cost of 20 {{.*}} sitofp
%1 = sitofp <2 x i64> %a to <2 x double>
ret <2 x double> %1
}
Expand Down

0 comments on commit 59898d8

Please sign in to comment.