Skip to content

Commit

Permalink
[X86][CostModel] Correct the costs for truncate to a mask register wi…
Browse files Browse the repository at this point in the history
…th avx512

I've modified isTruncateFree to get an accurate cost for types that need to be split. I'm planning to look into fixing it for all vectors, but need more cost cleanups first.

Differential Revision: https://reviews.llvm.org/D78973
  • Loading branch information
topperc committed Apr 28, 2020
1 parent 01b5f52 commit d42192c
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 102 deletions.
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -30611,6 +30611,11 @@ bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
// Truncate to mask registers aren't free.
// TODO: No vector truncates are free.
if (Subtarget.hasAVX512() && VT2.isVector() &&
VT2.getVectorElementType() == MVT::i1)
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 > NumBits2;
Expand Down
47 changes: 47 additions & 0 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Expand Up @@ -1396,6 +1396,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
};

static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
Expand All @@ -1420,6 +1431,20 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },

{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // zmm vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // zmm vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, // vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // zmm vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // zmm vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, // vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 },
Expand Down Expand Up @@ -1539,6 +1564,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },

{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb
};

static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = {
Expand All @@ -1564,6 +1598,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
};

static const TypeConversionCostTblEntry AVX512VLConversionTbl[] = {
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, // sext+vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, // split+2*v8i8
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, // sext+vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, // split+2*v8i16
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, // vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, // vpslld+vptestmd
{ ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, // vpsllq+vptestmq
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, // vpsllq+vptestmq

// sign extend is vpcmpeq+maskedmove+vpmovdw+vpacksswb
// zero extend is vpcmpeq+maskedmove+vpmovdw+vpsrlw+vpackuswb
{ ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 },
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/X86/cast.ll
Expand Up @@ -44,10 +44,10 @@ define i32 @add(i32 %arg) {
; AVX512-LABEL: 'add'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = zext <4 x i1> undef to <4 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <4 x i1> undef to <4 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = trunc <4 x i32> undef to <4 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = zext <8 x i1> undef to <8 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <8 x i1> undef to <8 x i32>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
Expand Down

0 comments on commit d42192c

Please sign in to comment.