Skip to content

Commit

Permalink
[CostModel][X86] getArithmeticInstrCost - move SLM reduceVMULWidth cost handling into the generic MUL handling
Browse files Browse the repository at this point in the history

This is still SLM-specific at the moment, but converting it to more closely match the codegen from reduceVMULWidth should be straightforward.
  • Loading branch information
RKSimon committed Aug 26, 2022
1 parent 9c29b4a commit f3590b6
Showing 1 changed file with 16 additions and 23 deletions.
39 changes: 16 additions & 23 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Expand Up @@ -254,6 +254,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
bool SignedMode = Op1Signed || Op2Signed;

// If both are representable as i15 and at least one is constant,
// zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
Expand All @@ -275,6 +276,20 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
LT.second =
MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
}

// Check if the vXi32 operands can be shrunk into a smaller datatype.
// This should match the codegen from reduceVMULWidth.
// TODO: Make this generic (!ST->SSE41 || ST->isPMULLDSlow()).
if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
if (OpMinSize <= 7)
return LT.first * 3; // pmullw/sext
if (!SignedMode && OpMinSize <= 8)
return LT.first * 3; // pmullw/zext
if (OpMinSize <= 15)
return LT.first * 5; // pmullw/pmulhw/pshuf
if (!SignedMode && OpMinSize <= 16)
return LT.first * 5; // pmullw/pmulhw/pshuf
}
}

// Vector multiply by pow2 will be simplified to shifts.
Expand Down Expand Up @@ -372,32 +387,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::v2i64, { 4 } },
};

if (ST->useSLMArithCosts()) {
if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
// Check if the operands can be shrunk into a smaller datatype.
// TODO: Merge this into generic vXi32 MUL patterns above.
bool Op1Signed = false;
unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
bool Op2Signed = false;
unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);

bool SignedMode = Op1Signed || Op2Signed;
unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);

if (OpMinSize <= 7)
return LT.first * 3; // pmullw/sext
if (!SignedMode && OpMinSize <= 8)
return LT.first * 3; // pmullw/zext
if (OpMinSize <= 15)
return LT.first * 5; // pmullw/pmulhw/pshuf
if (!SignedMode && OpMinSize <= 16)
return LT.first * 5; // pmullw/pmulhw/pshuf
}

if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTable, ISD, LT.second))
if (auto KindCost = Entry->Cost[CostKind])
return LT.first * KindCost.value();
}

static const CostKindTblEntry AVX512BWUniformConstCostTable[] = {
{ ISD::SHL, MVT::v64i8, { 2 } }, // psllw + pand.
Expand Down

0 comments on commit f3590b6

Please sign in to comment.