Skip to content

Commit

Permalink
[VP] Cost model for VPMemory operations on PowerPC.
Browse files Browse the repository at this point in the history
PPC Implementation of getVPMemoryOpCost and hasActiveVectorLength.

Reviewed By: Roland Froese

Differential Revision: https://reviews.llvm.org/D109417
  • Loading branch information
bmahjour committed Dec 7, 2021
1 parent 81f9dc8 commit 8aee783
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 0 deletions.
83 changes: 83 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Expand Up @@ -1388,3 +1388,86 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,

return false;
}

bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const {
// Only load and stores instructions can have variable vector length on Power.
if (Opcode != Instruction::Load && Opcode != Instruction::Store)
return false;
// Loads/stores with length instructions use bits 0-7 of the GPR operand and
// therefore cannot be used in 32-bit mode.
if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
return false;
if (auto *VecTy = dyn_cast<FixedVectorType>(DataType)) {
unsigned VecWidth = DataType->getPrimitiveSizeInBits();
return VecWidth == 128;
}
Type *ScalarTy = DataType->getScalarType();

if (ScalarTy->isPointerTy())
return true;

if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
return true;

if (!ScalarTy->isIntegerTy())
return false;

unsigned IntWidth = ScalarTy->getIntegerBitWidth();
return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
}

InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
AddressSpace, CostKind, I);
if (TLI->getValueType(DL, Src, true) == MVT::Other)
return Cost;
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;

assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");

auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
assert(SrcVTy && "Expected a vector type for VP memory operations");

if (hasActiveVectorLength(Opcode, Src, Alignment)) {
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, SrcVTy);

InstructionCost CostFactor =
vectorCostAdjustmentFactor(Opcode, Src, nullptr);
if (!CostFactor.isValid())
return InstructionCost::getMax();

InstructionCost Cost = LT.first * CostFactor;
assert(Cost.isValid() && "Expected valid cost");

// On P9 but not on P10, if the op is misaligned then it will cause a
// pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
// ones.
const Align DesiredAlignment(16);
if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
return Cost;

// Since alignment may be under estimated, we try to compute the probability
// that the actual address is aligned to the desired boundary. For example
// an 8-byte aligned load is assumed to be actually 16-byte aligned half the
// time, while a 4-byte aligned load has a 25% chance of being 16-byte
// aligned.
float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
float MisalignmentProb = 1.0 - AlignmentProb;
return (MisalignmentProb * P9PipelineFlushEstimate) +
(AlignmentProb * *Cost.getValue());
}

// Usually we should not get to this point, but the following is an attempt to
// model the cost of legalization. Currently we can only lower intrinsics with
// evl but no mask, on Power 9/10. Otherwise, we must scalarize.
return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}
11 changes: 11 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
Expand Up @@ -137,6 +137,17 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const;
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);

private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;

/// @}
};

Expand Down

0 comments on commit 8aee783

Please sign in to comment.