[TargetLowering] findOptimalMemOpLowering. NFCI.
This was a local static function in SelectionDAG, which I've promoted to
TargetLowering so that I can reuse it to estimate the cost of a memory
operation in D59787.

Differential Revision: https://reviews.llvm.org/D59766

llvm-svn: 359543
Sjoerd Meijer committed Apr 30, 2019
1 parent 59a4c04 commit 0ed4619
Showing 3 changed files with 133 additions and 123 deletions.
14 changes: 14 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2934,6 +2934,20 @@ class TargetLowering : public TargetLoweringBase {
    }
  };

  /// Determines the optimal series of memory ops to replace the memset / memcpy.
  /// Return true if the number of memory ops is below the threshold (Limit).
  /// It returns the types of the sequence of memory ops to perform
  /// memset / memcpy by reference.
  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                unsigned Limit, uint64_t Size,
                                unsigned DstAlign, unsigned SrcAlign,
                                bool IsMemset,
                                bool ZeroMemset,
                                bool MemcpyStrSrc,
                                bool AllowOverlap,
                                unsigned DstAS, unsigned SrcAS,
                                const AttributeList &FuncAttributes) const;

  /// Check to see if the specified operand of the specified instruction is a
  /// constant integer. If so, check to see if there are any bits set in the
  /// constant that are not demanded. If so, shrink the constant and return
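For illustration, a minimal sketch of how a client outside SelectionDAG could now call the promoted hook, which is the point of this change (D59787 wants to reuse it for cost estimation). The helper name and every argument value below are hypothetical; only the signature comes from the declaration above.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Function.h"
#include <vector>

using namespace llvm;

// Hypothetical helper: count the load/store pairs a 16-byte memcpy from an
// 8-byte-aligned source would need, giving up beyond 4 operations.
static unsigned countMemcpyOps(const TargetLowering &TLI, const Function &F) {
  std::vector<EVT> MemOps;
  if (!TLI.findOptimalMemOpLowering(MemOps, /*Limit=*/4, /*Size=*/16,
                                    /*DstAlign=*/0, // 0: dest align can be raised
                                    /*SrcAlign=*/8,
                                    /*IsMemset=*/false, /*ZeroMemset=*/false,
                                    /*MemcpyStrSrc=*/false,
                                    /*AllowOverlap=*/true,
                                    /*DstAS=*/0, /*SrcAS=*/0,
                                    F.getAttributes()))
    return 0; // more ops than Limit; a caller would fall back to a libcall
  return MemOps.size();
}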
141 changes: 18 additions & 123 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5563,111 +5563,6 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
                                  SrcDelta + G->getOffset());
}

/// Determines the optimal series of memory ops to replace the memset / memcpy.
/// Return true if the number of memory ops is below the threshold (Limit).
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                     unsigned Limit, uint64_t Size,
                                     unsigned DstAlign, unsigned SrcAlign,
                                     bool IsMemset,
                                     bool ZeroMemset,
                                     bool MemcpyStrSrc,
                                     bool AllowOverlap,
                                     unsigned DstAS, unsigned SrcAS,
                                     SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
         "Expecting memcpy / memset source to meet alignment requirement!");
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  const Function &F = DAG.getMachineFunction().getFunction();
  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
                                   IsMemset, ZeroMemset, MemcpyStrSrc,
                                   F.getAttributes());

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!TLI.isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            TLI.isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 TLI.isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
  // On Darwin, -Os means optimize for size without hurting performance, so
  // only really optimize for size when -Oz (MinSize) is used.
@@ -5734,13 +5629,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
  bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);

  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                (DstAlignCanChange ? 0 : Align),
                                (isZeroConstant ? 0 : SrcAlign),
                                false, false, CopyFromConstant, true,
                                DstPtrInfo.getAddrSpace(),
                                SrcPtrInfo.getAddrSpace(),
                                DAG, TLI))
  if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
                                    (DstAlignCanChange ? 0 : Align),
                                    (isZeroConstant ? 0 : SrcAlign),
                                    false, false, CopyFromConstant, true,
                                    DstPtrInfo.getAddrSpace(),
                                    SrcPtrInfo.getAddrSpace(),
                                    MF.getFunction().getAttributes()))
    return SDValue();

  if (DstAlignCanChange) {
@@ -5915,12 +5810,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
    SrcAlign = Align;
  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);

  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                (DstAlignCanChange ? 0 : Align), SrcAlign,
                                false, false, false, false,
                                DstPtrInfo.getAddrSpace(),
                                SrcPtrInfo.getAddrSpace(),
                                DAG, TLI))
  if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
                                    (DstAlignCanChange ? 0 : Align), SrcAlign,
                                    false, false, false, false,
                                    DstPtrInfo.getAddrSpace(),
                                    SrcPtrInfo.getAddrSpace(),
                                    MF.getFunction().getAttributes()))
    return SDValue();

  if (DstAlignCanChange) {
@@ -6015,11 +5910,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
    DstAlignCanChange = true;
  bool IsZeroVal =
      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
                                Size, (DstAlignCanChange ? 0 : Align), 0,
                                true, IsZeroVal, false, true,
                                DstPtrInfo.getAddrSpace(), ~0u,
                                DAG, TLI))
  if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
                                    Size, (DstAlignCanChange ? 0 : Align), 0,
                                    true, IsZeroVal, false, true,
                                    DstPtrInfo.getAddrSpace(), ~0u,
                                    MF.getFunction().getAttributes()))
    return SDValue();

  if (DstAlignCanChange) {
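Aside: stripped of the vector/floating-point candidates, the target legality hooks, and the overlapping-tail fast path, the greedy loop being moved reduces to halving the integer width until each piece fits the remaining size. A simplified standalone model under exactly those assumptions (plain C++, not LLVM API):

#include <cstdint>
#include <vector>

// Simplified model of the greedy type selection: assume i64 is the widest
// safe, legal integer type and never emit overlapping accesses.
static std::vector<unsigned> decomposeBytes(uint64_t Size) {
  std::vector<unsigned> PieceBits;
  unsigned Bits = 64;
  while (Size != 0) {
    while (Bits / 8 > Size)
      Bits /= 2; // shrink i64 -> i32 -> i16 -> i8 for the tail
    PieceBits.push_back(Bits);
    Size -= Bits / 8;
  }
  return PieceBits; // Size = 15 yields {64, 32, 16, 8}: four stores
}

When NumMemOps is nonzero, AllowOverlap is set, and misaligned accesses are fast, the real loop keeps the wide type and sets VTSize = Size instead, so a 15-byte copy becomes two overlapping i64 accesses rather than the four stores above.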
101 changes: 101 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -153,6 +153,107 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
  return LowerCallTo(CLI);
}

bool
TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                         unsigned Limit, uint64_t Size,
                                         unsigned DstAlign, unsigned SrcAlign,
                                         bool IsMemset,
                                         bool ZeroMemset,
                                         bool MemcpyStrSrc,
                                         bool AllowOverlap,
                                         unsigned DstAS, unsigned SrcAS,
                                         const AttributeList &FuncAttributes) const {
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
    return false;

  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
                               IsMemset, ZeroMemset, MemcpyStrSrc,
                               FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
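The search in findOptimalMemOpLowering is seeded by the getOptimalMemOpType target hook, so that hook is where a backend steers the expansion. A hedged sketch of the decision such an override might make, written as a free function with a trimmed parameter list because the backend class would be hypothetical; the v2i64 preference and thresholds are illustrative, not taken from any real target:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Attributes.h"

using namespace llvm;

// Hypothetical policy: use a 128-bit vector for large copies when the
// destination is (or may be raised to) 16-byte aligned; otherwise return
// MVT::Other so findOptimalMemOpLowering falls back to integer types.
static EVT getOptimalMemOpTypeSketch(uint64_t Size, unsigned DstAlign,
                                     const AttributeList &FuncAttributes) {
  if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat) &&
      Size >= 16 && (DstAlign == 0 || DstAlign >= 16))
    return MVT::v2i64;
  return MVT::Other;
}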
