Skip to content

Commit

Permalink
Merging r242433:
Browse files Browse the repository at this point in the history
------------------------------------------------------------------------
r242433 | tstellar | 2015-07-16 12:40:07 -0700 (Thu, 16 Jul 2015) | 11 lines

AMDPGU/SI: Use AssertZext node to mask high bit for scratch offsets

Summary:
We can safely assume that the high bit of scratch offsets will never
be set, because this would require at least 128 GB of GPU memory.

Reviewers: arsenm

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D11225
------------------------------------------------------------------------

llvm-svn: 242683
  • Loading branch information
zmodem committed Jul 20, 2015
1 parent 12555f8 commit d10907a
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 3 deletions.
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Expand Up @@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"true",
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;

def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
"EnableHugeScratchBuffer",
"true",
"Enable scratch buffer sizes greater than 128 GB">;

class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Expand Up @@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
IsaVersion(ISAVersion0_0_0),
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Expand Up @@ -89,6 +89,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool FeatureDisable;
int LDSBankCount;
unsigned IsaVersion;
bool EnableHugeScratchBuffer;

AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
Expand Down Expand Up @@ -271,6 +272,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
return DevName;
}

bool enableHugeScratchBuffer() const {
return EnableHugeScratchBuffer;
}

bool dumpCode() const {
return DumpCode;
}
Expand Down
30 changes: 28 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -812,10 +812,29 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {

SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

SDLoc SL(Op);
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FINode->getIndex();

return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
// A FrameIndex node represents a 32-bit offset into scratch memory. If
// the high bit of a frame index offset were to be set, this would mean
// that it represented an offset of ~2GB * 64 = ~128GB from the start of the
// scratch buffer, with 64 being the number of threads per wave.
//
// If we know the machine uses less than 128GB of scratch, then we can
// amrk the high bit of the FrameIndex node as known zero,
// which is important, because it means in most situations we can
// prove that values derived from FrameIndex nodes are non-negative.
// This enables us to take advantage of more addressing modes when
// accessing scratch buffers, since for scratch reads/writes, the register
// offset must always be positive.

SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
if (Subtarget->enableHugeScratchBuffer())
return TFI;

return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
}

/// This transforms the control flow intrinsics to get the branch destination as
Expand Down Expand Up @@ -2034,6 +2053,13 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}

static bool isFrameIndexOp(SDValue Op) {
if (Op.getOpcode() == ISD::AssertZext)
Op = Op.getOperand(0);

return isa<FrameIndexSDNode>(Op);
}

/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs are to these instructions are registers.
Expand All @@ -2042,7 +2068,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,

SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}
Expand Down

0 comments on commit d10907a

Please sign in to comment.