Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-call-lowering"
Expand Down Expand Up @@ -414,7 +415,10 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &B, ArgInfo &OrigArg,
MachineFunction &MF = B.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getDataLayout();
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);

// This isn't really a constant pool but close enough.
MachinePointerInfo PtrInfo(MF.getPSVManager().getConstantPool(),
AMDGPUAS::CONSTANT_ADDRESS);

LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);

Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,17 @@ Intrinsic::ID AMDGPU::getIntrinsicID(const MachineInstr &I) {

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPU::isUniformMMO(const MachineMemOperand *MMO) {
// FIXME: null value should be treated as unknown, not as uniform.
const Value *Ptr = MMO->getValue();
if (!Ptr) {
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
return PSV->isConstantPool() || PSV->isStack() || PSV->isGOT() ||
PSV->isJumpTable();
}

// FIXME: null value should be treated as unknown, not as uniform.
return true;
}

// UndefValue means this is a load of a kernel input. These are uniform.
// Sometimes LDS instructions have constant pointers.
// If Ptr is null, then that means this mem operand contains a
Expand Down
27 changes: 20 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
Expand Down Expand Up @@ -2321,14 +2322,15 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
return B.buildUnmerge(S32, Dst).getReg(1);
}

// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
Register LoadAddr = MRI.createGenericVirtualRegister(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
// For code object version 5, private_base and shared_base are passed through
// implicit kernargs.
if (AMDGPU::getAMDHSACodeObjectVersion(*MF.getFunction().getParent()) >=
AMDGPU::AMDHSA_COV5) {
// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(B.getMF());

AMDGPUTargetLowering::ImplicitParameter Param =
AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
: AMDGPUTargetLowering::PRIVATE_BASE;
Expand All @@ -2343,7 +2345,7 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
return Register();

MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
PtrInfo.getWithOffset(Offset),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT::scalar(32), commonAlignment(Align(64), Offset));
Expand All @@ -2361,6 +2363,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
if (!loadInputValue(QueuePtr, B, AMDGPUFunctionArgInfo::QUEUE_PTR))
return Register();

// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);

// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
Expand Down Expand Up @@ -4709,6 +4714,14 @@ bool AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(
return true;
}

/// Build the MachinePointerInfo used to describe kernarg segment accesses.
///
/// The result wraps the constant-pool pseudo source value obtained from
/// \p MF's PSV manager and has its address space set to
/// AMDGPUAS::CONSTANT_ADDRESS. No offset is supplied here; callers in this
/// file add one with getWithOffset().
MachinePointerInfo
AMDGPULegalizerInfo::getKernargSegmentPtrInfo(MachineFunction &MF) const {
  // The kernarg segment is not really a constant pool, but that pseudo source
  // value is close enough to stand in for it.
  auto &PSVMgr = MF.getPSVManager();
  MachinePointerInfo KernargInfo(PSVMgr.getConstantPool());
  KernargInfo.AddrSpace = AMDGPUAS::CONSTANT_ADDRESS;
  return KernargInfo;
}

Register AMDGPULegalizerInfo::getKernargParameterPtr(MachineIRBuilder &B,
int64_t Offset) const {
LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
Expand Down Expand Up @@ -4736,8 +4749,8 @@ bool AMDGPULegalizerInfo::legalizeKernargMemParameter(MachineInstr &MI,
"unexpected kernarg parameter type");

Register Ptr = getKernargParameterPtr(B, Offset);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
B.buildLoad(DstReg, Ptr, PtrInfo, Align(4),
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(B.getMF());
B.buildLoad(DstReg, Ptr, PtrInfo.getWithOffset(Offset), Align(4),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
MI.eraseFromParent();
Expand Down Expand Up @@ -7260,9 +7273,9 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
return false;

// TODO: can we be smarter about machine pointer info?
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo = getKernargSegmentPtrInfo(MF);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo,
PtrInfo.getWithOffset(Offset),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT::scalar(64), commonAlignment(Align(64), Offset));
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
unsigned Dim, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;

MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
Register getKernargParameterPtr(MachineIRBuilder &B, int64_t Offset) const;
bool legalizeKernargMemParameter(MachineInstr &MI, MachineIRBuilder &B,
uint64_t Offset,
Expand Down
33 changes: 23 additions & 10 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
Expand Down Expand Up @@ -2265,6 +2266,14 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
return TargetLowering::isTypeDesirableForOp(Op, VT);
}

/// Build the MachinePointerInfo used to describe kernarg segment accesses.
///
/// The result wraps the constant-pool pseudo source value obtained from
/// \p MF's PSV manager and has its address space set to
/// AMDGPUAS::CONSTANT_ADDRESS. No offset is supplied here; callers in this
/// file add one with getWithOffset().
MachinePointerInfo
SITargetLowering::getKernargSegmentPtrInfo(MachineFunction &MF) const {
  // The kernarg segment is not really a constant pool, but that pseudo source
  // value is close enough to stand in for it.
  auto &PSVMgr = MF.getPSVManager();
  MachinePointerInfo KernargInfo(PSVMgr.getConstantPool());
  KernargInfo.AddrSpace = AMDGPUAS::CONSTANT_ADDRESS;
  return KernargInfo;
}

SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
Expand Down Expand Up @@ -2341,7 +2350,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
uint64_t Offset, Align Alignment, bool Signed,
const ISD::InputArg *Arg) const {
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);

MachinePointerInfo PtrInfo =
getKernargSegmentPtrInfo(DAG.getMachineFunction());

// Try to avoid using an extload by loading earlier than the argument address,
// and extracting the relevant bits. The load should hopefully be merged with
Expand All @@ -2356,7 +2367,8 @@ SDValue SITargetLowering::lowerKernargMemParameter(
// TODO: If we passed in the base kernel offset we could have a better
// alignment than 4, but we don't really need it.
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr,
PtrInfo.getWithOffset(AlignDownOffset), Align(4),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);

Expand All @@ -2371,9 +2383,9 @@ SDValue SITargetLowering::lowerKernargMemParameter(
}

SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue Load = DAG.getLoad(
MemVT, SL, Chain, Ptr, PtrInfo.getWithOffset(Offset), Alignment,
MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);

SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
return DAG.getMergeValues({Val, Load.getValue(1)}, SL);
Expand Down Expand Up @@ -8109,10 +8121,11 @@ SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
MachineFunction &MF = DAG.getMachineFunction();
uint64_t Offset = getImplicitParameterOffset(MF, Param);
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
MachinePointerInfo PtrInfo =
getKernargSegmentPtrInfo(DAG.getMachineFunction());
return DAG.getLoad(
VT, DL, DAG.getEntryNode(), Ptr, PtrInfo.getWithOffset(Offset), Alignment,
MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
}

SDValue SITargetLowering::lowerTrapHsaQueuePtr(SDValue Op,
Expand Down Expand Up @@ -11944,7 +11957,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
(AS == AMDGPUAS::GLOBAL_ADDRESS &&
Subtarget->getScalarizeGlobalBehavior() && Load->isSimple() &&
isMemOpHasNoClobberedMemOperand(Load))) {
(Load->isInvariant() || isMemOpHasNoClobberedMemOperand(Load)))) {
if ((!Op->isDivergent() || AMDGPU::isUniformMMO(MMO)) &&
Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType() ||
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
unsigned &NumIntermediates, MVT &RegisterVT) const override;

private:
MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;

SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
Expand Down
Loading
Loading