Skip to content

Commit

Permalink
AMDGPU/SI: Add support for R_AMDGPU_GOTPCREL
Browse files Browse the repository at this point in the history
Reviewers: rafael, ruiu, tony-tye, arsenm, kzhuravl

Subscribers: arsenm, llvm-commits, kzhuravl

Differential Revision: http://reviews.llvm.org/D21484

llvm-svn: 275268
  • Loading branch information
tstellarAMD committed Jul 13, 2016
1 parent 533a893 commit 418beb7
Show file tree
Hide file tree
Showing 10 changed files with 277 additions and 51 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Expand Up @@ -106,7 +106,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());

TS->EmitDirectiveHSACodeObjectVersion(2, 0);
TS->EmitDirectiveHSACodeObjectVersion(2, 1);

AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
Expand Up @@ -39,6 +39,13 @@ using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
Ctx(ctx), ST(st) { }

static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
switch (MOFlags) {
default: return MCSymbolRefExpr::VK_None;
case SIInstrInfo::MO_GOTPCREL: return MCSymbolRefExpr::VK_GOTPCREL;
}
}

void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {

int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
Expand Down Expand Up @@ -69,7 +76,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
const MCExpr *SymExpr =
MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
MCOp = MCOperand::createExpr(Expr);
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -134,9 +134,9 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
}

static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
if (!RM.hasValue())
return Reloc::PIC_;
return *RM;
// The AMDGPU toolchain only supports generating shared objects, so we
// must always use PIC.
return Reloc::PIC_;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
Expand Down
73 changes: 50 additions & 23 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -510,12 +510,14 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const Value *Ptr = MemNode->getMemOperand()->getValue();

// UndefValue means this is a load of a kernel input. These are uniform.
// Sometimes LDS instructions have constant pointers
if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
isa<GlobalValue>(Ptr))
// Sometimes LDS instructions have constant pointers.
// If Ptr is null, then that means this mem operand contains a
// PseudoSourceValue like GOT.
if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
return true;

const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
const Instruction *I = dyn_cast<Instruction>(Ptr);
return I && I->getMetadata("amdgpu.uniform");
}

Expand Down Expand Up @@ -1517,27 +1519,22 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}

static bool shouldEmitGOTReloc(const GlobalValue *GV,
const TargetMachine &TM) {
return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
!TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
}

bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
return false;

return TargetLowering::isOffsetFoldingLegal(GA);
// We can fold offsets for anything that doesn't require a GOT relocation.
return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
!shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
}

SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);

if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

SDLoc DL(GSD);
const GlobalValue *GV = GSD->getGlobal();
EVT PtrVT = Op.getValueType();

static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
SDLoc DL, unsigned Offset, EVT PtrVT,
unsigned GAFlags = SIInstrInfo::MO_NONE) {
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
// s_getpc_b64 s[0:1]
Expand All @@ -1555,11 +1552,41 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address.
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
GSD->getOffset() + 4);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
GAFlags);
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
}

SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);

if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

SDLoc DL(GSD);
const GlobalValue *GV = GSD->getGlobal();
EVT PtrVT = Op.getValueType();

if (!shouldEmitGOTReloc(GV, getTargetMachine()))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);

SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
SIInstrInfo::MO_GOTPCREL);

Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
// FIXME: Use a PseudoSourceValue once those can be assigned an address space.
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));

return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr,
PtrInfo, false, false, true, Align);
}

SDValue SITargetLowering::lowerTRAP(SDValue Op,
SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Expand Up @@ -91,6 +91,12 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
unsigned OpIdx1) const override;

public:

enum TargetOperandFlags {
MO_NONE = 0,
MO_GOTPCREL = 1
};

explicit SIInstrInfo(const SISubtarget &);

const SIRegisterInfo &getRegisterInfo() const {
Expand Down
203 changes: 203 additions & 0 deletions llvm/test/CodeGen/AMDGPU/global-variable-relocs.ll
@@ -0,0 +1,203 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s

@private = private addrspace(1) global [256 x i32] zeroinitializer
@internal = internal addrspace(1) global [256 x i32] zeroinitializer
@available_externally = available_externally addrspace(1) global [256 x i32] zeroinitializer
@linkonce = linkonce addrspace(1) global [256 x i32] zeroinitializer
@weak= weak addrspace(1) global [256 x i32] zeroinitializer
@common = common addrspace(1) global [256 x i32] zeroinitializer
@extern_weak = extern_weak addrspace(1) global [256 x i32]
@linkonce_odr = linkonce_odr addrspace(1) global [256 x i32] zeroinitializer
@weak_odr = weak_odr addrspace(1) global [256 x i32] zeroinitializer
@external = external addrspace(1) global [256 x i32]
@external_w_init = addrspace(1) global [256 x i32] zeroinitializer

; CHECK-LABEL: private_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], private+8
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @private_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @private, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: internal_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal+8
; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @internal_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @internal, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: available_externally_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], available_externally@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @available_externally_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @available_externally, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: linkonce_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @linkonce_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: weak_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @weak_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: common_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], common@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @common_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @common, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: extern_weak_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], extern_weak@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @extern_weak_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @extern_weak, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: linkonce_odr_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce_odr@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @linkonce_odr_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce_odr, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: weak_odr_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak_odr@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @weak_odr_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak_odr, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: external_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @external_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK-LABEL: external_w_init_test:
; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external_w_init@GOTPCREL+4
; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define void @external_w_init_test(i32 addrspace(1)* %out) {
%ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external_w_init, i32 0, i32 1
%val = load i32, i32 addrspace(1)* %ptr
store i32 %val, i32 addrspace(1)* %out
ret void
}

; CHECK: .local private
; CHECK: .local internal
; CHECK: .weak linkonce
; CHECK: .weak weak
; CHECK: .weak linkonce_odr
; CHECK: .weak weak_odr
; CHECK-NOT: external{{$}}
; CHECK: .globl external_w_init
18 changes: 0 additions & 18 deletions llvm/test/CodeGen/AMDGPU/global-zero-initializer.ll

This file was deleted.

0 comments on commit 418beb7

Please sign in to comment.