diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 7ab062bcc4da7..3d4adb16a2716 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7133,6 +7133,17 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op, bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // OSes that use ELF REL relocations (instead of RELA) can only store a + // 32-bit addend in the instruction, so it is not safe to allow offset folding + // which can create arbitrary 64-bit addends. (This is only a problem for + // R_AMDGPU_*32_HI relocations since other relocation types are unaffected by + // the high 32 bits of the addend.) + // + // This should be kept in sync with how HasRelocationAddend is initialized in + // the constructor of ELFAMDGPUAsmBackend. + if (!Subtarget->isAmdHsaOS()) + return false; + // We can fold offsets for anything that doesn't require a GOT relocation. return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || diff --git a/llvm/test/CodeGen/AMDGPU/global-constant.ll b/llvm/test/CodeGen/AMDGPU/global-constant.ll index 336f012ec80e4..38b9c5df7faa1 100644 --- a/llvm/test/CodeGen/AMDGPU/global-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/global-constant.ll @@ -19,14 +19,14 @@ ; GCN-DEFAULT: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+12 ; MESA uses absolute relocations. -; GCN-MESA: s_add_u32 s2, s4, private1@abs32@lo -; GCN-MESA: s_addc_u32 s3, s5, private1@abs32@hi +; GCN-MESA: s_add_u32 s2, private1@abs32@lo, s4 +; GCN-MESA: s_addc_u32 s3, private1@abs32@hi, s5 ; PAL uses absolute relocations. -; GCN-PAL: s_add_u32 s2, s4, private1@abs32@lo -; GCN-PAL: s_addc_u32 s3, s5, private1@abs32@hi -; GCN-PAL: s_add_u32 s4, s4, private2@abs32@lo -; GCN-PAL: s_addc_u32 s5, s5, private2@abs32@hi +; GCN-PAL: s_add_u32 s2, private1@abs32@lo, s4 +; GCN-PAL: s_addc_u32 s3, private1@abs32@hi, s5 +; GCN-PAL: s_add_u32 s4, private2@abs32@lo, s4 +; GCN-PAL: s_addc_u32 s5, private2@abs32@hi, s5 ; R600-LABEL: private_test define amdgpu_kernel void @private_test(i32 %index, ptr addrspace(1) %out) { @@ -44,13 +44,13 @@ define amdgpu_kernel void @private_test(i32 %index, ptr addrspace(1) %out) { ; GCN-DEFAULT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4 ; GCN-DEFAULT: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+12 -; GCN-MESA: s_mov_b32 s1, available_externally@abs32@hi+4 -; GCN-MESA: s_mov_b32 s0, available_externally@abs32@lo+4 +; GCN-MESA: s_mov_b32 s1, available_externally@abs32@hi +; GCN-MESA: s_mov_b32 s0, available_externally@abs32@lo ; R600-LABEL: available_externally_test -; GCN-PAL: s_mov_b32 s3, available_externally@abs32@hi+4 -; GCN-PAL: s_mov_b32 s2, available_externally@abs32@lo+4 +; GCN-PAL: s_mov_b32 s3, available_externally@abs32@hi +; GCN-PAL: s_mov_b32 s2, available_externally@abs32@lo define amdgpu_kernel void @available_externally_test(ptr addrspace(1) %out) { %ptr = getelementptr [256 x i32], ptr addrspace(4) @available_externally, i32 0, i32 1 %val = load i32, ptr addrspace(4) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll index c7cefb37281ed..41e8762311306 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -315,10 +315,9 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(ptr ad ; FUNC-LABEL: {{^}}test_memcpy_const_string_align4: ; SI: s_getpc_b64 -; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4@rel32@lo+20 +; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4@rel32@lo+4 ; SI: s_addc_u32 -; SI-DAG: s_load_dwordx4 -; SI-DAG: s_load_dwordx4 +; SI-DAG: s_load_dwordx8 ; SI-DAG: s_load_dwordx2 ; SI-DAG: buffer_store_dwordx4 ; SI-DAG: buffer_store_dwordx4 diff --git a/llvm/test/CodeGen/AMDGPU/rel32.ll b/llvm/test/CodeGen/AMDGPU/rel32.ll index aa2d269523d80..59d64f3ad2b5e 100644 --- a/llvm/test/CodeGen/AMDGPU/rel32.ll +++ b/llvm/test/CodeGen/AMDGPU/rel32.ll @@ -6,9 +6,22 @@ ; CHECK-LABEL: rel32_neg_offset: ; CHECK: s_getpc_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]] -; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], g@rel32@lo-4 -; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], g@rel32@hi+4 +; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], g@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], g@rel32@hi+12 +; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], -16 +; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], -1 define ptr addrspace(4) @rel32_neg_offset() { - %r = getelementptr i32, ptr addrspace(4) @g, i64 -2 + %r = getelementptr i32, ptr addrspace(4) @g, i64 -4 + ret ptr addrspace(4) %r +} + +; CHECK-LABEL: rel32_large_offset: +; CHECK: s_getpc_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]] +; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], g@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], g@rel32@hi+12 +; CHECK-NEXT: s_add_u32 s[[LO]], s[[LO]], 0x502f9000 +; CHECK-NEXT: s_addc_u32 s[[HI]], s[[HI]], 9 +define ptr addrspace(4) @rel32_large_offset() { + %r = getelementptr i32, ptr addrspace(4) @g, i64 10000000000 ret ptr addrspace(4) %r }