Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Make default AMDHSA Code Object Version to be 5 #65410

Merged
merged 1 commit into from Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -4616,12 +4616,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>;

def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
Visibility<[ClangOption, CC1Option]>,
Values<"none,2,3,4,5">,
NormalizedValuesScope<"TargetOptions">,
NormalizedValues<["COV_None", "COV_2", "COV_3", "COV_4", "COV_5"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;

defm cumode : SimpleMFlag<"cumode",
"Specify CU wavefront", "Specify WGP wavefront",
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/CommonArgs.cpp
Expand Up @@ -2341,7 +2341,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,

unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
unsigned CodeObjVer = 4; // default
unsigned CodeObjVer = 5; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
return CodeObjVer;
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
// Create module flag for code object version.

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=V4
// RUN: -o - %s | FileCheck %s -check-prefix=V5

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=2 -o - %s | FileCheck -check-prefix=V2 %s
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=PRECOV5 %s


// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenHIP/default-attributes.hip
Expand Up @@ -46,11 +46,11 @@ __global__ void kernel() {
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
//.
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
//.
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
Expand Up @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #8 = { nounwind }
// GFX900: attributes #9 = { convergent nounwind }
//.
// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
// NOCPU: !2 = !{i32 2, i32 0}
// NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
Expand All @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: !15 = !{i32 1}
// NOCPU: !16 = !{!"int*"}
//.
// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
// GFX900: !2 = !{i32 2, i32 0}
// GFX900: !3 = !{!4, !4, i64 0}
Expand Down
10 changes: 5 additions & 5 deletions clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Expand Up @@ -599,13 +599,13 @@ void test_get_local_id(int d, global int *out)
}

// CHECK-LABEL: @test_get_workgroup_size(
// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
void test_get_workgroup_size(int d, global int *out)
{
switch (d) {
Expand Down
16 changes: 8 additions & 8 deletions clang/test/Driver/hip-device-libs.hip
Expand Up @@ -160,13 +160,13 @@
// Test default code object version.
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5

// Test default code object version with old device library without abi_version_400.bc
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// Test default code object version with old device library without abi_version_500.bc
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5

// Test -mcode-object-version=3
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
Expand All @@ -193,12 +193,12 @@
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5

// Test -mcode-object-version=5 with old device library without abi_version_400.bc
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: -mcode-object-version=5 \
// Test -mcode-object-version=4 with old device library without abi_version_400.bc
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: -mcode-object-version=4 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4

// ALL-NOT: error:
// ALL: {{"[^"]*clang[^"]*"}}
Expand Down
2 changes: 1 addition & 1 deletion lld/test/ELF/emulation-amdgpu.s
Expand Up @@ -13,7 +13,7 @@
# CHECK-NEXT: DataEncoding: LittleEndian (0x1)
# CHECK-NEXT: FileVersion: 1
# CHECK-NEXT: OS/ABI: AMDGPU_HSA (0x40)
# CHECK-NEXT: ABIVersion: 2
# CHECK-NEXT: ABIVersion: 3
# CHECK-NEXT: Unused: (00 00 00 00 00 00 00)
# CHECK-NEXT: }
# CHECK-NEXT: Type: Executable (0x2)
Expand Down
2 changes: 1 addition & 1 deletion lld/test/ELF/lto/amdgcn-oses.ll
Expand Up @@ -15,7 +15,7 @@
; RUN: llvm-readobj --file-headers %t/mesa3d.so | FileCheck %s --check-prefixes=GCN,NON-AMDHSA,MESA3D

; AMDHSA: OS/ABI: AMDGPU_HSA (0x40)
; AMDHSA: ABIVersion: 2
; AMDHSA: ABIVersion: 3

; AMDPAL: OS/ABI: AMDGPU_PAL (0x41)
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
Expand Down
15 changes: 7 additions & 8 deletions llvm/docs/AMDGPUUsage.rst
Expand Up @@ -1418,12 +1418,12 @@ The AMDGPU backend uses the following ELF header:

* ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
runtime ABI for code object V4. Specify using the Clang option
``-mcode-object-version=4``. This is the default code object
version if not specified.
``-mcode-object-version=4``.

* ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
runtime ABI for code object V5. Specify using the Clang option
``-mcode-object-version=5``.
``-mcode-object-version=5``. This is the default code object
version if not specified.

* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
Expand Down Expand Up @@ -3852,6 +3852,10 @@ same *vendor-name*.
Code Object V4 Metadata
+++++++++++++++++++++++

. warning::
Code object V4 is not the default code object version emitted by this version
of LLVM.

Code object V4 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
Expand Down Expand Up @@ -3882,11 +3886,6 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
Code Object V5 Metadata
+++++++++++++++++++++++

.. warning::
Code object V5 is not the default code object version emitted by this version
of LLVM.


Code object V5 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Expand Up @@ -34,7 +34,7 @@
static llvm::cl::opt<unsigned>
AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
llvm::cl::desc("AMDHSA Code Object Version"),
llvm::cl::init(4));
llvm::cl::init(5));

namespace {

Expand Down Expand Up @@ -177,7 +177,7 @@ unsigned getCodeObjectVersion(const Module &M) {
}

// Default code object version.
return AMDHSA_COV4;
return AMDHSA_COV5;
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
Expand Down
Expand Up @@ -7,7 +7,7 @@
define amdgpu_kernel void @stack_write_fi() {
; CHECK-LABEL: stack_write_fi:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_mov_b32 s4, 0
Expand Down
57 changes: 28 additions & 29 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
Expand Up @@ -7,43 +7,42 @@ declare void @callee()
define amdgpu_kernel void @call_debug_loc() {
; CHECK-LABEL: name: call_debug_loc
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !6
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !6
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !6
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !6
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !6
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !6
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !6
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !6
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13, debug-location !6
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12, debug-location !6
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9, debug-location !6
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !6
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !6
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !6
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !6
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF debug-location !6
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !6
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !6
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !6
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !6
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY14]], [[COPY1]], implicit $exec, debug-location !6
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !6
; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !6
; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !6
; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !6
; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY15]], [[COPY]], implicit $exec, debug-location !6
; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !6
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !6
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !6
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !6
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !6
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !6
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6
; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !6
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]], debug-location !6
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]], debug-location !6
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]], debug-location !6
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]], debug-location !6
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]], debug-location !6
; CHECK-NEXT: $sgpr12 = COPY [[COPY11]], debug-location !6
; CHECK-NEXT: $sgpr13 = COPY [[COPY12]], debug-location !6
; CHECK-NEXT: $sgpr14 = COPY [[COPY13]], debug-location !6
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]], debug-location !6
; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4)
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
Expand Up @@ -32,13 +32,13 @@ define void @call_result_align_1() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
Expand Down Expand Up @@ -81,13 +81,13 @@ define void @call_result_align_8() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
Expand Down Expand Up @@ -131,13 +131,13 @@ define void @declaration_result_align_8() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
Expand Down Expand Up @@ -181,11 +181,11 @@ define ptr addrspace(1) @tail_call_assert_align() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr6_sgpr7
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]](p4)
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
Expand Down