diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 83e5fcef7d7b00..d1701851fea0a9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -523,8 +523,10 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel( uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset; ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize; - if (Arg.use_empty()) + if (Arg.use_empty()) { + ++i; continue; + } ArrayRef OrigArgRegs = VRegs[i]; Register ArgReg = diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index 4c48f9cc49fec6..76a6f1732102bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA-VI %s -; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { ; HSA-VI-LABEL: name: i8_arg @@ -1070,17 +1070,55 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ret void } -define amdgpu_kernel void @empty_struct_arg({} %in) nounwind { +; 0-sized arguments do not add a slot to the argument register set, so +; waste an index in the virtual register array. +define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; HSA-VI-LABEL: name: empty_struct_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI: liveins: $sgpr4_sgpr5 ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) + ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 + store i32 %arg1, i32 addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { + ; HSA-VI-LABEL: name: empty_array_arg + ; HSA-VI: bb.1 (%ir-block.0): + ; HSA-VI: liveins: $sgpr4_sgpr5 + ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; HSA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; HSA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4) + ; HSA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; HSA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI: S_ENDPGM 0 + ; LEGACY-MESA-VI-LABEL: name: empty_array_arg + ; LEGACY-MESA-VI: bb.1 (%ir-block.0): + ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1 + ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 + ; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 + ; LEGACY-MESA-VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4) + ; LEGACY-MESA-VI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; LEGACY-MESA-VI: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI: S_ENDPGM 0 + store i32 %arg1, i32 addrspace(1)* undef ret void } @@ -1171,13 +1209,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 ; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) + ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) + ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0 + ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32 ; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1) ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) ; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) + ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) ; HSA-VI: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.1): @@ -1190,13 +1230,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 ; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) + ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4) + ; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0 + ; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32 ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1) ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1) ; LEGACY-MESA-VI: S_ENDPGM 0 %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index f7e6fd5cd47460..c5bea011252f21 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -2,6 +2,34 @@ ; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s ; FIXME: pre-VI should have same ABI without legal i16 operations. +define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { + ; CHECK-LABEL: name: void_func_empty_arg + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] + store i32 %arg1, i32 addrspace(1)* undef + ret void +} + +define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { + ; CHECK-LABEL: name: void_func_empty_array + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] + store i32 %arg1, i32 addrspace(1)* undef + ret void +} + define void @void_func_i1(i1 %arg0) #0 { ; CHECK-LABEL: name: void_func_i1 ; CHECK: bb.1 (%ir-block.0):