diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 443bb45a23514..9036b26a6f6bc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -187,6 +187,11 @@ def CC_AMDGPU_Func : CallingConv<[ CCIfByVal>, CCIfType<[i1], CCPromoteToType>, CCIfType<[i8, i16], CCIfExtend>>, + + CCIfInReg("SGPR"#i)) // SGPR0-29 + >>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index cff7e4bc66218..4681004d3ba74 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2665,6 +2665,11 @@ SDValue SITargetLowering::LowerFormalArguments( if (!IsKernel) { CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg); + if (!IsGraphics && !Subtarget->enableFlatScratch()) { + CCInfo.AllocateRegBlock(ArrayRef{AMDGPU::SGPR0, AMDGPU::SGPR1, + AMDGPU::SGPR2, AMDGPU::SGPR3}, + 4); + } CCInfo.AnalyzeFormalArguments(Splits, AssignFn); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 7b8a37532c9fa..d0c84f7bf2574 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2529,8 +2529,8 @@ bool isArgPassedInSGPR(const Argument *A) { return A->hasAttribute(Attribute::InReg) || A->hasAttribute(Attribute::ByVal); default: - // TODO: Should calls support inreg for SGPR inputs? - return false; + // TODO: treat i1 as divergent? + return A->hasAttribute(Attribute::InReg); } } @@ -2556,8 +2556,7 @@ bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { return CB->paramHasAttr(ArgNo, Attribute::InReg) || CB->paramHasAttr(ArgNo, Attribute::ByVal); default: - // TODO: Should calls support inreg for SGPR inputs? - return false; + return CB->paramHasAttr(ArgNo, Attribute::InReg); } } diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll index 48528c6112b00..d7d8d29fbfd42 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll @@ -39,6 +39,15 @@ define i32 @asm_sgpr(i32 %divergent) { ret i32 %sgpr } +; SGPR asm outputs are uniform regardless of the input operands. +; Argument not divergent if marked inreg. +; CHECK-LABEL: for function 'asm_sgpr_inreg_arg': +; CHECK-NOT: DIVERGENT +define i32 @asm_sgpr_inreg_arg(i32 inreg %divergent) { + %sgpr = call i32 asm "; def $0, $1","=s,v"(i32 %divergent) + ret i32 %sgpr +} + ; CHECK-LABEL: for function 'asm_mixed_sgpr_vgpr': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent) ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0 @@ -58,6 +67,18 @@ define void @single_lane_func_arguments(i32 %i32, i1 %i1) #2 { ret void } +; CHECK-LABEL: for function 'divergent_args': +; CHECK: DIVERGENT ARGUMENTS +define void @divergent_args(i32 %i32, i1 %i1) { + ret void +} + +; CHECK-LABEL: for function 'no_divergent_args_if_inreg': +; CHECK-NOT: DIVERGENT +define void @no_divergent_args_if_inreg(i32 inreg %i32, i1 inreg %i1) { + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.readfirstlane(i32) #0 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #1 diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll index 4b014f969257b..f6fe654896a2d 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll @@ -30,8 +30,6 @@ define amdgpu_kernel void @test_amdgpu_kernel(ptr addrspace(4) byref([4 x <16 x ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: define void @test_c(ptr addrspace(5) byval([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll new file mode 100644 index 0000000000000..84287b5e4458d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -0,0 +1,1808 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s + +define void @void_func_i1_inreg(i1 inreg %arg0) #0 { +; GFX9-LABEL: void_func_i1_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_and_b32 s4, s4, 1 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_i1_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_and_b32 s0, s0, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store i1 %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_i8_inreg(i8 inreg %arg0) #0 { +; GFX9-LABEL: void_func_i8_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_i8_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b8 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store i8 %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_i16_inreg(i16 inreg %arg0) #0 { +; GFX9-LABEL: void_func_i16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_short v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_i16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b16 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store i16 %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_i32_inreg(i32 inreg %arg0) #0 { +; GFX9-LABEL: void_func_i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store i32 %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_i64_inreg(i64 inreg %arg0) #0 { +; GFX9-LABEL: void_func_i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store i64 %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_f16_inreg(half inreg %arg0) #0 { +; GFX9-LABEL: void_func_f16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_short v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_f16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b16 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store half %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_f32_inreg(float inreg %arg0) #0 { +; GFX9-LABEL: void_func_f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store float %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_f64_inreg(double inreg %arg0) #0 { +; GFX9-LABEL: void_func_f64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_f64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store double %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v2i16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v2i16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <2 x i16> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v3i16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s5 +; GFX9-NEXT: global_store_short v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v3i16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b16 v[0:1], v0, off +; GFX11-NEXT: global_store_b32 v[0:1], v1, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <3 x i16> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 { +; GFX89-LABEL: void_func_v4i16_inreg: +; GFX89: ; %bb.0: +; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX89-NEXT: v_mov_b32_e32 v0, s4 +; GFX89-NEXT: v_mov_b32_e32 v1, s5 +; GFX89-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX89-NEXT: s_waitcnt vmcnt(0) +; GFX89-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v4i16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <4 x i16> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v5i16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: global_store_short v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v5i16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b16 v[0:1], v2, off +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <5 x i16> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v8i16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v8i16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <8 x i16> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v2i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v2i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <2 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v3i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v3i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <3 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v4i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v4i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <4 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v5i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v5i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <5 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v8i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v8i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <8 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 { +; CIGFX89-LABEL: void_func_v16i32_inreg: +; CIGFX89: ; %bb.0: +; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CIGFX89-NEXT: v_mov_b32_e32 v0, s16 +; CIGFX89-NEXT: v_mov_b32_e32 v1, s17 +; CIGFX89-NEXT: v_mov_b32_e32 v2, s18 +; CIGFX89-NEXT: v_mov_b32_e32 v3, s19 +; CIGFX89-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; CIGFX89-NEXT: s_nop 0 +; CIGFX89-NEXT: v_mov_b32_e32 v0, s12 +; CIGFX89-NEXT: v_mov_b32_e32 v1, s13 +; CIGFX89-NEXT: v_mov_b32_e32 v2, s14 +; CIGFX89-NEXT: v_mov_b32_e32 v3, s15 +; CIGFX89-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; CIGFX89-NEXT: s_nop 0 +; CIGFX89-NEXT: v_mov_b32_e32 v0, s8 +; CIGFX89-NEXT: v_mov_b32_e32 v1, s9 +; CIGFX89-NEXT: v_mov_b32_e32 v2, s10 +; CIGFX89-NEXT: v_mov_b32_e32 v3, s11 +; CIGFX89-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; CIGFX89-NEXT: s_nop 0 +; CIGFX89-NEXT: v_mov_b32_e32 v0, s4 +; CIGFX89-NEXT: v_mov_b32_e32 v1, s5 +; CIGFX89-NEXT: v_mov_b32_e32 v2, s6 +; CIGFX89-NEXT: v_mov_b32_e32 v3, s7 +; CIGFX89-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; CIGFX89-NEXT: s_waitcnt vmcnt(0) +; CIGFX89-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v16i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 +; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 +; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5 +; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7 +; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 +; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <16 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v32i32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-NEXT: v_mov_b32_e32 v6, v0 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v4, s28 +; GFX9-NEXT: v_mov_b32_e32 v5, s29 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off +; GFX9-NEXT: v_mov_b32_e32 v0, s24 +; GFX9-NEXT: v_mov_b32_e32 v1, s25 +; GFX9-NEXT: v_mov_b32_e32 v2, s26 +; GFX9-NEXT: v_mov_b32_e32 v3, s27 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s20 +; GFX9-NEXT: v_mov_b32_e32 v1, s21 +; GFX9-NEXT: v_mov_b32_e32 v2, s22 +; GFX9-NEXT: v_mov_b32_e32 v3, s23 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v32i32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v1, s29 +; GFX11-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v5, s25 +; GFX11-NEXT: v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v7, s27 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 +; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 +; GFX11-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 +; GFX11-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 +; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5 +; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7 +; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 +; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 +; GFX11-NEXT: s_clause 0x4 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <32 x i32> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v2i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v2i64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <2 x i64> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v3i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v3i64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <3 x i64> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v4i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v4i64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <4 x i64> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v5i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v5i64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <5 x i64> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v8i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v8i64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 +; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 +; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5 +; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7 +; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 +; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <8 x i64> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v16i64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-NEXT: v_mov_b32_e32 v6, v0 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v4, s28 +; GFX9-NEXT: v_mov_b32_e32 v5, s29 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off +; GFX9-NEXT: v_mov_b32_e32 v0, s24 +; GFX9-NEXT: v_mov_b32_e32 v1, s25 +; GFX9-NEXT: v_mov_b32_e32 v2, s26 +; GFX9-NEXT: v_mov_b32_e32 v3, s27 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s20 +; GFX9-NEXT: v_mov_b32_e32 v1, s21 +; GFX9-NEXT: v_mov_b32_e32 v2, s22 +; GFX9-NEXT: v_mov_b32_e32 v3, s23 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v16i64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v1, s29 +; GFX11-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v5, s25 +; GFX11-NEXT: v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v7, s27 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 +; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 +; GFX11-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 +; GFX11-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 +; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5 +; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7 +; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 +; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 +; GFX11-NEXT: s_clause 0x4 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <16 x i64> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v2f16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v2f16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v[0:1], v0, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <2 x half> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v3f16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s5 +; GFX9-NEXT: global_store_short v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v3f16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b16 v[0:1], v0, off +; GFX11-NEXT: global_store_b32 v[0:1], v1, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <3 x half> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v4f16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v4f16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <4 x half> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v8f16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v8f16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <8 x half> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v16f16_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v16f16_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <16 x half> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v2f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v2f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <2 x float> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v3f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v3f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <3 x float> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v4f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v4f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <4 x float> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v8f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v8f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <8 x float> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v16f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v16f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 +; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 +; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5 +; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7 +; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 +; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <16 x float> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v2f64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v2f64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <2 x double> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v3f64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v3f64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <3 x double> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v4f64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v4f64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 +; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <4 x double> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v8f64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v8f64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 +; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX11-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GFX11-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 +; GFX11-NEXT: v_dual_mov_b32 v8, s4 :: v_dual_mov_b32 v9, s5 +; GFX11-NEXT: v_dual_mov_b32 v10, s6 :: v_dual_mov_b32 v11, s7 +; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 +; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <8 x double> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 { +; GFX9-LABEL: void_func_v16f64_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-NEXT: v_mov_b32_e32 v6, v0 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v4, s28 +; GFX9-NEXT: v_mov_b32_e32 v5, s29 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[4:7], off +; GFX9-NEXT: v_mov_b32_e32 v0, s24 +; GFX9-NEXT: v_mov_b32_e32 v1, s25 +; GFX9-NEXT: v_mov_b32_e32 v2, s26 +; GFX9-NEXT: v_mov_b32_e32 v3, s27 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s20 +; GFX9-NEXT: v_mov_b32_e32 v1, s21 +; GFX9-NEXT: v_mov_b32_e32 v2, s22 +; GFX9-NEXT: v_mov_b32_e32 v3, s23 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v16f64_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 +; GFX11-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v1, s29 +; GFX11-NEXT: v_dual_mov_b32 v4, s24 :: v_dual_mov_b32 v5, s25 +; GFX11-NEXT: v_dual_mov_b32 v6, s26 :: v_dual_mov_b32 v7, s27 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 +; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 +; GFX11-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 +; GFX11-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 +; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5 +; GFX11-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7 +; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 +; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 +; GFX11-NEXT: s_clause 0x4 +; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off +; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off +; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off +; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off +; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store <16 x double> %arg0, ptr addrspace(1) undef + ret void +} + +define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 { +; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v13, v1 +; GFX9-NEXT: v_mov_b32_e32 v12, v0 +; GFX9-NEXT: v_mov_b32_e32 v10, s28 +; GFX9-NEXT: v_mov_b32_e32 v11, s29 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s24 +; GFX9-NEXT: v_mov_b32_e32 v1, s25 +; GFX9-NEXT: v_mov_b32_e32 v2, s26 +; GFX9-NEXT: v_mov_b32_e32 v3, s27 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s20 +; GFX9-NEXT: v_mov_b32_e32 v1, s21 +; GFX9-NEXT: v_mov_b32_e32 v2, s22 +; GFX9-NEXT: v_mov_b32_e32 v3, s23 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 1, v6 +; GFX9-NEXT: global_store_byte v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_byte v[0:1], v7, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_short v[0:1], v8, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_short v[0:1], v9, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v8, v0 +; GFX11-NEXT: v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v7, s29 +; GFX11-NEXT: v_dual_mov_b32 v10, s24 :: v_dual_mov_b32 v11, s25 +; GFX11-NEXT: v_dual_mov_b32 v12, s26 :: v_dual_mov_b32 v13, s27 +; GFX11-NEXT: v_dual_mov_b32 v14, s20 :: v_dual_mov_b32 v15, s21 +; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23 +; GFX11-NEXT: v_dual_mov_b32 v18, s16 :: v_dual_mov_b32 v19, s17 +; GFX11-NEXT: v_dual_mov_b32 v20, s18 :: v_dual_mov_b32 v21, s19 +; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v6, s12 :: v_dual_mov_b32 v7, s13 +; GFX11-NEXT: v_dual_mov_b32 v8, s14 :: v_dual_mov_b32 v9, s15 +; GFX11-NEXT: v_and_b32_e32 v0, 1, v2 +; GFX11-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v11, s9 +; GFX11-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v13, s11 +; GFX11-NEXT: v_dual_mov_b32 v14, s4 :: v_dual_mov_b32 v15, s5 +; GFX11-NEXT: v_dual_mov_b32 v16, s6 :: v_dual_mov_b32 v17, s7 +; GFX11-NEXT: v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1 +; GFX11-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b8 v[0:1], v0, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b8 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b16 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b16 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i1 %arg1, ptr addrspace(1) undef + store volatile i8 %arg2, ptr addrspace(1) undef + store volatile i16 %arg3, ptr addrspace(1) undef + store volatile half %arg4, ptr addrspace(1) undef + ret void +} + +define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 { +; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v13, v1 +; GFX9-NEXT: v_mov_b32_e32 v12, v0 +; GFX9-NEXT: v_mov_b32_e32 v10, s28 +; GFX9-NEXT: v_mov_b32_e32 v11, s29 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s24 +; GFX9-NEXT: v_mov_b32_e32 v1, s25 +; GFX9-NEXT: v_mov_b32_e32 v2, s26 +; GFX9-NEXT: v_mov_b32_e32 v3, s27 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s20 +; GFX9-NEXT: v_mov_b32_e32 v1, s21 +; GFX9-NEXT: v_mov_b32_e32 v2, s22 +; GFX9-NEXT: v_mov_b32_e32 v3, s23 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s12 +; GFX9-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-NEXT: v_mov_b32_e32 v2, s14 +; GFX9-NEXT: v_mov_b32_e32 v3, s15 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[6:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v8, v0 +; GFX11-NEXT: v_dual_mov_b32 v6, s28 :: v_dual_mov_b32 v7, s29 +; GFX11-NEXT: v_dual_mov_b32 v10, s24 :: v_dual_mov_b32 v11, s25 +; GFX11-NEXT: v_dual_mov_b32 v12, s26 :: v_dual_mov_b32 v13, s27 +; GFX11-NEXT: v_dual_mov_b32 v14, s20 :: v_dual_mov_b32 v15, s21 +; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23 +; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v7, s17 +; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v9, s19 +; GFX11-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13 +; GFX11-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v13, s15 +; GFX11-NEXT: v_dual_mov_b32 v14, s8 :: v_dual_mov_b32 v15, s9 +; GFX11-NEXT: v_dual_mov_b32 v16, s10 :: v_dual_mov_b32 v17, s11 +; GFX11-NEXT: v_dual_mov_b32 v18, s4 :: v_dual_mov_b32 v19, s5 +; GFX11-NEXT: v_dual_mov_b32 v20, s6 :: v_dual_mov_b32 v21, s7 +; GFX11-NEXT: v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1 +; GFX11-NEXT: v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3 +; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b128 v[0:1], v[22:25], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i32> %arg1, ptr addrspace(1) undef + store volatile <2 x float> %arg2, ptr addrspace(1) undef + ret void +} + +define void @too_many_args_use_workitem_id_x_inreg( + i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7, + i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15, + i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23, + i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) { +; GFX9-LABEL: {{^}}too_many_args_use_workitem_id_x_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s4 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s5 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s6 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s7 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s8 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s9 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s10 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s11 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s12 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s13 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s14 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s15 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s16 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s17 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s18 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s19 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s20 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s21 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s22 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s23 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s24 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s25 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s26 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s27 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s28 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v6, s29 +; GFX9-NEXT: global_store_dword v[0:1], v6, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v3, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v4, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v5, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: {{^}}too_many_args_use_workitem_id_x_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX11-NEXT: v_mov_b32_e32 v6, s4 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v3, s6 :: v_dual_mov_b32 v2, s5 +; GFX11-NEXT: v_dual_mov_b32 v5, s8 :: v_dual_mov_b32 v4, s7 +; GFX11-NEXT: v_mov_b32_e32 v6, s9 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v2, s10 +; GFX11-NEXT: v_dual_mov_b32 v3, s11 :: v_dual_mov_b32 v4, s12 +; GFX11-NEXT: v_mov_b32_e32 v6, s14 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v3, s16 +; GFX11-NEXT: v_dual_mov_b32 v4, s17 :: v_dual_mov_b32 v5, s18 +; GFX11-NEXT: v_mov_b32_e32 v6, s19 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v3, s21 +; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v5, s23 +; GFX11-NEXT: v_mov_b32_e32 v6, s24 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_dual_mov_b32 v5, s28 :: v_dual_mov_b32 v2, s25 +; GFX11-NEXT: v_dual_mov_b32 v3, s26 :: v_dual_mov_b32 v4, s27 +; GFX11-NEXT: v_mov_b32_e32 v6, s29 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + ;%val = call i32 @llvm.amdgcn.workitem.id.x() + ;store volatile i32 %val, ptr addrspace(1) undef + + store volatile i32 %arg0, ptr addrspace(1) undef + store volatile i32 %arg1, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(1) undef + store volatile i32 %arg3, ptr addrspace(1) undef + store volatile i32 %arg4, ptr addrspace(1) undef + store volatile i32 %arg5, ptr addrspace(1) undef + store volatile i32 %arg6, ptr addrspace(1) undef + store volatile i32 %arg7, ptr addrspace(1) undef + + store volatile i32 %arg8, ptr addrspace(1) undef + store volatile i32 %arg9, ptr addrspace(1) undef + store volatile i32 %arg10, ptr addrspace(1) undef + store volatile i32 %arg11, ptr addrspace(1) undef + store volatile i32 %arg12, ptr addrspace(1) undef + store volatile i32 %arg13, ptr addrspace(1) undef + store volatile i32 %arg14, ptr addrspace(1) undef + store volatile i32 %arg15, ptr addrspace(1) undef + + store volatile i32 %arg16, ptr addrspace(1) undef + store volatile i32 %arg17, ptr addrspace(1) undef + store volatile i32 %arg18, ptr addrspace(1) undef + store volatile i32 %arg19, ptr addrspace(1) undef + store volatile i32 %arg20, ptr addrspace(1) undef + store volatile i32 %arg21, ptr addrspace(1) undef + store volatile i32 %arg22, ptr addrspace(1) undef + store volatile i32 %arg23, ptr addrspace(1) undef + + store volatile i32 %arg24, ptr addrspace(1) undef + store volatile i32 %arg25, ptr addrspace(1) undef + store volatile i32 %arg26, ptr addrspace(1) undef + store volatile i32 %arg27, ptr addrspace(1) undef + store volatile i32 %arg28, ptr addrspace(1) undef + store volatile i32 %arg29, ptr addrspace(1) undef + store volatile i32 %arg30, ptr addrspace(1) undef + store volatile i32 %arg31, ptr addrspace(1) undef + + ret void +} + +define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 { +; GFX9-LABEL: void_func_i32_v2float_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: v_mov_b32_e32 v0, s5 +; GFX9-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: void_func_i32_v2float_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2 +; GFX11-NEXT: v_mov_b32_e32 v0, s1 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off +; GFX11-NEXT: s_setpc_b64 s[30:31] + store i32 %arg0, ptr addrspace(1) undef + store <2 x float> %arg1, ptr addrspace(1) undef + ret void +} + +define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 { +; GFX9-LABEL: caller_void_func_i32_v2float_inreg: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s7, s33 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[8:9] +; GFX9-NEXT: s_add_u32 s8, s8, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s9, s9, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s7, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s2, s6 +; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: caller_void_func_i32_v2float_inreg: +; GFX11: ; %bb.0: +; GFX11-NEXT s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT s_mov_b32 s3, s33 +; GFX11-NEXT s_mov_b32 s33, s32 +; GFX11-NEXT s_or_saveexec_b32 s4, -1 +; GFX11-NEXT scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill +; GFX11-NEXT s_mov_b32 exec_lo, s4 +; GFX11-NEXT s_add_i32 s32, s32, 16 +; GFX11-NEXT s_getpc_b64 s[4:5] +; GFX11-NEXT s_add_u32 s4, s4, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 +; GFX11-NEXT s_addc_u32 s5, s5, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 +; GFX11-NEXT v_writelane_b32 v40, s3, 2 +; GFX11-NEXT s_load_b64 s[4:5], s[4:5], 0x0 +; GFX11-NEXT v_writelane_b32 v40, s30, 0 +; GFX11-NEXT v_writelane_b32 v40, s31, 1 +; GFX11-NEXT s_waitcnt lgkmcnt(0) +; GFX11-NEXT s_swappc_b64 s[30:31], s[4:5] +; GFX11-NEXT s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT v_readlane_b32 s31, v40, 1 +; GFX11-NEXT v_readlane_b32 s30, v40, 0 +; GFX11-NEXT v_readlane_b32 s0, v40, 2 +; GFX11-NEXT s_or_saveexec_b32 s1, -1 +; GFX11-NEXT scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload +; GFX11-NEXT s_mov_b32 exec_lo, s1 +; GFX11-NEXT s_add_i32 s32, s32, -16 +; GFX11-NEXT s_mov_b32 s33, s0 +; GFX11-NEXT s_waitcnt vmcnt(0) +; GFX11-NEXT s_setpc_b64 s[30:31] + call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind noinline } + diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll index 9dcd3a66a16db..deaf361c823cf 100644 --- a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll +++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll @@ -134,13 +134,11 @@ l2: ; any of the v_cmp source operands. ; GCN-LABEL: check_saveexec_overwrites_vcmp_source: -; GCN: .LBB7_3: ; %then -; GFX1010: v_cmp_ge_i32_e32 vcc_lo, s[[A:[0-9]+]], v{{.*}} -; GFX1010-NEXT: v_mov_b32_e32 {{.*}}, s[[A]] -; GFX1010-NEXT: s_and_saveexec_b32 s[[A]], vcc_lo -; GFX1030: v_cmp_ge_i32_e32 vcc_lo, s[[A:[0-9]+]], v{{.*}} -; GFX1030-NEXT: v_mov_b32_e32 {{.*}}, s[[A]] -; GFX1030-NEXT: s_and_saveexec_b32 s[[A]], vcc_lo +; GCN: .LBB7_2: ; %then +; GFX1010: v_cmp_eq_u32_e64 s[[C:[0-9]+]], s[[A:[0-9]+]], s[[B:[0-9]+]] +; GFX1010-NEXT: s_cmp_ge_i32 s[[C]], s[[B]] +; GFX1030: v_cmp_eq_u32_e64 s[[C:[0-9]+]], s[[A:[0-9]+]], s[[B:[0-9]+]] +; GFX1030-NEXT: s_cmp_ge_i32 s[[C]], s[[B]] define i32 @check_saveexec_overwrites_vcmp_source(i32 inreg %a, i32 inreg %b) { entry: %0 = icmp sge i32 %a, 0