diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 427a0d1fdf989..8eaec957b3835 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -641,7 +641,10 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), BytesInStackArgArea(MFI.getBytesInStackArgArea()), ReturnsVoid(MFI.returnsVoid()), - ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { + ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), + PSInputAddr(MFI.getPSInputAddr()), + PSInputEnable(MFI.getPSInputEnable()), + Mode(MFI.getMode()) { for (Register Reg : MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); @@ -664,6 +667,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( LDSSize = YamlMFI.LDSSize; GDSSize = YamlMFI.GDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; + PSInputAddr = YamlMFI.PSInputAddr; + PSInputEnable = YamlMFI.PSInputEnable; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; Occupancy = YamlMFI.Occupancy; IsEntryFunction = YamlMFI.IsEntryFunction; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index c7ac0de07a7b7..59f84cd63a50c 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -276,6 +276,10 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { bool ReturnsVoid = true; std::optional ArgInfo; + + unsigned PSInputAddr = 0; + unsigned PSInputEnable = 0; + SIMode Mode; std::optional ScavengeFI; StringValue VGPRForAGPRCopy; @@ -312,6 +316,8 @@ template <> struct MappingTraits { YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u); YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true); YamlIO.mapOptional("argumentInfo", MFI.ArgInfo); + YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u); + YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u); YamlIO.mapOptional("mode", MFI.Mode, SIMode()); YamlIO.mapOptional("highBitsOf32BitAddress", MFI.HighBitsOf32BitAddress, 0u); diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll index acff981f98503..1cf3699240d4c 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -26,6 +26,8 @@ ; AFTER-PEI-NEXT: workGroupIDX: { reg: '$sgpr6' } ; AFTER-PEI-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } ; AFTER-PEI-NEXT: workItemIDX: { reg: '$vgpr0' } +; AFTER-PEI-NEXT: psInputAddr: 0 +; AFTER-PEI-NEXT: psInputEnable: 0 ; AFTER-PEI-NEXT: mode: ; AFTER-PEI-NEXT: ieee: true ; AFTER-PEI-NEXT: dx10-clamp: true diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 6d4c60eb221df..fa8607dc1306c 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -36,6 +36,8 @@ # FULL-NEXT: workItemIDX: { reg: '$vgpr0' } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true @@ -134,6 +136,8 @@ body: | # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true @@ -203,6 +207,8 @@ body: | # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true @@ -273,6 +279,8 @@ body: | # FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } # FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } # FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +# FULL-NEXT: psInputAddr: 0 +# FULL-NEXT: psInputEnable: 0 # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index a3ed1f25ebd93..7d365faa336f9 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -30,6 +30,8 @@ ; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' } ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' } ; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true @@ -70,6 +72,8 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' } ; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' } +; CHECK-NEXT: psInputAddr: 1 +; CHECK-NEXT: psInputEnable: 1 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true @@ -87,6 +91,16 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ret void } +; CHECK-LABEL: {{^}}name: ps_shader_ps_input_enable +; CHECK: machineFunctionInfo: +; CHECK: psInputAddr: 36983 +; CHECK-NEXT: psInputEnable: 1{{$}} +define amdgpu_ps void @ps_shader_ps_input_enable(i32 %arg0, i32 inreg %arg1) #7 { + %gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0 + atomicrmw add ptr addrspace(2) %gep, i32 8 seq_cst + ret void +} + ; CHECK-LABEL: {{^}}name: gds_size_shader ; CHECK: gdsSize: 4096 define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { @@ -124,6 +138,8 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { ; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } ; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } ; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true @@ -170,6 +186,8 @@ define void @function() { ; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 } ; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 } ; CHECK-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } +; CHECK-NEXT: psInputAddr: 0 +; CHECK-NEXT: psInputEnable: 0 ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true @@ -251,3 +269,4 @@ attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" } attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" } attributes #5 = { "amdgpu-gds-size"="4096" } attributes #6 = { convergent nounwind readnone willreturn } +attributes #7 = { "InitialPSInputAddr"="36983" }