diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index de81904143b7b..0596dca4a4ed2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -19,7 +19,6 @@ #include "AMDGPU.h" #include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPUResourceUsageAnalysis.h" -#include "AMDKernelCodeT.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCExpr.h" @@ -29,6 +28,7 @@ #include "SIMachineFunctionInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "Utils/AMDKernelCodeTUtils.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { if (STM.isMesaKernel(F) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { - amd_kernel_code_t KernelCode; + AMDGPUMCKernelCodeT KernelCode; getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); + KernelCode.validate(&STM, MF->getContext()); getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); } @@ -1320,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { } } -void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, +void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out, const SIProgramInfo &CurrentProgramInfo, const MachineFunction &MF) const { const Function &F = MF.getFunction(); @@ -1331,24 +1332,22 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, const GCNSubtarget &STM = MF.getSubtarget(); MCContext &Ctx = MF.getContext(); - AMDGPU::initDefaultAMDKernelCodeT(Out, &STM); + Out.initDefault(&STM, Ctx, /*InitMCExpr=*/false); - Out.compute_pgm_resource_registers = - CurrentProgramInfo.getComputePGMRSrc1(STM) | - (CurrentProgramInfo.getComputePGMRSrc2() << 32); + Out.compute_pgm_resource1_registers = + CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx); + Out.compute_pgm_resource2_registers = + CurrentProgramInfo.getComputePGMRSrc2(Ctx); Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64; - if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx)) - Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK; + Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack; - AMD_HSA_BITS_SET(Out.code_properties, - AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, + AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize(true))); const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo(); if (UserSGPRInfo.hasPrivateSegmentBuffer()) { - Out.code_properties |= - AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } if (UserSGPRInfo.hasDispatchPtr()) @@ -1374,10 +1373,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, Align MaxKernArgAlign; Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign); - Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx); - Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx); - Out.workitem_private_segment_byte_size = - getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx); + Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; + Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; + Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize; // kernarg_segment_alignment is specified as log of the alignment. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 16d8952a533ef..87156f27fc6c5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -17,8 +17,6 @@ #include "SIProgramInfo.h" #include "llvm/CodeGen/AsmPrinter.h" -struct amd_kernel_code_t; - namespace llvm { class AMDGPUMachineFunction; @@ -29,6 +27,7 @@ class MCOperand; namespace AMDGPU { struct MCKernelDescriptor; +struct AMDGPUMCKernelCodeT; namespace HSAMD { class MetadataStreamer; } @@ -50,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter { uint64_t getFunctionCodeSize(const MachineFunction &MF) const; void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); - void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, + void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out, + const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; /// Emit register usage information so that the GPU driver diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index d47a5f8ebb815..8b1c1c53de30d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1340,7 +1340,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool ParseDirectiveAMDGCNTarget(); bool ParseDirectiveAMDHSACodeObjectVersion(); bool ParseDirectiveAMDHSAKernel(); - bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); + bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header); bool ParseDirectiveAMDKernelCodeT(); // TODO: Possibly make subtargetHasRegister const. bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); @@ -5873,7 +5873,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { } bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, - amd_kernel_code_t &Header) { + AMDGPUMCKernelCodeT &C) { // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing // assembly for backwards compatibility. if (ID == "max_scratch_backing_memory_byte_size") { @@ -5883,25 +5883,13 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, SmallString<40> ErrStr; raw_svector_ostream Err(ErrStr); - if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { + if (!C.ParseKernelCodeT(ID, getParser(), Err)) { return TokError(Err.str()); } Lex(); - if (ID == "enable_dx10_clamp") { - if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && - isGFX12Plus()) - return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); - } - - if (ID == "enable_ieee_mode") { - if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && - isGFX12Plus()) - return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); - } - if (ID == "enable_wavefront_size32") { - if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { + if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { if (!isGFX10Plus()) return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) @@ -5913,41 +5901,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, } if (ID == "wavefront_size") { - if (Header.wavefront_size == 5) { + if (C.wavefront_size == 5) { if (!isGFX10Plus()) return TokError("wavefront_size=5 is only allowed on GFX10+"); if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) return TokError("wavefront_size=5 requires +WavefrontSize32"); - } else if (Header.wavefront_size == 6) { + } else if (C.wavefront_size == 6) { if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) return TokError("wavefront_size=6 requires +WavefrontSize64"); } } - if (ID == "enable_wgp_mode") { - if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && - !isGFX10Plus()) - return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); - } - - if (ID == "enable_mem_ordered") { - if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && - !isGFX10Plus()) - return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); - } - - if (ID == "enable_fwd_progress") { - if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && - !isGFX10Plus()) - return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); - } - return false; } bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { - amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); + AMDGPUMCKernelCodeT KernelCode; + KernelCode.initDefault(&getSTI(), getContext()); while (true) { // Lex EndOfStatement. This is in a while loop, because lexing a comment @@ -5961,11 +5931,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { if (ID == ".end_amd_kernel_code_t") break; - if (ParseAMDKernelCodeTValue(ID, Header)) + if (ParseAMDKernelCodeTValue(ID, KernelCode)) return true; } - getTargetStreamer().EmitAMDKernelCodeT(Header); + KernelCode.validate(&getSTI(), getContext()); + getTargetStreamer().EmitAMDKernelCodeT(KernelCode); return false; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 02fe7be06280e..00e64e3419ba0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -13,7 +13,6 @@ #include "AMDGPUTargetStreamer.h" #include "AMDGPUMCKernelDescriptor.h" #include "AMDGPUPTNote.h" -#include "AMDKernelCodeT.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" @@ -240,10 +239,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion( OS << "\t.amdhsa_code_object_version " << COV << '\n'; } -void -AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { +void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { OS << "\t.amd_kernel_code_t\n"; - dumpAmdKernelCode(&Header, OS, "\t\t"); + Header.EmitKernelCodeT(OS, getContext()); OS << "\t.end_amd_kernel_code_t\n"; } @@ -789,12 +787,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {} -void -AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { - +void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { MCStreamer &OS = getStreamer(); OS.pushSection(); - OS.emitBytes(StringRef((const char*)&Header, sizeof(Header))); + Header.EmitKernelCodeT(OS, getContext()); OS.popSection(); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 706897a5dc1f4..e5c90060cb5d0 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -13,8 +13,6 @@ #include "Utils/AMDGPUPALMetadata.h" #include "llvm/MC/MCStreamer.h" -struct amd_kernel_code_t; - namespace llvm { class MCELFStreamer; @@ -23,6 +21,7 @@ class formatted_raw_ostream; namespace AMDGPU { +struct AMDGPUMCKernelCodeT; struct MCKernelDescriptor; namespace HSAMD { struct Metadata; @@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { CodeObjectVersion = COV; } - virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){}; + virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {}; virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){}; @@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override; - void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; + void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override; void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; @@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { void EmitDirectiveAMDGCNTarget() override; - void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override; + void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override; void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 6d0e0b3f4de2c..1e9bfc77ab923 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 }; #define C_00B84C_LDS_SIZE 0xFF007FFF #define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24) #define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F) -#define C_00B84C_EXCP_EN +#define C_00B84C_EXCP_EN 0x80FFFFFF #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC #define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0 diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2beaf903542bd..4b34fb27632a2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -11,6 +11,7 @@ #include "AMDGPUAsmUtils.h" #include "AMDKernelCodeT.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" @@ -1218,39 +1219,37 @@ unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, } } // end namespace IsaInfo -void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, +void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI) { IsaVersion Version = getIsaVersion(STI->getCPU()); - - memset(&Header, 0, sizeof(Header)); - - Header.amd_kernel_code_version_major = 1; - Header.amd_kernel_code_version_minor = 2; - Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU - Header.amd_machine_version_major = Version.Major; - Header.amd_machine_version_minor = Version.Minor; - Header.amd_machine_version_stepping = Version.Stepping; - Header.kernel_code_entry_byte_offset = sizeof(Header); - Header.wavefront_size = 6; + KernelCode.amd_kernel_code_version_major = 1; + KernelCode.amd_kernel_code_version_minor = 2; + KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU + KernelCode.amd_machine_version_major = Version.Major; + KernelCode.amd_machine_version_minor = Version.Minor; + KernelCode.amd_machine_version_stepping = Version.Stepping; + KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t); + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { + KernelCode.wavefront_size = 5; + KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; + } else { + KernelCode.wavefront_size = 6; + } // If the code object does not support indirect functions, then the value must // be 0xffffffff. - Header.call_convention = -1; + KernelCode.call_convention = -1; // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. - Header.kernarg_segment_alignment = 4; - Header.group_segment_alignment = 4; - Header.private_segment_alignment = 4; + KernelCode.kernarg_segment_alignment = 4; + KernelCode.group_segment_alignment = 4; + KernelCode.private_segment_alignment = 4; if (Version.Major >= 10) { - if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { - Header.wavefront_size = 5; - Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; - } - Header.compute_pgm_resource_registers |= - S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | - S_00B848_MEM_ORDERED(1); + KernelCode.compute_pgm_resource_registers |= + S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | + S_00B848_MEM_ORDERED(1); } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index fc4147df76e3e..3cfc42a7d24d5 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -37,6 +37,7 @@ class raw_ostream; namespace AMDGPU { +struct AMDGPUMCKernelCodeT; struct IsaVersion; /// Generic target versions emitted by this version of LLVM. @@ -860,7 +861,7 @@ unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); LLVM_READONLY unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); -void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, +void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header, const MCSubtargetInfo *STI); bool isGroupSegment(const GlobalValue *GV); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h index 95ad3f35d18f8..75cb6cffbd51b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h @@ -12,34 +12,51 @@ // //===----------------------------------------------------------------------===// -#define QNAME(name) amd_kernel_code_t::name +#define QNAME(name) AMDGPUMCKernelCodeT::name #define FLD_T(name) decltype(QNAME(name)), &QNAME(name) -#define FIELD2(sname, aname, name) \ - RECORD(sname, aname, printField, parseField) +#ifndef PRINTFIELD +#define PRINTFIELD(sname, aname, name) printField +#endif -#define FIELD(name) FIELD2(name, name, name) +#ifndef FIELD2 +#define FIELD2(sname, aname, name) \ + RECORD(sname, aname, PRINTFIELD(sname, aname, name), parseField) +#endif +#ifndef FIELD +#define FIELD(name) FIELD2(name, name, name) +#endif +#ifndef PRINTCODEPROP #define PRINTCODEPROP(name) \ printBitField +#endif +#ifndef PARSECODEPROP #define PARSECODEPROP(name) \ parseBitField +#endif +#ifndef CODEPROP #define CODEPROP(name, shift) \ RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift)) +#endif // have to define these lambdas because of Set/GetMacro +#ifndef PRINTCOMP #define PRINTCOMP(GetMacro, Shift) \ [](StringRef Name, const amd_kernel_code_t &C, raw_ostream &OS) { \ printName(OS, Name) << \ (int)GetMacro(C.compute_pgm_resource_registers >> Shift); \ } +#endif + +#ifndef PARSECOMP #define PARSECOMP(SetMacro, Shift) \ [](amd_kernel_code_t &C, MCAsmParser &MCParser, raw_ostream &Err) { \ int64_t Value = 0; \ @@ -49,15 +66,22 @@ C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \ return true; \ } +#endif +#ifndef COMPPGM #define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \ RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift)) +#endif +#ifndef COMPPGM1 #define COMPPGM1(name, aname, AccMacro) \ COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0) +#endif +#ifndef COMPPGM2 #define COMPPGM2(name, aname, AccMacro) \ COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32) +#endif /////////////////////////////////////////////////////////////////////////////// // Begin of the table @@ -143,13 +167,14 @@ FIELD(runtime_loader_kernel_symbol) #undef QNAME #undef FLD_T +#undef PRINTFIELD #undef FIELD2 #undef FIELD #undef PRINTCODEPROP #undef PARSECODEPROP #undef CODEPROP #undef PRINTCOMP -#undef PAPSECOMP +#undef PARSECOMP #undef COMPPGM #undef COMPPGM1 #undef COMPPGM2 diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index 6bbc8c3157187..eaee1a2a97399 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -6,44 +6,205 @@ // //===----------------------------------------------------------------------===// // -/// \file - utility functions to parse/print amd_kernel_code_t structure +/// \file - utility functions to parse/print AMDGPUMCKernelCodeT structure // //===----------------------------------------------------------------------===// #include "AMDKernelCodeTUtils.h" #include "AMDKernelCodeT.h" #include "SIDefines.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringMap.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +using namespace llvm::AMDGPU; -static ArrayRef get_amd_kernel_code_t_FldNames() { - static StringRef const Table[] = { - "", // not found placeholder +// Generates the following for AMDGPUMCKernelCodeT struct members: +// - HasMemberXXXXX class +// A check to see if AMDGPUMCKernelCodeT has a specific member so it can +// determine which of the original amd_kernel_code_t members are duplicated +// (if the names don't match, the table driven strategy won't work). +// - IsMCExprXXXXX class +// Check whether a AMDGPUMCKernelcodeT struct member is MCExpr-ified or not. +// - GetMemberXXXXX class +// A retrieval helper for said member (of type const MCExpr *&). Will return +// a `Phony` const MCExpr * initialized to nullptr to preserve reference +// returns. +#define GEN_HAS_MEMBER(member) \ + class HasMember##member { \ + private: \ + struct KnownWithMember { \ + int member; \ + }; \ + class AmbiguousDerived : public AMDGPUMCKernelCodeT, \ + public KnownWithMember {}; \ + template \ + static constexpr std::false_type Test(decltype(U::member) *); \ + template static constexpr std::true_type Test(...); \ + \ + public: \ + static constexpr bool RESULT = \ + std::is_same_v(nullptr)), \ + std::true_type>; \ + }; \ + class IsMCExpr##member { \ + template , \ + U> * = nullptr> \ + static constexpr std::true_type HasMCExprType(decltype(U::member) *); \ + template static constexpr std::false_type HasMCExprType(...); \ + \ + public: \ + static constexpr bool RESULT = \ + std::is_same_v(nullptr)), \ + std::true_type>; \ + }; \ + class GetMember##member { \ + public: \ + static const MCExpr *Phony; \ + template * = nullptr> \ + static const MCExpr *&Get(U &C) { \ + assert(IsMCExpr##member::RESULT && \ + "Trying to retrieve member that does not exist."); \ + return C.member; \ + } \ + template * = nullptr> \ + static const MCExpr *&Get(U &C) { \ + return Phony; \ + } \ + }; \ + const MCExpr *GetMember##member::Phony = nullptr; + +// Cannot generate class declarations using the table driver approach (see table +// in AMDKernelCodeTInfo.h). Luckily, if any are missing here or eventually +// added to the table, an error should occur when trying to retrieve the table +// in getMCExprIndexTable. +GEN_HAS_MEMBER(amd_code_version_major) +GEN_HAS_MEMBER(amd_code_version_minor) +GEN_HAS_MEMBER(amd_machine_kind) +GEN_HAS_MEMBER(amd_machine_version_major) +GEN_HAS_MEMBER(amd_machine_version_minor) +GEN_HAS_MEMBER(amd_machine_version_stepping) + +GEN_HAS_MEMBER(kernel_code_entry_byte_offset) +GEN_HAS_MEMBER(kernel_code_prefetch_byte_size) + +GEN_HAS_MEMBER(granulated_workitem_vgpr_count) +GEN_HAS_MEMBER(granulated_wavefront_sgpr_count) +GEN_HAS_MEMBER(priority) +GEN_HAS_MEMBER(float_mode) +GEN_HAS_MEMBER(priv) +GEN_HAS_MEMBER(enable_dx10_clamp) +GEN_HAS_MEMBER(debug_mode) +GEN_HAS_MEMBER(enable_ieee_mode) +GEN_HAS_MEMBER(enable_wgp_mode) +GEN_HAS_MEMBER(enable_mem_ordered) +GEN_HAS_MEMBER(enable_fwd_progress) + +GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset) +GEN_HAS_MEMBER(user_sgpr_count) +GEN_HAS_MEMBER(enable_trap_handler) +GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x) +GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y) +GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z) +GEN_HAS_MEMBER(enable_sgpr_workgroup_info) +GEN_HAS_MEMBER(enable_vgpr_workitem_id) +GEN_HAS_MEMBER(enable_exception_msb) +GEN_HAS_MEMBER(granulated_lds_size) +GEN_HAS_MEMBER(enable_exception) + +GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer) +GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr) +GEN_HAS_MEMBER(enable_sgpr_queue_ptr) +GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr) +GEN_HAS_MEMBER(enable_sgpr_dispatch_id) +GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init) +GEN_HAS_MEMBER(enable_sgpr_private_segment_size) +GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x) +GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y) +GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z) +GEN_HAS_MEMBER(enable_wavefront_size32) +GEN_HAS_MEMBER(enable_ordered_append_gds) +GEN_HAS_MEMBER(private_element_size) +GEN_HAS_MEMBER(is_ptr64) +GEN_HAS_MEMBER(is_dynamic_callstack) +GEN_HAS_MEMBER(is_debug_enabled) +GEN_HAS_MEMBER(is_xnack_enabled) + +GEN_HAS_MEMBER(workitem_private_segment_byte_size) +GEN_HAS_MEMBER(workgroup_group_segment_byte_size) +GEN_HAS_MEMBER(gds_segment_byte_size) +GEN_HAS_MEMBER(kernarg_segment_byte_size) +GEN_HAS_MEMBER(workgroup_fbarrier_count) +GEN_HAS_MEMBER(wavefront_sgpr_count) +GEN_HAS_MEMBER(workitem_vgpr_count) +GEN_HAS_MEMBER(reserved_vgpr_first) +GEN_HAS_MEMBER(reserved_vgpr_count) +GEN_HAS_MEMBER(reserved_sgpr_first) +GEN_HAS_MEMBER(reserved_sgpr_count) +GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr) +GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr) +GEN_HAS_MEMBER(kernarg_segment_alignment) +GEN_HAS_MEMBER(group_segment_alignment) +GEN_HAS_MEMBER(private_segment_alignment) +GEN_HAS_MEMBER(wavefront_size) +GEN_HAS_MEMBER(call_convention) +GEN_HAS_MEMBER(runtime_loader_kernel_symbol) + +static ArrayRef get_amd_kernel_code_t_FldNames() { + static constexpr StringLiteral const Table[] = { + "", // not found placeholder #define RECORD(name, altName, print, parse) #name -#include "AMDKernelCodeTInfo.h" +#include "Utils/AMDKernelCodeTInfo.h" #undef RECORD }; return ArrayRef(Table); } -static ArrayRef get_amd_kernel_code_t_FldAltNames() { - static StringRef const Table[] = { - "", // not found placeholder +static ArrayRef get_amd_kernel_code_t_FldAltNames() { + static constexpr StringLiteral const Table[] = { + "", // not found placeholder #define RECORD(name, altName, print, parse) #altName -#include "AMDKernelCodeTInfo.h" +#include "Utils/AMDKernelCodeTInfo.h" +#undef RECORD + }; + return ArrayRef(Table); +} + +static ArrayRef hasMCExprVersionTable() { + static bool const Table[] = { +#define RECORD(name, altName, print, parse) (IsMCExpr##name::RESULT) +#include "Utils/AMDKernelCodeTInfo.h" #undef RECORD }; return ArrayRef(Table); } -static StringMap createIndexMap(const ArrayRef &names, - const ArrayRef &altNames) { +using RetrieveFx = const MCExpr *&(*)(AMDGPUMCKernelCodeT &); + +static ArrayRef getMCExprIndexTable() { + static const RetrieveFx Table[] = { +#define RECORD(name, altName, print, parse) GetMember##name::Get +#include "Utils/AMDKernelCodeTInfo.h" +#undef RECORD + }; + return ArrayRef(Table); +} + +static StringMap createIndexMap(ArrayRef names, + ArrayRef altNames) { StringMap map; assert(names.size() == altNames.size()); for (unsigned i = 0; i < names.size(); ++i) { @@ -59,62 +220,111 @@ static int get_amd_kernel_code_t_FieldIndex(StringRef name) { return map.lookup(name) - 1; // returns -1 if not found } -static StringRef get_amd_kernel_code_t_FieldName(int index) { - return get_amd_kernel_code_t_FldNames()[index + 1]; -} +static constexpr std::pair getShiftMask(unsigned Value) { + unsigned Shift = 0; + unsigned Mask = 0; -// Field printing + Mask = ~Value; + for (; !(Mask & 1); Shift++, Mask >>= 1) { + } -static raw_ostream &printName(raw_ostream &OS, StringRef Name) { - return OS << Name << " = "; + return std::make_pair(Shift, Mask); } -template -static void printField(StringRef Name, const amd_kernel_code_t &C, - raw_ostream &OS) { - printName(OS, Name) << (int)(C.*ptr); +static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask, + uint32_t Shift, MCContext &Ctx) { + if (Mask) { + const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); + Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); + } + if (Shift) { + const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); + Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx); + } + return Val; } -template -static void printBitField(StringRef Name, const amd_kernel_code_t &c, - raw_ostream &OS) { +static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask, + uint32_t Shift, MCContext &Ctx) { + if (Shift) { + const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); + Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx); + } + if (Mask) { + const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); + Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); + } + return Val; +} + +class PrintField { +public: + template , T> * = nullptr> + static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C, + raw_ostream &OS, MCContext &Ctx) { + OS << Name << " = "; + const MCExpr *Value = C.*ptr; + int64_t Val; + if (Value->evaluateAsAbsolute(Val)) + OS << Val; + else + Value->print(OS, Ctx.getAsmInfo()); + } + + template , T> * = nullptr> + static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C, + raw_ostream &OS, MCContext &) { + OS << Name << " = " << (int)(C.*ptr); + } +}; + +template +static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C, + raw_ostream &OS, MCContext &) { const auto Mask = (static_cast(1) << width) - 1; - printName(OS, Name) << (int)((c.*ptr >> shift) & Mask); + OS << Name << " = " << (int)((C.*ptr >> shift) & Mask); } -using PrintFx = void(*)(StringRef, const amd_kernel_code_t &, raw_ostream &); +using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &, + MCContext &); static ArrayRef getPrinterTable() { static const PrintFx Table[] = { +#define COMPPGM1(name, aname, AccMacro) \ + COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0) +#define COMPPGM2(name, aname, AccMacro) \ + COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32) +#define PRINTFIELD(sname, aname, name) PrintField::printField +#define PRINTCOMP(Complement, PGMType) \ + [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \ + MCContext &Ctx) { \ + OS << Name << " = "; \ + auto [Shift, Mask] = getShiftMask(Complement); \ + const MCExpr *Value; \ + if (PGMType == 0) { \ + Value = \ + MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \ + } else { \ + Value = \ + MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \ + } \ + int64_t Val; \ + if (Value->evaluateAsAbsolute(Val)) \ + OS << Val; \ + else \ + Value->print(OS, Ctx.getAsmInfo()); \ + } #define RECORD(name, altName, print, parse) print -#include "AMDKernelCodeTInfo.h" +#include "Utils/AMDKernelCodeTInfo.h" #undef RECORD }; return ArrayRef(Table); } -void llvm::printAmdKernelCodeField(const amd_kernel_code_t &C, - int FldIndex, - raw_ostream &OS) { - auto Printer = getPrinterTable()[FldIndex]; - if (Printer) - Printer(get_amd_kernel_code_t_FieldName(FldIndex), C, OS); -} - -void llvm::dumpAmdKernelCode(const amd_kernel_code_t *C, - raw_ostream &OS, - const char *tab) { - const int Size = getPrinterTable().size(); - for (int i = 0; i < Size; ++i) { - OS << tab; - printAmdKernelCodeField(*C, i, OS); - OS << '\n'; - } -} - -// Field parsing - -static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value, raw_ostream& Err) { +static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value, + raw_ostream &Err) { if (MCParser.getLexer().isNot(AsmToken::Equal)) { Err << "expected '='"; @@ -129,8 +339,8 @@ static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value, raw_ostre return true; } -template -static bool parseField(amd_kernel_code_t &C, MCAsmParser &MCParser, +template +static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) { int64_t Value = 0; if (!expectAbsExpression(MCParser, Value, Err)) @@ -139,39 +349,241 @@ static bool parseField(amd_kernel_code_t &C, MCAsmParser &MCParser, return true; } -template -static bool parseBitField(amd_kernel_code_t &C, MCAsmParser &MCParser, +template +static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, raw_ostream &Err) { int64_t Value = 0; if (!expectAbsExpression(MCParser, Value, Err)) return false; - const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift; + const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift; C.*ptr &= (T)~Mask; C.*ptr |= (T)((Value << shift) & Mask); return true; } -using ParseFx = bool(*)(amd_kernel_code_t &, MCAsmParser &MCParser, - raw_ostream &Err); +static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, + raw_ostream &Err) { + if (MCParser.getLexer().isNot(AsmToken::Equal)) { + Err << "expected '='"; + return false; + } + MCParser.getLexer().Lex(); + + if (MCParser.parseExpression(Value)) { + Err << "Could not parse expression"; + return false; + } + return true; +} + +using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &); static ArrayRef getParserTable() { static const ParseFx Table[] = { +#define COMPPGM1(name, aname, AccMacro) \ + COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0) +#define COMPPGM2(name, aname, AccMacro) \ + COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32) +#define PARSECOMP(Complement, PGMType) \ + [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, \ + raw_ostream &Err) -> bool { \ + MCContext &Ctx = MCParser.getContext(); \ + const MCExpr *Value; \ + if (!parseExpr(MCParser, Value, Err)) \ + return false; \ + auto [Shift, Mask] = getShiftMask(Complement); \ + Value = MaskShiftSet(Value, Mask, Shift, Ctx); \ + const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \ + if (PGMType == 0) { \ + C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \ + C.compute_pgm_resource1_registers, Compl, Ctx); \ + C.compute_pgm_resource1_registers = MCBinaryExpr::createOr( \ + C.compute_pgm_resource1_registers, Value, Ctx); \ + } else { \ + C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd( \ + C.compute_pgm_resource2_registers, Compl, Ctx); \ + C.compute_pgm_resource2_registers = MCBinaryExpr::createOr( \ + C.compute_pgm_resource2_registers, Value, Ctx); \ + } \ + return true; \ + } #define RECORD(name, altName, print, parse) parse -#include "AMDKernelCodeTInfo.h" +#include "Utils/AMDKernelCodeTInfo.h" #undef RECORD }; return ArrayRef(Table); } -bool llvm::parseAmdKernelCodeField(StringRef ID, - MCAsmParser &MCParser, - amd_kernel_code_t &C, - raw_ostream &Err) { +static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex, + raw_ostream &OS, MCContext &Ctx) { + auto Printer = getPrinterTable()[FldIndex]; + if (Printer) + Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx); +} + +void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI, + MCContext &Ctx, bool InitMCExpr) { + AMDGPUMCKernelCodeT(); + + AMDGPU::initDefaultAMDKernelCodeT(*this, STI); + + if (InitMCExpr) { + const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx); + compute_pgm_resource1_registers = + MCConstantExpr::create(Lo_32(compute_pgm_resource_registers), Ctx); + compute_pgm_resource2_registers = + MCConstantExpr::create(Hi_32(compute_pgm_resource_registers), Ctx); + is_dynamic_callstack = ZeroExpr; + wavefront_sgpr_count = ZeroExpr; + workitem_vgpr_count = ZeroExpr; + workitem_private_segment_byte_size = ZeroExpr; + } +} + +void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) { + int64_t Value; + if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value)) + return; + + if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) { + Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+"); + return; + } + + if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) { + Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+"); + return; + } + + if (G_00B848_WGP_MODE(Value) && !AMDGPU::isGFX10Plus(*STI)) { + Ctx.reportError({}, "enable_wgp_mode=1 is only allowed on GFX10+"); + return; + } + + if (G_00B848_MEM_ORDERED(Value) && !AMDGPU::isGFX10Plus(*STI)) { + Ctx.reportError({}, "enable_mem_ordered=1 is only allowed on GFX10+"); + return; + } + + if (G_00B848_FWD_PROGRESS(Value) && !AMDGPU::isGFX10Plus(*STI)) { + Ctx.reportError({}, "enable_fwd_progress=1 is only allowed on GFX10+"); + return; + } +} + +const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) { + static const auto IndexTable = getMCExprIndexTable(); + return IndexTable[Index](*this); +} + +bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, + raw_ostream &Err) { const int Idx = get_amd_kernel_code_t_FieldIndex(ID); if (Idx < 0) { Err << "unexpected amd_kernel_code_t field name " << ID; return false; } + + if (hasMCExprVersionTable()[Idx]) { + const MCExpr *Value; + if (!parseExpr(MCParser, Value, Err)) + return false; + getMCExprForIndex(Idx) = Value; + return true; + } auto Parser = getParserTable()[Idx]; - return Parser ? Parser(C, MCParser, Err) : false; + return Parser ? Parser(*this, MCParser, Err) : false; +} + +void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) { + const int Size = hasMCExprVersionTable().size(); + for (int i = 0; i < Size; ++i) { + OS << "\t\t"; + if (hasMCExprVersionTable()[i]) { + OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = "; + int64_t Val; + const MCExpr *Value = getMCExprForIndex(i); + if (Value->evaluateAsAbsolute(Val)) + OS << Val; + else + Value->print(OS, Ctx.getAsmInfo()); + } else { + printAmdKernelCodeField(*this, i, OS, Ctx); + } + OS << '\n'; + } +} + +void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) { + OS.emitIntValue(amd_kernel_code_version_major, /*Size=*/4); + OS.emitIntValue(amd_kernel_code_version_minor, /*Size=*/4); + OS.emitIntValue(amd_machine_kind, /*Size=*/2); + OS.emitIntValue(amd_machine_version_major, /*Size=*/2); + OS.emitIntValue(amd_machine_version_minor, /*Size=*/2); + OS.emitIntValue(amd_machine_version_stepping, /*Size=*/2); + OS.emitIntValue(kernel_code_entry_byte_offset, /*Size=*/8); + OS.emitIntValue(kernel_code_prefetch_byte_offset, /*Size=*/8); + OS.emitIntValue(kernel_code_prefetch_byte_size, /*Size=*/8); + OS.emitIntValue(reserved0, /*Size=*/8); + + if (compute_pgm_resource1_registers != nullptr) + OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4); + else + OS.emitIntValue(Lo_32(compute_pgm_resource_registers), + /*Size=*/4); + + if (compute_pgm_resource2_registers != nullptr) + OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4); + else + OS.emitIntValue(Hi_32(compute_pgm_resource_registers), + /*Size=*/4); + + if (is_dynamic_callstack != nullptr) { + const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx); + CodeProps = MCBinaryExpr::createOr( + CodeProps, + MaskShiftSet(is_dynamic_callstack, + (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1, + AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx), + Ctx); + OS.emitValue(CodeProps, /*Size=*/4); + } else + OS.emitIntValue(code_properties, /*Size=*/4); + + if (workitem_private_segment_byte_size != nullptr) + OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4); + else + OS.emitIntValue(0, /*Size=*/4); + + OS.emitIntValue(workgroup_group_segment_byte_size, /*Size=*/4); + OS.emitIntValue(gds_segment_byte_size, /*Size=*/4); + OS.emitIntValue(kernarg_segment_byte_size, /*Size=*/8); + OS.emitIntValue(workgroup_fbarrier_count, /*Size=*/4); + + if (wavefront_sgpr_count != nullptr) + OS.emitValue(wavefront_sgpr_count, /*Size=*/2); + else + OS.emitIntValue(0, /*Size=*/2); + + if (workitem_vgpr_count != nullptr) + OS.emitValue(workitem_vgpr_count, /*Size=*/2); + else + OS.emitIntValue(0, /*Size=*/2); + + OS.emitIntValue(reserved_vgpr_first, /*Size=*/2); + OS.emitIntValue(reserved_vgpr_count, /*Size=*/2); + OS.emitIntValue(reserved_sgpr_first, /*Size=*/2); + OS.emitIntValue(reserved_sgpr_count, /*Size=*/2); + OS.emitIntValue(debug_wavefront_private_segment_offset_sgpr, + /*Size=*/2); + OS.emitIntValue(debug_private_segment_buffer_sgpr, /*Size=*/2); + OS.emitIntValue(kernarg_segment_alignment, /*Size=*/1); + OS.emitIntValue(group_segment_alignment, /*Size=*/1); + OS.emitIntValue(private_segment_alignment, /*Size=*/1); + OS.emitIntValue(wavefront_size, /*Size=*/1); + + OS.emitIntValue(call_convention, /*Size=*/4); + OS.emitBytes(StringRef((const char *)reserved3, /*Size=*/12)); + OS.emitIntValue(runtime_loader_kernel_symbol, /*Size=*/8); + OS.emitBytes(StringRef((const char *)control_directives, /*Size=*/16 * 8)); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h index 41d0e0d745e5e..6aeb98f1ce147 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h @@ -7,29 +7,84 @@ //===----------------------------------------------------------------------===// // /// \file AMDKernelCodeTUtils.h +/// MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where +/// required. +/// // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H -#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H -struct amd_kernel_code_t; +#include "AMDKernelCodeT.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" namespace llvm { - class MCAsmParser; +class MCContext; +class MCExpr; +class MCStreamer; +class MCSubtargetInfo; class raw_ostream; -class StringRef; +namespace AMDGPU { + +struct AMDGPUMCKernelCodeT { + AMDGPUMCKernelCodeT() = default; + + // Names of most (if not all) members should match the ones used for table + // driven (array) generation in AMDKernelCodeTInfo.h. + uint32_t amd_kernel_code_version_major = 0; + uint32_t amd_kernel_code_version_minor = 0; + uint16_t amd_machine_kind = 0; + uint16_t amd_machine_version_major = 0; + uint16_t amd_machine_version_minor = 0; + uint16_t amd_machine_version_stepping = 0; + int64_t kernel_code_entry_byte_offset = 0; + int64_t kernel_code_prefetch_byte_offset = 0; + uint64_t kernel_code_prefetch_byte_size = 0; + uint64_t reserved0 = 0; + uint64_t compute_pgm_resource_registers = 0; + uint32_t code_properties = 0; + uint32_t workgroup_group_segment_byte_size = 0; + uint32_t gds_segment_byte_size = 0; + uint64_t kernarg_segment_byte_size = 0; + uint32_t workgroup_fbarrier_count = 0; + uint16_t reserved_vgpr_first = 0; + uint16_t reserved_vgpr_count = 0; + uint16_t reserved_sgpr_first = 0; + uint16_t reserved_sgpr_count = 0; + uint16_t debug_wavefront_private_segment_offset_sgpr = 0; + uint16_t debug_private_segment_buffer_sgpr = 0; + uint8_t kernarg_segment_alignment = 0; + uint8_t group_segment_alignment = 0; + uint8_t private_segment_alignment = 0; + uint8_t wavefront_size = 0; + int32_t call_convention = 0; + uint8_t reserved3[12] = {0}; + uint64_t runtime_loader_kernel_symbol = 0; + uint64_t control_directives[16] = {0}; + + const MCExpr *compute_pgm_resource1_registers = nullptr; + const MCExpr *compute_pgm_resource2_registers = nullptr; + + const MCExpr *is_dynamic_callstack = nullptr; + const MCExpr *wavefront_sgpr_count = nullptr; + const MCExpr *workitem_vgpr_count = nullptr; + const MCExpr *workitem_private_segment_byte_size = nullptr; -void printAmdKernelCodeField(const amd_kernel_code_t &C, int FldIndex, - raw_ostream &OS); + void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, + bool InitMCExpr = true); + void validate(const MCSubtargetInfo *STI, MCContext &Ctx); -void dumpAmdKernelCode(const amd_kernel_code_t *C, raw_ostream &OS, - const char *tab); + const MCExpr *&getMCExprForIndex(int Index); -bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser, - amd_kernel_code_t &C, raw_ostream &Err); + bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err); + void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx); + void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx); +}; +} // end namespace AMDGPU } // end namespace llvm -#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELCODET_H diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt index 19d3b690b1315..2f4ce8eaf1d60 100644 --- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_component_library(LLVMAMDGPUUtils CodeGenTypes Core MC + MCParser Support TargetParser diff --git a/llvm/test/CodeGen/AMDGPU/kernel_code_t_recurse.ll b/llvm/test/CodeGen/AMDGPU/kernel_code_t_recurse.ll new file mode 100644 index 0000000000000..cdd6e88dd103b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/kernel_code_t_recurse.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=amdgcn-mesa-mesa3d < %s | FileCheck %s + +; CHECK-LABEL: non_kernel_recursion: +define void @non_kernel_recursion(i32 %val) #2 { + %cmp = icmp eq i32 %val, 0 + br i1 %cmp, label %ret, label %call + +call: + %val.sub1 = sub i32 %val, 1 + call void @non_kernel_recursion(i32 %val.sub1) + br label %ret + +ret: + ret void +} + +; CHECK-LABEL: kernel_caller_recursion: +; CHECK: .amd_kernel_code_t +; CHECK: is_dynamic_callstack = 1 +; CHECK: .end_amd_kernel_code_t +define amdgpu_kernel void @kernel_caller_recursion(i32 %n) #0 { + call void @non_kernel_recursion(i32 %n) + ret void +} diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s new file mode 100644 index 0000000000000..052ec0bfabb84 --- /dev/null +++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s @@ -0,0 +1,171 @@ +; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=asm < %s | FileCheck --check-prefix=ASM %s +; RUN: llvm-mc -triple=amdgcn-mesa-mesa3d -mcpu=gfx900 -filetype=obj < %s > %t +; RUN: llvm-objdump -s %t | FileCheck --check-prefix=OBJDUMP %s + +; OBJDUMP: Contents of section .known_is_dynamic_callstack: +; OBJDUMP: 0030 00000000 00000000 00001000 00000000 + +; OBJDUMP: Contents of section .known_wavefront_sgpr_count: +; OBJDUMP: 0050 00000000 01000000 00000000 00000000 + +; OBJDUMP: Contents of section .known_workitem_vgpr_count: +; OBJDUMP: 0050 00000000 00000100 00000000 00000000 + +; OBJDUMP: Contents of section .known_workitem_private_segment_byte_size: +; OBJDUMP: 0030 00000000 00000000 00000000 01000000 + +; OBJDUMP: Contents of section .known_granulated_workitem_vgpr_count: +; OBJDUMP: 0030 01000000 00000000 00000000 00000000 + +; OBJDUMP: Contents of section .known_enable_sgpr_workgroup_id_x: +; OBJDUMP: 0030 00000000 80000000 00000000 00000000 + +; OBJDUMP: Contents of section .unknown_is_dynamic_callstack: +; OBJDUMP: 0030 00000000 00000000 00001000 00000000 + +; OBJDUMP: Contents of section .unknown_wavefront_sgpr_count: +; OBJDUMP: 0050 00000000 01000000 00000000 00000000 + +; OBJDUMP: Contents of section .unknown_workitem_vgpr_count: +; OBJDUMP: 0050 00000000 00000100 00000000 00000000 + +; OBJDUMP: Contents of section .unknown_workitem_private_segment_byte_size: +; OBJDUMP: 0030 00000000 00000000 00000000 01000000 + +; OBJDUMP: Contents of section .unknown_granulated_workitem_vgpr_count: +; OBJDUMP: 0030 01000000 00000000 00000000 00000000 + +; OBJDUMP: Contents of section .unknown_enable_sgpr_workgroup_id_x: +; OBJDUMP: 0030 00000000 80000000 00000000 00000000 + +.set known, 1 + +; ASM-LABEL: known_is_dynamic_callstack: +; ASM: is_dynamic_callstack = 1 +.section .known_is_dynamic_callstack +known_is_dynamic_callstack: + .amd_kernel_code_t + is_dynamic_callstack = known + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: known_wavefront_sgpr_count: +; ASM: wavefront_sgpr_count = 1 +.section .known_wavefront_sgpr_count +known_wavefront_sgpr_count: + .amd_kernel_code_t + wavefront_sgpr_count = known + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: known_workitem_vgpr_count: +; ASM: workitem_vgpr_count = 1 +.section .known_workitem_vgpr_count +known_workitem_vgpr_count: + .amd_kernel_code_t + workitem_vgpr_count = known + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: known_workitem_private_segment_byte_size: +; ASM: workitem_private_segment_byte_size = 1 +.section .known_workitem_private_segment_byte_size +known_workitem_private_segment_byte_size: + .amd_kernel_code_t + workitem_private_segment_byte_size = known + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: known_granulated_workitem_vgpr_count: +; ASM: granulated_workitem_vgpr_count = 1 +.section .known_granulated_workitem_vgpr_count +known_granulated_workitem_vgpr_count: + .amd_kernel_code_t + granulated_workitem_vgpr_count = known + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: known_enable_sgpr_workgroup_id_x: +; ASM: enable_sgpr_workgroup_id_x = 1 +.section .known_enable_sgpr_workgroup_id_x +known_enable_sgpr_workgroup_id_x: + .amd_kernel_code_t + enable_sgpr_workgroup_id_x = known + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: unknown_is_dynamic_callstack: +; ASM: is_dynamic_callstack = unknown +.section .unknown_is_dynamic_callstack +unknown_is_dynamic_callstack: + .amd_kernel_code_t + is_dynamic_callstack = unknown + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: unknown_wavefront_sgpr_count: +; ASM: wavefront_sgpr_count = unknown +.section .unknown_wavefront_sgpr_count +unknown_wavefront_sgpr_count: + .amd_kernel_code_t + wavefront_sgpr_count = unknown + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: unknown_workitem_vgpr_count: +; ASM: workitem_vgpr_count = unknown +.section .unknown_workitem_vgpr_count +unknown_workitem_vgpr_count: + .amd_kernel_code_t + workitem_vgpr_count = unknown + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: unknown_workitem_private_segment_byte_size: +; ASM: workitem_private_segment_byte_size = unknown +.section .unknown_workitem_private_segment_byte_size +unknown_workitem_private_segment_byte_size: + .amd_kernel_code_t + workitem_private_segment_byte_size = unknown + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: unknown_granulated_workitem_vgpr_count: +; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63 +; ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15 +; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3 +; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255 +; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1 +; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1 +; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1 +; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1 +; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1 +; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1 +; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1 +.section .unknown_granulated_workitem_vgpr_count +unknown_granulated_workitem_vgpr_count: + .amd_kernel_code_t + granulated_workitem_vgpr_count = unknown + .end_amd_kernel_code_t + s_endpgm + +; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x: +; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1 +; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31 +; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1 +; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1 +; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1 +; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1 +; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1 +; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3 +; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3 +; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511 +; ASM: enable_exception = (((0&4294967167)|((unknown&1)<<7))>>24)&127 +.section .unknown_enable_sgpr_workgroup_id_x +unknown_enable_sgpr_workgroup_id_x: + .amd_kernel_code_t + enable_sgpr_workgroup_id_x = unknown + .end_amd_kernel_code_t + s_endpgm + +.set unknown, 1