diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b2d1011eb506c..eaa1870f4be28 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1238,6 +1238,19 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst", // Subtarget Features (options and debugging) //===------------------------------------------------------------===// +// Ugly hack to accommodate assembling modules with mixed +// wavesizes. Ideally we would have a mapping symbol in assembly which +// would keep track of which sections of code should be treated as +// wave32 and wave64. Instead what users do is assemble with both +// wavesizes enabled. We translate this into this special mode so this +// only influences assembler behavior and nothing else. +def FeatureAssemblerPermissiveWavesize : SubtargetFeature< + "assembler-permissive-wavesize", + "AssemblerPermissiveWavesize", + "true", + "allow parsing wave32 and wave64 variants of instructions" +>; + class FeatureMaxPrivateElementSize : SubtargetFeature< "max-private-element-size-"#size, "MaxPrivateElementSize", diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 21dfdfd6bed04..dfbde85231a6e 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1246,6 +1246,12 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { // AsmParser //===----------------------------------------------------------------------===// +// TODO: define GET_SUBTARGET_FEATURE_NAME +#define GET_REGISTER_MATCHER +#include "AMDGPUGenAsmMatcher.inc" +#undef GET_REGISTER_MATCHER +#undef GET_SUBTARGET_FEATURE_NAME + // Holds info related to the current kernel, e.g. count of SGPRs used. // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next // .amdgpu_hsa_kernel or at EOF. 
@@ -1536,6 +1542,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser { return AMDGPU::isGFX10_BEncoding(getSTI()); } + bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; } + + bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; } + bool hasInv2PiInlineImm() const { return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } @@ -1603,6 +1613,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser { return &MII; } + // FIXME: This should not be used. Instead, should use queries derived from + // getAvailableFeatures(). const FeatureBitset &getFeatureBits() const { return getSTI().getFeatureBits(); } @@ -2259,9 +2271,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const { } bool AMDGPUOperand::isBoolReg() const { - auto FB = AsmParser->getFeatureBits(); - return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || - (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); + return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) || + (AsmParser->isWave32() && isSCSrc_b32())); } uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const @@ -5025,9 +5036,8 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, // Check if VCC register matches wavefront size bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const { - auto FB = getFeatureBits(); - return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || - (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); + return (Reg == AMDGPU::VCC && isWave64()) || + (Reg == AMDGPU::VCC_LO && isWave32()); } // One unique literal can be used. VOP3 literal is only allowed in GFX10+ @@ -5717,7 +5727,7 @@ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, // Check if this instruction may be used with a different wavesize. 
if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { - + // FIXME: Use getAvailableFeatures, and do not manually recompute FeatureBitset FeaturesWS32 = getFeatureBits(); FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) .flip(AMDGPU::FeatureWavefrontSize32); @@ -6472,10 +6482,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { if (!isGFX10Plus()) return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); - if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) + if (!isWave32()) return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); } else { - if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) + if (!isWave64()) return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); } } @@ -6484,10 +6494,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, if (C.wavefront_size == 5) { if (!isGFX10Plus()) return TokError("wavefront_size=5 is only allowed on GFX10+"); - if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) + if (!isWave32()) return TokError("wavefront_size=5 requires +WavefrontSize32"); } else if (C.wavefront_size == 6) { - if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) + if (!isWave64()) return TokError("wavefront_size=6 requires +WavefrontSize64"); } } @@ -10390,7 +10400,6 @@ LLVMInitializeAMDGPUAsmParser() { RegisterMCAsmParser B(getTheGCNTarget()); } -#define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION #define GET_MNEMONIC_SPELL_CHECKER #define GET_MNEMONIC_CHECKER diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f5367f3b88920..a54d6651c25c1 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -99,6 +99,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool EnableDS128 = false; bool EnablePRTStrictNull = false; bool 
DumpCode = false; + bool AssemblerPermissiveWavesize = false; // Subtarget statically properties set by tablegen bool FP64 = false; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index f2e2d0ed3f8a6..013cfeb364048 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -82,20 +82,36 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *STI = createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); + bool IsWave64 = STI->hasFeature(AMDGPU::FeatureWavefrontSize64); + bool IsWave32 = STI->hasFeature(AMDGPU::FeatureWavefrontSize32); + // FIXME: We should error for the default target. if (STI->getFeatureBits().none()) STI->ToggleFeature(AMDGPU::FeatureSouthernIslands); - if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) && - !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) { + if (!IsWave64 && !IsWave32) { // If there is no default wave size it must be a generation before gfx10, // these have FeatureWavefrontSize64 in their definition already. For gfx10+ // set wave32 as a default. STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI) ? AMDGPU::FeatureWavefrontSize32 : AMDGPU::FeatureWavefrontSize64); + } else if (IsWave64 && IsWave32) { + // The wave size is mutually exclusive. If both somehow end up set, wave32 + // wins if supported. + STI->ToggleFeature(AMDGPU::supportsWave32(*STI) + ? AMDGPU::FeatureWavefrontSize64 + : AMDGPU::FeatureWavefrontSize32); + + // If both wavesizes were manually requested, hack in a feature to permit + // assembling modules with mixed wavesizes. 
+ STI->ToggleFeature(AMDGPU::FeatureAssemblerPermissiveWavesize); } + assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) != + STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) && + "wavesize features are mutually exclusive"); + return STI; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index fb2cd04b364d7..18a53931a6390 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -7,9 +7,11 @@ //===----------------------------------------------------------------------===// def isWave32 : Predicate<"Subtarget->isWave32()">, - AssemblerPredicate <(all_of FeatureWavefrontSize32)>; + AssemblerPredicate <(any_of FeatureWavefrontSize32, + FeatureAssemblerPermissiveWavesize)>; def isWave64 : Predicate<"Subtarget->isWave64()">, - AssemblerPredicate <(all_of FeatureWavefrontSize64)>; + AssemblerPredicate <(any_of FeatureWavefrontSize64, + FeatureAssemblerPermissiveWavesize)>; class AMDGPUMnemonicAlias : MnemonicAlias, PredicateControl; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 37b0262966160..2b9c063f42a5e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1568,6 +1568,11 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); bool hasMAIInsts(const MCSubtargetInfo &STI); bool hasVOPD(const MCSubtargetInfo &STI); bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); + +inline bool supportsWave32(const MCSubtargetInfo &STI) { + return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI); +} + int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); unsigned hasKernargPreload(const MCSubtargetInfo &STI); bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST); diff --git a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s new file mode 100644 index 
0000000000000..3a8656c392ff5 --- /dev/null +++ b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s @@ -0,0 +1,23 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s + +// Make sure setting both modes is supported at the same time. +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s + +// Test that there is no assertion when using an explicit +// wavefrontsize attribute on a target which does not support it. + +// GFX1250: v_add_f64_e32 v[0:1], 1.0, v[0:1] +// GFX900: v_add_f64 v[0:1], 1.0, v[0:1] +// GFX10: v_add_f64 v[0:1], 1.0, v[0:1] +v_add_f64 v[0:1], 1.0, v[0:1] + +// GFX1250: v_cmp_eq_u32_e64 s[0:1], 1.0, s1 +// GFX900: v_cmp_eq_u32_e64 s[0:1], 1.0, s1 +// GFX10: v_cmp_eq_u32_e64 s[0:1], 1.0, s1 +v_cmp_eq_u32_e64 s[0:1], 1.0, s1 + +// GFX1250: v_cndmask_b32_e64 v1, v2, v3, s[0:1] +// GFX900: v_cndmask_b32_e64 v1, v2, v3, s[0:1] +// GFX10: v_cndmask_b32_e64 v1, v2, v3, s[0:1] +v_cndmask_b32 v1, v2, v3, s[0:1] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt index 2156a682337e8..336f4b2e88f47 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W64 %s - +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s # W32: v_cmp_class_f32_e32 vcc_lo, -1, v2 ; encoding: 
[0xc1,0x04,0x10,0x7d] # W64: v_cmp_class_f32_e32 vcc, -1, v2 ; encoding: [0xc1,0x04,0x10,0x7d] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s new file mode 100644 index 0000000000000..bdea636a9efe3 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -o - %s | FileCheck %s + +# Make sure there's no assertion when trying to use an unsupported +# wave64 on a wave32-only target + +# CHECK: v_add_f64_e32 v[0:1], 1.0, v[0:1] +0xf2,0x00,0x00,0x04 + +# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1 +0x00,0x00,0x4a,0xd4,0xf2,0x02,0x00,0x00 + +# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1] +0x01,0x00,0x01,0xd5,0x02,0x07,0x02,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt new file mode 100644 index 0000000000000..40494b3dfa1ea --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt @@ -0,0 +1,13 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -disassemble -o - %s | FileCheck %s + +# Make sure there's no assertion when trying to use an unsupported +# wave32 on a wave64-only target + +# CHECK: v_add_f64 v[0:1], 1.0, v[0:1] +0x00,0x00,0x80,0xd2,0xf2,0x00,0x02,0x00 + +# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1 +0x00,0x00,0xca,0xd0,0xf2,0x02,0x00,0x00 + +# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1] +0x01,0x00,0x00,0xd1,0x02,0x07,0x02,0x00