diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 6628e8f265fe4..61ec8b79bf054 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -256,10 +256,10 @@ TARGET_BUILTIN(__builtin_amdgcn_sudot4, "iIbiIbiiIb", "nc", "dot8-insts") TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts") TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts") TARGET_BUILTIN(__builtin_amdgcn_sudot8, "iIbiIbiiIb", "nc", "dot8-insts") -TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "gfx12-insts") -TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "dot11-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "dot11-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "dot11-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "dot11-insts") //===----------------------------------------------------------------------===// // GFX10+ only builtins. diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 9c8ca0bb96f61..7387f9a22f0df 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -100,8 +100,8 @@ // GFX1103: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1150: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl index f5317683d0ff9..ce36a807a6c0e 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl @@ -50,8 +50,8 @@ kernel void builtins_amdgcn_dl_insts_err( iOut[3] = __builtin_amdgcn_sudot8(false, A, true, B, C, false); // expected-error {{'__builtin_amdgcn_sudot8' needs target feature dot8-insts}} iOut[4] = __builtin_amdgcn_sudot8(true, A, false, B, C, true); // expected-error {{'__builtin_amdgcn_sudot8' needs target feature dot8-insts}} - fOut[5] = __builtin_amdgcn_dot4_f32_fp8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_bf8' needs target feature gfx12-insts}} - fOut[6] = __builtin_amdgcn_dot4_f32_bf8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_fp8' needs target feature gfx12-insts}} - fOut[7] = __builtin_amdgcn_dot4_f32_fp8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_fp8' needs target feature gfx12-insts}} - fOut[8] = __builtin_amdgcn_dot4_f32_bf8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_bf8' needs target feature gfx12-insts}} + fOut[5] = __builtin_amdgcn_dot4_f32_fp8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_bf8' needs target feature dot11-insts}} + fOut[6] = __builtin_amdgcn_dot4_f32_bf8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_fp8' needs target feature dot11-insts}} + fOut[7] = __builtin_amdgcn_dot4_f32_fp8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_fp8' needs target feature dot11-insts}} + fOut[8] = __builtin_amdgcn_dot4_f32_bf8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_bf8' needs target feature dot11-insts}} } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 8906c46f279e2..3942354767691 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -650,6 +650,12 @@ def FeatureDot10Insts : SubtargetFeature<"dot10-insts", "Has v_dot2_f32_f16 instruction" >; +def FeatureDot11Insts : SubtargetFeature<"dot11-insts", + "HasDot11Insts", + "true", + "Has v_dot4_f32_fp8_fp8, v_dot4_f32_fp8_bf8, v_dot4_f32_bf8_fp8, v_dot4_f32_bf8_bf8 instructions" +>; + def FeatureMAIInsts : SubtargetFeature<"mai-insts", "HasMAIInsts", "true", @@ -1521,6 +1527,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureDot8Insts, FeatureDot9Insts, FeatureDot10Insts, + FeatureDot11Insts, FeatureNSAEncoding, FeaturePartialNSAEncoding, FeatureWavefrontSize32, @@ -2029,6 +2036,9 @@ def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">, def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">, AssemblerPredicate<(all_of FeatureDot10Insts)>; +def HasDot11Insts : Predicate<"Subtarget->hasDot11Insts()">, + AssemblerPredicate<(all_of FeatureDot11Insts)>; + def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">, AssemblerPredicate<(all_of FeatureGetWaveIdInst)>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 3283ac72aa4dd..b6c01da7d98eb 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -153,6 +153,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasDot8Insts = false; bool HasDot9Insts = false; bool HasDot10Insts = false; + bool HasDot11Insts = false; bool HasMAIInsts = false; bool HasFP8Insts = false; bool HasFP8ConversionInsts = false; @@ -793,6 +794,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return HasDot10Insts; } + bool hasDot11Insts() const { + return HasDot11Insts; + } + bool hasMAIInsts() const { return HasMAIInsts; } diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index ac3c8f95306bc..e1131bbb78d3f 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -480,10 +480,12 @@ multiclass VOP3PDOTF8Inst { i32:$src2_modifiers, f32:$src2)>; } +let OtherPredicates = [HasDot11Insts] in { defm V_DOT4_F32_FP8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_bf8", int_amdgcn_dot4_f32_fp8_bf8>; defm V_DOT4_F32_BF8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_fp8", int_amdgcn_dot4_f32_bf8_fp8>; defm V_DOT4_F32_FP8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_fp8", int_amdgcn_dot4_f32_fp8_fp8>; defm V_DOT4_F32_BF8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_bf8", int_amdgcn_dot4_f32_bf8_bf8>; +} def : UDot2Pat; def : SDot2Pat; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index a31027c59ee9d..0d784a79e5bac 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -318,6 +318,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["dot8-insts"] = true; Features["dot9-insts"] = true; Features["dot10-insts"] = true; + Features["dot11-insts"] = true; Features["dl-insts"] = true; Features["atomic-ds-pk-add-16-insts"] = true; Features["atomic-flat-pk-add-16-insts"] = true;