diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 63044b08f4830..facaf8fd7145b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1862,6 +1862,9 @@ def HasGDS : Predicate<"Subtarget->hasGDS()">; def HasGWS : Predicate<"Subtarget->hasGWS()">; +def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">; +def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index e1a4a1e76f84a..27438e9a9fe35 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1172,6 +1172,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // \returns true if the target supports the pre-NGG legacy geometry path. bool hasLegacyGeometry() const { return getGeneration() < GFX11; } + // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable. + bool hasCvtFP8VOP1Bug() const { return true; } + /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1a8efc6e3df20..6275daee14426 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -584,18 +584,28 @@ let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, } class Cvt_F32_F8_Pat : GCNPat< + VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< (f32 (node i32:$src, index)), - !if (index, - (inst_sdwa 0, $src, 0, 0, index), - (inst_e32 $src)) + (inst_sdwa 0, $src, 0, 0, index) >; -foreach Index = [0, 1, 2, 3] in { - def : Cvt_F32_F8_Pat; - def : Cvt_F32_F8_Pat; +let OtherPredicates = [HasCvtFP8VOP1Bug] in { + def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), + (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>; + def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)), + (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>; +} + +let OtherPredicates = [HasNoCvtFP8VOP1Bug] in { + def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)), + (V_CVT_F32_FP8_e32 $src)>; + def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)), + (V_CVT_F32_BF8_e32 $src)>; +} + +foreach Index = [1, 2, 3] in { + def : Cvt_F32_F8_Pat; + def : Cvt_F32_F8_Pat; } class Cvt_PK_F32_F8_Pat