Skip to content

Commit

Permalink
[AMDGPU] Workaround for gfx940 byte0 fp8 conversion bug
Browse files Browse the repository at this point in the history
The VOP1 forms of these conversions do not work.

Differential Revision: https://reviews.llvm.org/D157683
  • Loading branch information
rampitec committed Aug 11, 2023
1 parent 3e596ed commit 02046ad
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 11 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1862,6 +1862,9 @@ def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;

// Complementary predicates keyed on Subtarget->hasCvtFP8VOP1Bug().
// HasCvtFP8VOP1Bug holds when the subtarget reports the bug;
// HasNoCvtFP8VOP1Bug is its exact negation, so exactly one of the two is
// true for any given subtarget.
def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;

// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }

/// \returns true if the VOP1 form of the FP8/BF8 conversion to F32 is
/// unreliable on this subtarget. The value is currently hard-coded to true,
/// i.e. it does not depend on any subtarget feature or generation.
bool hasCvtFP8VOP1Bug() const { return true; }

/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
Expand Down
28 changes: 19 additions & 9 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -584,18 +584,28 @@ let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
}

class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
(f32 (node i32:$src, index)),
!if (index,
(inst_sdwa 0, $src, 0, 0, index),
(inst_e32 $src))
(inst_sdwa 0, $src, 0, 0, index)
>;

foreach Index = [0, 1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
// Byte-0 selection for subtargets where HasCvtFP8VOP1Bug holds: the
// fp8/bf8 -> f32 intrinsics with a constant index of 0 are matched to the
// SDWA pseudo instead of the e32 (VOP1) pseudo. The trailing operand is the
// byte index (0 here).
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
(V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
}

// Byte-0 selection for subtargets where HasNoCvtFP8VOP1Bug holds: the
// fp8/bf8 -> f32 intrinsics with a constant index of 0 are matched to the
// plain e32 pseudo, which takes only the source operand.
let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_e32 $src)>;
def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
(V_CVT_F32_BF8_e32 $src)>;
}

// Byte indices 1..3 are matched to the SDWA pseudo through Cvt_F32_F8_Pat.
// These defs sit outside the OtherPredicates blocks for index 0, so they are
// not conditioned on HasCvtFP8VOP1Bug / HasNoCvtFP8VOP1Bug.
foreach Index = [1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}

class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)

; GCN-LABEL: {{^}}test_cvt_f32_bf8_byte0:
; GCN: v_cvt_f32_bf8_e32 v0, v0{{$}}
; GCN: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
define float @test_cvt_f32_bf8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
ret float %ret
Expand Down Expand Up @@ -38,7 +38,7 @@ define float @test_cvt_f32_bf8_byte3(i32 %a) {
}

; GCN-LABEL: {{^}}test_cvt_f32_fp8_byte0:
; GCN: v_cvt_f32_fp8_e32 v0, v0{{$}}
; GCN: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0{{$}}
define float @test_cvt_f32_fp8_byte0(i32 %a) {
%ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
ret float %ret
Expand Down

0 comments on commit 02046ad

Please sign in to comment.