Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Use HasFP8ConversionInsts appropriately, NFC #82024

Closed
wants to merge 0 commits into from

Conversation

changpeng
Copy link
Contributor

The corresponding fp8 conversion instructions are available for a sub-target when and only when the subtarget "HasFP8ConversionInsts". We should not assume all the future sub-targets (gfx12+) have this feature.

@llvmbot
Copy link
Collaborator

llvmbot commented Feb 16, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

Changes

The corresponding fp8 conversion instructions are available for a sub-target when and only when the subtarget "HasFP8ConversionInsts". We should not assume all the future sub-targets (gfx12+) have this feature.


Full diff: https://github.com/llvm/llvm-project/pull/82024.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+5-4)
  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+4)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 5461c645e608fe..80071f919fc88b 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -635,8 +635,8 @@ def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]>
   let Src1VOP3DPP = Src1RC64;
 }
 
-let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
-    SchedRW = [WriteFloatCvt] in {
+let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
+    mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
   defm V_CVT_F32_FP8_OP_SEL    : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
   defm V_CVT_F32_BF8_OP_SEL    : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
   defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
@@ -653,7 +653,7 @@ class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
          (inst_e32 $src))
 >;
 
-let SubtargetPredicate = isGFX12Plus in {
+let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
   foreach Index = [0, 1, 2, 3] in {
     def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
                                V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
@@ -898,7 +898,7 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
   VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
   VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
 
-
+let OtherPredicates = [HasFP8ConversionInsts] in {
 // Define VOP1 instructions using the pseudo instruction with its old profile and
 // VOP3 using the OpSel profile for the pseudo instruction.
 defm V_CVT_F32_FP8      : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">;
@@ -912,6 +912,7 @@ defm V_CVT_PK_F32_FP8   : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8
 
 defm V_CVT_PK_F32_BF8   : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
 defm V_CVT_PK_F32_BF8   : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;
+}
 
 defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
   "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 8d965d3b9041d5..e8766174a40c9a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -667,6 +667,7 @@ class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst>
           !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0)
 >;
 
+let SubtargetPredicate = HasFP8ConversionInsts in {
 foreach Index = [0, -1] in {
   def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
   def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
@@ -676,6 +677,7 @@ foreach Index = [0, 1, 2, 3] in {
   def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
   def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
 }
+} // End SubtargetPredicate = HasFP8ConversionInsts.
 
 class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
   // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
@@ -1038,10 +1040,12 @@ defm V_MAXIMUM_F16        : VOP3Only_Realtriple_t16_gfx12<0x368>;
 defm V_PERMLANE16_VAR_B32  : VOP3Only_Real_Base_gfx12<0x30f>;
 defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
 
+let SubtargetPredicate = HasFP8ConversionInsts in {
 defm V_CVT_PK_FP8_F32  : VOP3Only_Realtriple_gfx12<0x369>;
 defm V_CVT_PK_BF8_F32  : VOP3Only_Realtriple_gfx12<0x36a>;
 defm V_CVT_SR_FP8_F32  : VOP3Only_Realtriple_gfx12<0x36b>;
 defm V_CVT_SR_BF8_F32  : VOP3Only_Realtriple_gfx12<0x36c>;
+}
 
 //===----------------------------------------------------------------------===//
 // GFX11, GFX12

@@ -898,7 +898,7 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;


let OtherPredicates = [HasFP8ConversionInsts] in {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Real shall just copy it from pseudo.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I am going to double check whether we missed something in the pseudo definitions. Thanks

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a little complicate. For _DPP8 and DPP16 reals, only OtherPredicates is copied from pseudo.
However, in VOP3_Real_dpp8_with_name, OtherPredicates is over-written after copied from pseudo:
OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], [TruePredicate])
Can we also keep original OtherPredcates (copied from pseudo) when we add UseRealTrue16Insts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cc @kosarev @Sisyph

I don't think there's an easy way to add UseRealTrue16Insts to OtherPredicates.

As a workaround you could set SubtargetPredicate = HasFP8ConversionInsts here, instead of using OtherPredicates? Or perhaps cleaner we could introduce a new True16Predicate = ... for use in VOP3_Real_dpp8_with_name to avoid overwriting OtherPredicates?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was thinking about adding True16Predicate as well. #82245 attempts to implement that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cc @kosarev @Sisyph

I don't think there's an easy way to add UseRealTrue16Insts to OtherPredicates.

As a workaround you could set SubtargetPredicate = HasFP8ConversionInsts here, instead of using OtherPredicates? Or perhaps cleaner we could introduce a new True16Predicate = ... for use in VOP3_Real_dpp8_with_name to avoid overwriting OtherPredicates?

Do you think listconcat works for OtherPredicates to add an additional predicate?

SubtargetPredicate does not work for two reasons"

  1. SubtargetPredicate is not copied from pseudo to real for dpp8 and dpp16, which means SubtargetPrediacte will get lost (unused) in real.
  2. We have the same issue to over-write SubtargetPredicate (Base_VOP3_DPP16)
    let SubtargetPredicate = HasDPP16;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was thinking about adding True16Predicate as well. #82245 attempts to implement that.

Can you explain why listconcat does not work for OtherPredicates to add an additional predicate?
I think it should work, and we don;t have to introduce a new predicate.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed in #82245 (comment).

@@ -635,8 +635,8 @@ def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]>
let Src1VOP3DPP = Src1RC64;
}

let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
SchedRW = [WriteFloatCvt] in {
let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just SubtargetPredicate = HasFP8ConversionInsts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these instructions have gfx940 and gfx12+ versions. Not sure whether it will affect gfx940 if we only use SubtargetPredicate = HasFP8ConversionInsts.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see... That sound right.

@changpeng
Copy link
Contributor Author

This PR was inadvertently closed when I tried to rebase. I opened a new PR for the same work:
#82433

changpeng added a commit that referenced this pull request Feb 21, 2024
The corresponding fp8 conversion instructions are available for a
subtarget when and only when the subtarget "HasFP8ConversionInsts". We
should not assume all the future subtargets (gfx12+) have
FP8ConversionInsts.
  In this patch, we use OtherPredicates to carry HasFP8ConversionInsts
feature. This is because SubtargetPredicate is not copied from pseudos
to reals for DPP16 and DPP6. To avoid overriding OtherPredicates in a
few places, we use the newly introduced True16Predicate to hold
UseRealTrue16Insts instead.
 This work repalces the inadvertently closed pull request:
#82024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants