Skip to content

Conversation

rampitec
Copy link
Collaborator

@rampitec rampitec commented Oct 1, 2025

No description provided.

Copy link
Collaborator Author

rampitec commented Oct 1, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@rampitec rampitec marked this pull request as ready for review October 1, 2025 20:20
@llvmbot
Copy link
Member

llvmbot commented Oct 1, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/161578.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+6-5)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+6-3)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+30-15)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+8-4)
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 77df72111605e..3c97f808fae19 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -313,10 +313,11 @@ let OtherPredicates = [UseFakeTrue16Insts] in
 let SubtargetPredicate = HasGFX950Insts, OtherPredicates = [HasBF16ConversionInsts] in {
   defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
 }
-let SubtargetPredicate = isGFX1250Plus, OtherPredicates = [HasBF16ConversionInsts] in {
-  defm V_CVT_F32_BF16_gfx1250 : VOP1Inst_t16_with_profiles <"v_cvt_f32_bf16_gfx1250", VOP_F32_BF16,
-                                                            VOPProfile_CVT_F32_BF16_gfx1250_t16,
-                                                            VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
+let SubtargetPredicate = isGFX1250Plus in {
+  let OtherPredicates = [UseRealTrue16Insts, HasBF16ConversionInsts] in
+    defm V_CVT_F32_BF16_gfx1250_t16 : VOP1Inst <"V_CVT_F32_BF16_gfx1250_t16", VOPProfile_CVT_F32_BF16_gfx1250_t16>;
+  let OtherPredicates = [UseFakeTrue16Insts, HasBF16ConversionInsts] in
+    defm V_CVT_F32_BF16_gfx1250_fake16 : VOP1Inst <"V_CVT_F32_BF16_gfx1250_fake16", VOPProfile_CVT_F32_BF16_gfx1250_fake16>;
 }
 
 let ReadsModeReg = 0, mayRaiseFPException = 0 in {
@@ -1149,7 +1150,7 @@ defm V_TANH_F16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
 defm V_PERMLANE16_SWAP_B32   : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
 defm V_TANH_BF16             : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
 defm V_PRNG_B32              : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
-defm V_CVT_F32_BF16          : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
+defm V_CVT_F32_BF16_gfx1250  : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16">;
 defm V_SAT_PK4_I4_I8         : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
 defm V_SAT_PK4_U4_U8         : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
 defm V_CVT_PK_F16_FP8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index 07dbbddcdc2f9..94edf22e36acf 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -720,10 +720,12 @@
 # GFX1250: v_cvt_f32_bf16_e32 v5, ttmp15           ; encoding: [0x7b,0xe4,0x0a,0x7e]
 
 0x01,0xe5,0x0a,0x7e
-# GFX1250: v_cvt_f32_bf16_e32 v5, v1.l             ; encoding: [0x01,0xe5,0x0a,0x7e]
+# GFX1250-REAL16: v_cvt_f32_bf16_e32 v5, v1.l             ; encoding: [0x01,0xe5,0x0a,0x7e]
+# GFX1250-FAKE16: v_cvt_f32_bf16_e32 v5, v1               ; encoding: [0x01,0xe5,0x0a,0x7e]
 
 0x7f,0xe5,0x0a,0x7e
-# GFX1250: v_cvt_f32_bf16_e32 v5, v127.l           ; encoding: [0x7f,0xe5,0x0a,0x7e]
+# GFX1250-REAL16: v_cvt_f32_bf16_e32 v5, v127.l           ; encoding: [0x7f,0xe5,0x0a,0x7e]
+# GFX1250-FAKE16: v_cvt_f32_bf16_e32 v5, v127             ; encoding: [0x7f,0xe5,0x0a,0x7e]
 
 0x6b,0xe4,0x0a,0x7e
 # GFX1250: v_cvt_f32_bf16_e32 v5, vcc_hi           ; encoding: [0x6b,0xe4,0x0a,0x7e]
@@ -732,7 +734,8 @@
 # GFX1250: v_cvt_f32_bf16_e32 v5, vcc_lo           ; encoding: [0x6a,0xe4,0x0a,0x7e]
 
 0x81,0xe5,0x0a,0x7e
-# GFX1250: v_cvt_f32_bf16_e32 v5, v1.h             ; encoding: [0x81,0xe5,0x0a,0x7e]
+# GFX1250-REAL16: v_cvt_f32_bf16_e32 v5, v1.h             ; encoding: [0x81,0xe5,0x0a,0x7e]
+# GFX1250-FAKE16: v_cvt_f32_bf16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/ ; encoding: [0x81,0xe5,0x0a,0x7e]
 
 0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00
 # GFX1250-REAL16: v_cvt_f16_bf8_e32 v1.l, 0x1234          ; encoding: [0xff,0xf0,0x02,0x7e,0x34,0x12,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
index c12ecb8d868aa..93286caa4fa2c 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt
@@ -615,49 +615,64 @@
 # GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 
 0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30
-# GFX1250: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v127, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xe4,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0xe4,0x00,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x41,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x40,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x21,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x2f,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x50,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x5f,0x01,0x01]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x01,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x0f,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x11,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1f,0x01,0xff]
 
 0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x60,0x09,0x13]
 
 0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x81,0x1b,0x00,0xff]
 
 0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff
 # GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf0,0x02,0x7e,0x02,0x39,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
index fa7b940132f0c..fb3f1b25c6c7f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
@@ -165,16 +165,20 @@
 # GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00
-# GFX1250: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 
 0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 
 0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xe4,0x0a,0x7e,0x01,0x77,0x39,0x05]
 
 0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05
-# GFX1250: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
+# GFX1250-REAL16: v_cvt_f32_bf16_dpp v5, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_cvt_f32_bf16_dpp v5, v129/*Invalid register, operand has 'VGPR_32_Lo128' register class*/ dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe4,0x0a,0x7e,0x81,0x77,0x39,0x05]
 
 0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]

Copy link
Contributor

@broxigarchen broxigarchen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

other than the minor comments, LGTM

@rampitec rampitec force-pushed the users/rampitec/10-01-_amdgpu_fix_real_and_fake_true16_v_cvt_f32_bf16_disasm branch from 0fdae02 to 1c7534b Compare October 1, 2025 20:42
@rampitec rampitec merged commit f1986d9 into main Oct 1, 2025
9 checks passed
@rampitec rampitec deleted the users/rampitec/10-01-_amdgpu_fix_real_and_fake_true16_v_cvt_f32_bf16_disasm branch October 1, 2025 21:22
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Oct 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants