Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2218,7 +2218,7 @@ class VOP3PX2e <bits<8> op, bits<8> LdScaleOp, VOP3PWMMA_Profile P> : Enc128, VO
let Inst{23-16} = LdScaleOp;
let Inst{40-32} = scale_src0;
let Inst{49-41} = scale_src1;
let Inst{58-50} = 0; // scale src2
let Inst{58-50} = 0x100; // scale src2 = vgpr0 (dummy)
let Inst{59} = matrix_b_scale{0}; // scale_op_sel_hi(0)
let Inst{60} = 0; // scale_op_sel_hi(1)
let Inst{63-61} = {0, matrix_a_scale_fmt{1-0}}; // neg (lo)
Expand Down Expand Up @@ -2433,6 +2433,15 @@ multiclass VOP3P_Real_with_name_gfx12<bits<8> op,
string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> :
VOP3P_Real_with_name<GFX12Gen, op, backing_ps_name, asmName>;

multiclass VOP3P_Real_LD_SCALE_gfx1250<bits<8> op> {
defvar ps = !cast<VOP3P_Pseudo>(NAME);
def _gfx1250 :
VOP3P_Real_Gen<ps, GFX1250Gen, ps.Mnemonic>,
VOP3Pe_gfx11_gfx12<op, ps.Pfl> {
let Inst{58-50} = 0x100; // scale src2 = vgpr0 (dummy)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need both?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this one is for v_wmma_ld_scale_paired itself. The VOP3PX2e is for v_wmma_scale instructions.

}
}

defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">;
defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">;

Expand Down Expand Up @@ -2462,8 +2471,8 @@ defm V_FMA_MIX_F32_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3d>;
defm V_FMA_MIXLO_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3e>;
defm V_FMA_MIXHI_BF16 : VOP3P_Realtriple<GFX1250Gen, 0x3f>;

defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_gfx1250<0x35>;
defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_gfx1250<0x3a>;
defm V_WMMA_LD_SCALE_PAIRED_B32 : VOP3P_Real_LD_SCALE_gfx1250<0x35>;
defm V_WMMA_LD_SCALE16_PAIRED_B64 : VOP3P_Real_LD_SCALE_gfx1250<0x3a>;

let AssemblerPredicate = isGFX1250Plus in
def : AMDGPUMnemonicAlias<"v_fma_mix_f32_f16", "v_fma_mix_f32">;
Expand Down
Loading
Loading