Skip to content

Commit

Permalink
[AMDGPU][MC][GFX940] Correct tied operand decoding for smfmac opcodes
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D125790
  • Loading branch information
dpreobra committed May 18, 2022
1 parent 46d9a6e commit 32ca9bd
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 16 deletions.
15 changes: 12 additions & 3 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Expand Up @@ -148,7 +148,8 @@ DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
DECODE_OPERAND_REG(AV_128)
DECODE_OPERAND_REG(AV_512)
DECODE_OPERAND_REG(AVDst_128)
DECODE_OPERAND_REG(AVDst_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
uint64_t Addr,
Expand Down Expand Up @@ -972,8 +973,16 @@ MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_512(unsigned Val) const {
return decodeSrcOp(OPW512, Val);
MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
  // Decode a 128-bit wide AVDst operand. Bit 8 of the operand (IS_VGPR) is not
  // carried by the instruction encoding; it is implied to be set, so the
  // incoming value must arrive with that bit clear and we add it back here
  // before handing the value to the generic source-operand decoder.
  assert(!(Val & AMDGPU::EncValues::IS_VGPR));
  const unsigned FullEnc = Val | AMDGPU::EncValues::IS_VGPR;
  return decodeSrcOp(OPW128, FullEnc);
}

MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
  // Decode a 512-bit wide AVDst operand. Bit 8 of the operand (IS_VGPR) is not
  // carried by the instruction encoding; it is implied to be set, so the
  // incoming value must arrive with that bit clear and we add it back here
  // before handing the value to the generic source-operand decoder.
  assert(!(Val & AMDGPU::EncValues::IS_VGPR));
  const unsigned FullEnc = Val | AMDGPU::EncValues::IS_VGPR;
  return decodeSrcOp(OPW512, FullEnc);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Expand Up @@ -142,7 +142,8 @@ class AMDGPUDisassembler : public MCDisassembler {
MCOperand decodeOperand_AV_32(unsigned Val) const;
MCOperand decodeOperand_AV_64(unsigned Val) const;
MCOperand decodeOperand_AV_128(unsigned Val) const;
MCOperand decodeOperand_AV_512(unsigned Val) const;
MCOperand decodeOperand_AVDst_128(unsigned Val) const;
MCOperand decodeOperand_AVDst_512(unsigned Val) const;

enum OpWidthTy {
OPW32,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIDefines.h
Expand Up @@ -280,7 +280,8 @@ enum : unsigned {
INLINE_FLOATING_C_MAX = 248,
LITERAL_CONST = 255,
VGPR_MIN = 256,
VGPR_MAX = 511
VGPR_MAX = 511,
IS_VGPR = 256 // Indicates VGPR or AGPR
};

} // namespace EncValues
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Expand Up @@ -1097,7 +1097,7 @@ defm VISrc_512 : RegInlineOperandAC<"VReg", "VISrc_512", "_512">;
defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;

//===----------------------------------------------------------------------===//
// AVSrc_* Operands with an AGPR or VGPR
// AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
//===----------------------------------------------------------------------===//

def AVSrc_32 : RegisterOperand<AV_32> {
Expand All @@ -1115,8 +1115,13 @@ def AVSrc_128 : RegisterOperand<AV_128> {
let EncoderMethod = "getAVOperandEncoding";
}

def AVSrc_512 : RegisterOperand<AV_512> {
let DecoderMethod = "DecodeAV_512RegisterClass";
// Destination register operand over the AV_128 register class. It is used as
// the destination operand type of the 16x16 SMFMAC profiles
// (VOPProfileSMFMAC_*_16X16*). It uses a dedicated decoder method because the
// disassembler's decodeOperand_AVDst_128 must OR in the IS_VGPR bit (bit 8),
// which is not present in the encoded vdst field. Encoding uses
// getAVOperandEncoding, the same encoder method as AVSrc_128.
// NOTE(review): DecodeAVDst_128RegisterClass is presumably generated by
// DECODE_OPERAND_REG(AVDst_128) in AMDGPUDisassembler.cpp - confirm.
def AVDst_128 : RegisterOperand<AV_128> {
let DecoderMethod = "DecodeAVDst_128RegisterClass";
let EncoderMethod = "getAVOperandEncoding";
}

// Destination register operand over the AV_512 register class. It is used as
// the destination operand type of the 32x32 SMFMAC profiles
// (VOPProfileSMFMAC_*_32X32*). It uses a dedicated decoder method because the
// disassembler's decodeOperand_AVDst_512 must OR in the IS_VGPR bit (bit 8),
// which is not present in the encoded vdst field. Encoding uses
// getAVOperandEncoding, the same encoder method as the AVSrc_* operands.
// NOTE(review): DecodeAVDst_512RegisterClass is presumably generated by
// DECODE_OPERAND_REG(AVDst_512) in AMDGPUDisassembler.cpp - confirm.
def AVDst_512 : RegisterOperand<AV_512> {
let DecoderMethod = "DecodeAVDst_512RegisterClass";
let EncoderMethod = "getAVOperandEncoding";
}

Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Expand Up @@ -449,12 +449,12 @@ def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,
def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;

def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVSrc_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVSrc_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVSrc_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVSrc_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;

class MFMATable <bit is_mac, string Name> {
bit IsMac = is_mac;
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Expand Up @@ -373,15 +373,14 @@ class VOP3Pe_MAI <bits<7> op, VOPProfile P, bit acc_cd = 0> : Enc64 {
}

class VOP3Pe_SMFMAC <bits<7> op> : Enc64 {
bits<10> vdst;
bits<10> vdst; // VGPR or AGPR, but not SGPR. vdst{8} is not encoded in the instruction.
bits<10> src0;
bits<10> src1;
bits<9> idx;
bits<3> blgp;
bits<3> cbsz;
bits<4> abid;

let vdst{8} = 1; // VGPR or AGPR, but not SGPR
let blgp = 0;

let Inst{7-0} = vdst{7-0};
Expand Down
216 changes: 216 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/mai-gfx940.txt
Expand Up @@ -69,18 +69,126 @@
# GFX940: v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 ; encoding: [0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14]
0x0a,0x80,0xe2,0xd3,0x02,0x09,0x06,0x14

# GFX940: v_smfmac_f32_16x16x32_f16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0xfc,0x00,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0xfc,0x80,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04]
0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c]
0x0a,0x00,0xe2,0xd3,0xfe,0x09,0x0e,0x0c

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04]
0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14]
0x0a,0x00,0xe2,0xd3,0x02,0xf9,0x0f,0x14

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07]
0x0a,0x00,0xe2,0xd3,0x02,0x09,0xfe,0x07

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x02,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x07,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x08,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x38,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_f16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x78,0xe2,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c]
0x0a,0x0b,0xe4,0xd3,0x02,0x09,0x0a,0x0c

# GFX940: v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 ; encoding: [0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14]
0x0a,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x14

# GFX940: v_smfmac_f32_32x32x16_f16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0xf0,0x00,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0xf0,0x80,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04]
0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c]
0x0a,0x00,0xe4,0xd3,0xfe,0x09,0x0e,0x0c

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04]
0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14]
0x0a,0x00,0xe4,0xd3,0x02,0xf9,0x0f,0x14

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07]
0x0a,0x00,0xe4,0xd3,0x02,0x09,0xfe,0x07

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x02,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x07,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x08,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x38,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_f16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x78,0xe4,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c]
0x0a,0x0b,0xe6,0xd3,0x02,0x09,0x12,0x0c

# GFX940: v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 ; encoding: [0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14]
0x0a,0x80,0xe6,0xd3,0x02,0x09,0x16,0x14

# GFX940: v_smfmac_f32_16x16x32_bf16 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0xfc,0x00,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0xfc,0x80,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04]
0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c]
0x0a,0x00,0xe6,0xd3,0xfe,0x09,0x0e,0x0c

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04]
0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14]
0x0a,0x00,0xe6,0xd3,0x02,0xf9,0x0f,0x14

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07]
0x0a,0x00,0xe6,0xd3,0x02,0x09,0xfe,0x07

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x02,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x07,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x08,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x38,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_16x16x32_bf16 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x78,0xe6,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[2:3], v[4:7], v6 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c]
0x0a,0x0b,0xe8,0xd3,0x02,0x09,0x1a,0x0c

Expand All @@ -93,14 +201,122 @@
# GFX940: v_smfmac_f32_32x32x16_bf16 a[10:25], v[2:3], a[4:7], v9 ; encoding: [0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14]
0x0a,0x80,0xe8,0xd3,0x02,0x09,0x26,0x14

# GFX940: v_smfmac_f32_32x32x16_bf16 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0xf0,0x00,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0xf0,0x80,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04]
0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c]
0x0a,0x00,0xe8,0xd3,0xfe,0x09,0x0e,0x0c

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04]
0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14]
0x0a,0x00,0xe8,0xd3,0x02,0xf9,0x0f,0x14

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07]
0x0a,0x00,0xe8,0xd3,0x02,0x09,0xfe,0x07

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x02,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x07,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x08,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x38,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_f32_32x32x16_bf16 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x78,0xe8,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v10 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c]
0x0a,0x0b,0xea,0xd3,0x02,0x09,0x2a,0x0c

# GFX940: v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v11 ; encoding: [0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14]
0x0a,0x80,0xea,0xd3,0x02,0x09,0x2e,0x14

# GFX940: v_smfmac_i32_16x16x64_i8 v[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04]
0xfc,0x00,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 a[252:255], v[2:3], v[4:7], v3 ; encoding: [0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04]
0xfc,0x80,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04]
0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c]
0x0a,0x00,0xea,0xd3,0xfe,0x09,0x0e,0x0c

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04]
0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14]
0x0a,0x00,0xea,0xd3,0x02,0xf9,0x0f,0x14

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07]
0x0a,0x00,0xea,0xd3,0x02,0x09,0xfe,0x07

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x02,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x07,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x08,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x38,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_16x16x64_i8 v[10:13], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x78,0xea,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v12 cbsz:3 abid:1 ; encoding: [0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c]
0x0a,0x0b,0xec,0xd3,0x02,0x09,0x32,0x0c

# GFX940: v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v13 ; encoding: [0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14]
0x0a,0x80,0xec,0xd3,0x02,0x09,0x36,0x14

# GFX940: v_smfmac_i32_32x32x32_i8 v[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04]
0xf0,0x00,0xec,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 a[240:255], v[2:3], v[4:7], v3 ; encoding: [0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04]
0xf0,0x80,0xec,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04]
0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], a[254:255], v[4:7], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c]
0x0a,0x00,0xec,0xd3,0xfe,0x09,0x0e,0x0c

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04]
0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], a[252:255], v3 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14]
0x0a,0x00,0xec,0xd3,0x02,0xf9,0x0f,0x14

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v255 ; encoding: [0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07]
0x0a,0x00,0xec,0xd3,0x02,0x09,0xfe,0x07

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:2 ; encoding: [0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x02,0xec,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 cbsz:7 ; encoding: [0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x07,0xec,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:1 ; encoding: [0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x08,0xec,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:7 ; encoding: [0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x38,0xec,0xd3,0x02,0x09,0x0e,0x04

# GFX940: v_smfmac_i32_32x32x32_i8 v[10:25], v[2:3], v[4:7], v3 abid:15 ; encoding: [0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04]
0x0a,0x78,0xec,0xd3,0x02,0x09,0x0e,0x04

0 comments on commit 32ca9bd

Please sign in to comment.