Skip to content

Commit

Permalink
[AMDGPU][MC][MI100+] Enable VOP3 variants of dot2c/dot4c/dot8c opcodes
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D138494
  • Loading branch information
dpreobra committed Nov 29, 2022
1 parent e3f75c1 commit 869fc7e
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 32 deletions.
16 changes: 3 additions & 13 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Expand Up @@ -4242,8 +4242,8 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
}

// op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
!(TSFlags & SIInstrFlags::VOP3P)) {
if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
(TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
if (OpSel & 3)
Expand Down Expand Up @@ -8228,17 +8228,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
// it has src2 register operand that is tied to dst operand
// we don't allow modifiers for this operand in assembler so src2_modifiers
// should be 0.
if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
Opc == AMDGPU::V_MAC_F32_e64_gfx10 || Opc == AMDGPU::V_MAC_F32_e64_vi ||
Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F16_e64_vi || Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || Opc == AMDGPU::V_FMAC_F32_e64_vi ||
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11) {
if (isMAC(Opc)) {
auto it = Inst.begin();
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
Expand Down
15 changes: 1 addition & 14 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Expand Up @@ -599,20 +599,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address);
} while (false);

if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx11 ||
MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_FMAC_F16_t16_e64_gfx11)) {
if (Res && AMDGPU::isMAC(MI.getOpcode())) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Expand Up @@ -434,6 +434,27 @@ bool isVOPD(unsigned Opc) {
return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

/// Returns true when \p Opc is one of the e64 MAC/FMAC or DOT2C/DOT4C/DOT8C
/// opcodes enumerated below, and false for every other opcode.
bool isMAC(unsigned Opc) {
  switch (Opc) {
  // v_mac_* / v_mac_legacy_* encodings.
  case AMDGPU::V_MAC_F32_e64_gfx6_gfx7:
  case AMDGPU::V_MAC_F32_e64_gfx10:
  case AMDGPU::V_MAC_F32_e64_vi:
  case AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7:
  case AMDGPU::V_MAC_LEGACY_F32_e64_gfx10:
  case AMDGPU::V_MAC_F16_e64_vi:
  // v_fmac_* encodings.
  case AMDGPU::V_FMAC_F64_e64_gfx90a:
  case AMDGPU::V_FMAC_F32_e64_gfx10:
  case AMDGPU::V_FMAC_F32_e64_gfx11:
  case AMDGPU::V_FMAC_F32_e64_vi:
  case AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10:
  case AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11:
  case AMDGPU::V_FMAC_F16_e64_gfx10:
  case AMDGPU::V_FMAC_F16_t16_e64_gfx11:
  // v_dot*c_* encodings.
  case AMDGPU::V_DOT2C_F32_F16_e64_vi:
  case AMDGPU::V_DOT2C_I32_I16_e64_vi:
  case AMDGPU::V_DOT4C_I32_I8_e64_vi:
  case AMDGPU::V_DOT8C_I32_I4_e64_vi:
    return true;
  default:
    return false;
  }
}

bool isTrue16Inst(unsigned Opc) {
const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
return Info ? Info->IsTrue16 : false;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Expand Up @@ -509,6 +509,9 @@ int getVOPDFull(unsigned OpX, unsigned OpY);
LLVM_READONLY
bool isVOPD(unsigned Opc);

/// \returns true if \p Opc is one of the e64 MAC/FMAC or DOT2C/DOT4C/DOT8C
/// opcodes for which the assembler and disassembler insert a zero
/// src2_modifiers operand.
LLVM_READNONE
bool isMAC(unsigned Opc);

namespace VOPD {

enum Component : unsigned {
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/VOP2Instructions.td
Expand Up @@ -509,12 +509,22 @@ class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
// VOP_DOT_ACC profile instantiated with f32 and v2f16 value types.
// Uses FPVRegInputMods for the src0/src1 DPP modifiers and sets HasClamp.
def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
let Src0ModDPP = FPVRegInputMods;
let Src1ModDPP = FPVRegInputMods;
let HasClamp = 1;
}

// VOP_DOT_ACC profile instantiated with i32 for both value types.
// Turns off HasExtVOP3DPP; turns on src0/src1 modifiers and clamp.
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
let HasExtVOP3DPP = 0;
let HasSrc0Mods = 1;
let HasSrc1Mods = 1;
let HasClamp = 1;

// src0/src1 use the Int32InputMods modifier operands.
let Src0Mod = Int32InputMods;
let Src1Mod = Int32InputMods;
// Override the 64-bit input list: three source arguments, with modifiers and
// src2 modifiers requested from getIns64.
let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret,
3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
1 /*HasSrc2Mods*/, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
// The 64-bit asm string names only vdst, src0, src1 and clamp.
let Asm64 = "$vdst, $src0, $src1$clamp";
}

// Write out to vcc or arbitrary SGPR.
Expand Down Expand Up @@ -2281,7 +2291,7 @@ defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;
defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>;
}

multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

Expand Down
4 changes: 0 additions & 4 deletions llvm/test/MC/AMDGPU/xdl-insts-err.s
Expand Up @@ -5,7 +5,6 @@
v_dot2c_f32_f16 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
// GFX908-ERR: error: e64 variant of this instruction is not supported
v_dot2c_f32_f16_e64 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
Expand All @@ -16,7 +15,6 @@ v_dot2c_f32_f16_sdwa v0, v1, v2
v_dot2c_i32_i16 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
// GFX908-ERR: error: e64 variant of this instruction is not supported
v_dot2c_i32_i16_e64 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
Expand All @@ -27,7 +25,6 @@ v_dot2c_i32_i16_sdwa v0, v1, v2
v_dot4c_i32_i8 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
// GFX908-ERR: error: e64 variant of this instruction is not supported
v_dot4c_i32_i8_e64 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
Expand All @@ -38,7 +35,6 @@ v_dot4c_i32_i8_sdwa v0, v1, v2
v_dot8c_i32_i4 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
// GFX908-ERR: error: e64 variant of this instruction is not supported
v_dot8c_i32_i4_e64 v0, v1, v2

// GFX906-ERR: error: instruction not supported on this GPU
Expand Down
86 changes: 86 additions & 0 deletions llvm/test/MC/AMDGPU/xdl-insts-gfx908.s
@@ -1,4 +1,6 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck %s

// CHECK: encoding: [0x01,0x05,0x0a,0x6e]
v_dot2c_f32_f16 v5, v1, v2
Expand Down Expand Up @@ -102,6 +104,27 @@ v_dot2c_f32_f16_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00]
v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0

// CHECK: encoding: [0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00]
v_dot2c_f32_f16_e64 v5, v1, src_scc

// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00]
v_dot2c_f32_f16_e64 v5, v255, src_execz

// CHECK: encoding: [0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00]
v_dot2c_f32_f16_e64 v5, s101, s101

// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00]
v_dot2c_f32_f16_e64 v5, -1, flat_scratch_lo

// CHECK: encoding: [0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40]
v_dot2c_f32_f16_e64 v5, 0.5, -|flat_scratch_hi|

// CHECK: encoding: [0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10]
v_dot2c_f32_f16_e64 v5, src_execz, 0.5 mul:4

// CHECK: encoding: [0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38]
v_dot2c_f32_f16_e64 v255, -|src_scc|, -1 clamp div:2

// CHECK: encoding: [0x01,0x05,0x0a,0x70]
v_dot2c_i32_i16 v5, v1, v2

Expand Down Expand Up @@ -192,6 +215,27 @@ v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00]
v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0

// CHECK: encoding: [0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00]
v_dot2c_i32_i16_e64 v5, v1, src_scc

// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00]
v_dot2c_i32_i16_e64 v5, v255, src_execz

// CHECK: encoding: [0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00]
v_dot2c_i32_i16_e64 v5, s101, s101

// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00]
v_dot2c_i32_i16_e64 v5, -1, flat_scratch_lo

// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00]
v_dot2c_i32_i16_e64 v5, 0.5, flat_scratch_hi

// CHECK: encoding: [0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00]
v_dot2c_i32_i16_e64 v5, src_execz, 0.5

// CHECK: encoding: [0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00]
v_dot2c_i32_i16_e64 v255, src_scc, -1 clamp

// CHECK: encoding: [0x01,0x05,0x0a,0x72]
v_dot4c_i32_i8 v5, v1, v2

Expand Down Expand Up @@ -282,6 +326,27 @@ v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00]
v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0

// CHECK: encoding: [0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00]
v_dot4c_i32_i8_e64 v5, v1, src_scc

// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00]
v_dot4c_i32_i8_e64 v5, v255, src_execz

// CHECK: encoding: [0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00]
v_dot4c_i32_i8_e64 v5, s101, s101

// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00]
v_dot4c_i32_i8_e64 v5, -1, flat_scratch_lo

// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00]
v_dot4c_i32_i8_e64 v5, 0.5, flat_scratch_hi

// CHECK: encoding: [0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00]
v_dot4c_i32_i8_e64 v5, src_execz, 0.5

// CHECK: encoding: [0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00]
v_dot4c_i32_i8_e64 v255, src_scc, -1 clamp

// CHECK: encoding: [0x01,0x05,0x0a,0x74]
v_dot8c_i32_i4 v5, v1, v2

Expand Down Expand Up @@ -372,6 +437,27 @@ v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
// CHECK: encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00]
v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0

// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00]
v_dot8c_i32_i4_e64 v5, v1, src_scc

// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00]
v_dot8c_i32_i4_e64 v5, v255, src_execz

// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00]
v_dot8c_i32_i4_e64 v5, s101, s101

// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00]
v_dot8c_i32_i4_e64 v5, -1, flat_scratch_lo

// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00]
v_dot8c_i32_i4_e64 v5, 0.5, flat_scratch_hi

// CHECK: encoding: [0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00]
v_dot8c_i32_i4_e64 v5, src_execz, 0.5

// CHECK: encoding: [0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00]
v_dot8c_i32_i4_e64 v255, src_scc, -1 clamp

// CHECK: encoding: [0x01,0x05,0x0a,0x78]
v_pk_fmac_f16 v5, v1, v2

Expand Down
86 changes: 86 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx908-xdl-insts.txt
@@ -1,4 +1,6 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -disassemble -show-encoding < %s | FileCheck %s

# CHECK: v_dot2c_f32_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6e]
0x01,0x05,0x0a,0x6e
Expand Down Expand Up @@ -96,6 +98,27 @@
# CHECK: v_dot2c_f32_f16_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00]
0xfa,0x04,0x0a,0x6e,0x01,0xe4,0x80,0x00

# CHECK: v_dot2c_f32_f16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00]
0x05,0x00,0x37,0xd1,0x01,0xfb,0x01,0x00

# CHECK: v_dot2c_f32_f16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00]
0x05,0x00,0x37,0xd1,0xff,0xf9,0x01,0x00

# CHECK: v_dot2c_f32_f16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00]
0x05,0x00,0x37,0xd1,0x65,0xca,0x00,0x00

# CHECK: v_dot2c_f32_f16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00]
0x05,0x00,0x37,0xd1,0xc1,0xcc,0x00,0x00

# CHECK: v_dot2c_f32_f16_e64 v5, 0.5, -|flat_scratch_hi| ; encoding: [0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40]
0x05,0x02,0x37,0xd1,0xf0,0xce,0x00,0x40

# CHECK: v_dot2c_f32_f16_e64 v5, src_execz, 0.5 mul:4 ; encoding: [0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10]
0x05,0x00,0x37,0xd1,0xfc,0xe0,0x01,0x10

# CHECK: v_dot2c_f32_f16_e64 v255, -|src_scc|, -1 clamp div:2 ; encoding: [0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38]
0xff,0x81,0x37,0xd1,0xfd,0x82,0x01,0x38

# CHECK: v_dot2c_i32_i16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x70]
0x01,0x05,0x0a,0x70

Expand Down Expand Up @@ -180,6 +203,27 @@
# CHECK: v_dot2c_i32_i16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00]
0xfa,0x04,0x0a,0x70,0x01,0xe4,0x08,0x00

# CHECK: v_dot2c_i32_i16_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00]
0x05,0x00,0x38,0xd1,0x01,0xfb,0x01,0x00

# CHECK: v_dot2c_i32_i16_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00]
0x05,0x00,0x38,0xd1,0xff,0xf9,0x01,0x00

# CHECK: v_dot2c_i32_i16_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00]
0x05,0x00,0x38,0xd1,0x65,0xca,0x00,0x00

# CHECK: v_dot2c_i32_i16_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00]
0x05,0x00,0x38,0xd1,0xc1,0xcc,0x00,0x00

# CHECK: v_dot2c_i32_i16_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00]
0x05,0x00,0x38,0xd1,0xf0,0xce,0x00,0x00

# CHECK: v_dot2c_i32_i16_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00]
0x05,0x00,0x38,0xd1,0xfc,0xe0,0x01,0x00

# CHECK: v_dot2c_i32_i16_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00]
0xff,0x80,0x38,0xd1,0xfd,0x82,0x01,0x00

# CHECK: v_dot4c_i32_i8_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x72]
0x01,0x05,0x0a,0x72

Expand Down Expand Up @@ -264,6 +308,27 @@
# CHECK: v_dot4c_i32_i8_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00]
0xfa,0x04,0x0a,0x72,0x01,0xe4,0x08,0x00

# CHECK: v_dot4c_i32_i8_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00]
0x05,0x00,0x39,0xd1,0x01,0xfb,0x01,0x00

# CHECK: v_dot4c_i32_i8_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00]
0x05,0x00,0x39,0xd1,0xff,0xf9,0x01,0x00

# CHECK: v_dot4c_i32_i8_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00]
0x05,0x00,0x39,0xd1,0x65,0xca,0x00,0x00

# CHECK: v_dot4c_i32_i8_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00]
0x05,0x00,0x39,0xd1,0xc1,0xcc,0x00,0x00

# CHECK: v_dot4c_i32_i8_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00]
0x05,0x00,0x39,0xd1,0xf0,0xce,0x00,0x00

# CHECK: v_dot4c_i32_i8_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00]
0x05,0x00,0x39,0xd1,0xfc,0xe0,0x01,0x00

# CHECK: v_dot4c_i32_i8_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00]
0xff,0x80,0x39,0xd1,0xfd,0x82,0x01,0x00

# CHECK: v_dot8c_i32_i4_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x74]
0x01,0x05,0x0a,0x74

Expand Down Expand Up @@ -348,6 +413,27 @@
# CHECK: v_dot8c_i32_i4_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00]
0xfa,0x04,0x0a,0x74,0x01,0xe4,0x08,0x00

# CHECK: v_dot8c_i32_i4_e64 v5, v1, src_scc ; encoding: [0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00]
0x05,0x00,0x3a,0xd1,0x01,0xfb,0x01,0x00

# CHECK: v_dot8c_i32_i4_e64 v5, v255, src_execz ; encoding: [0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00]
0x05,0x00,0x3a,0xd1,0xff,0xf9,0x01,0x00

# CHECK: v_dot8c_i32_i4_e64 v5, s101, s101 ; encoding: [0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00]
0x05,0x00,0x3a,0xd1,0x65,0xca,0x00,0x00

# CHECK: v_dot8c_i32_i4_e64 v5, -1, flat_scratch_lo ; encoding: [0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00]
0x05,0x00,0x3a,0xd1,0xc1,0xcc,0x00,0x00

# CHECK: v_dot8c_i32_i4_e64 v5, 0.5, flat_scratch_hi ; encoding: [0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00]
0x05,0x00,0x3a,0xd1,0xf0,0xce,0x00,0x00

# CHECK: v_dot8c_i32_i4_e64 v5, src_execz, 0.5 ; encoding: [0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00]
0x05,0x00,0x3a,0xd1,0xfc,0xe0,0x01,0x00

# CHECK: v_dot8c_i32_i4_e64 v255, src_scc, -1 clamp ; encoding: [0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00]
0xff,0x80,0x3a,0xd1,0xfd,0x82,0x01,0x00

# CHECK: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78]
0x01,0x05,0x0a,0x78

Expand Down

0 comments on commit 869fc7e

Please sign in to comment.