diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f02b4d321328f..202184fa5b0f5 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -255,6 +255,10 @@ TARGET_BUILTIN(__builtin_amdgcn_sudot4, "iIbiIbiiIb", "nc", "dot8-insts") TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts") TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts") TARGET_BUILTIN(__builtin_amdgcn_sudot8, "iIbiIbiiIb", "nc", "dot8-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_bf8, "fUiUif", "nc", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_fp8, "fUiUif", "nc", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_fp8_fp8, "fUiUif", "nc", "gfx12-insts") +TARGET_BUILTIN(__builtin_amdgcn_dot4_f32_bf8_bf8, "fUiUif", "nc", "gfx12-insts") //===----------------------------------------------------------------------===// // GFX10+ only builtins. diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl index 6573325150d95..f5317683d0ff9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl @@ -49,4 +49,9 @@ kernel void builtins_amdgcn_dl_insts_err( iOut[3] = __builtin_amdgcn_sudot8(false, A, true, B, C, false); // expected-error {{'__builtin_amdgcn_sudot8' needs target feature dot8-insts}} iOut[4] = __builtin_amdgcn_sudot8(true, A, false, B, C, true); // expected-error {{'__builtin_amdgcn_sudot8' needs target feature dot8-insts}} + + fOut[5] = __builtin_amdgcn_dot4_f32_fp8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_bf8' needs target feature gfx12-insts}} + fOut[6] = __builtin_amdgcn_dot4_f32_bf8_fp8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_fp8' needs target feature gfx12-insts}} + fOut[7] = __builtin_amdgcn_dot4_f32_fp8_fp8(uiA, 
uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_fp8_fp8' needs target feature gfx12-insts}} + fOut[8] = __builtin_amdgcn_dot4_f32_bf8_bf8(uiA, uiB, fC); // expected-error {{'__builtin_amdgcn_dot4_f32_bf8_bf8' needs target feature gfx12-insts}} } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl new file mode 100644 index 0000000000000..087883e9f5608 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-gfx12.cl @@ -0,0 +1,20 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -emit-llvm -o - %s | FileCheck %s + +typedef unsigned int uint; + +// CHECK-LABEL: @builtins_amdgcn_dl_insts +// CHECK: call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %uiA, i32 %uiB, float %fC) +// CHECK: call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %uiA, i32 %uiB, float %fC) +// CHECK: call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %uiA, i32 %uiB, float %fC) +// CHECK: call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %uiA, i32 %uiB, float %fC) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +kernel void builtins_amdgcn_dl_insts_err(global float *fOut, + uint uiA, uint uiB, float fC) { + fOut[0] = __builtin_amdgcn_dot4_f32_fp8_bf8(uiA, uiB, fC); + fOut[1] = __builtin_amdgcn_dot4_f32_bf8_fp8(uiA, uiB, fC); + fOut[2] = __builtin_amdgcn_dot4_f32_fp8_fp8(uiA, uiB, fC); + fOut[3] = __builtin_amdgcn_dot4_f32_bf8_bf8(uiA, uiB, fC); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 53d9b97e7edf1..3f20fa8bf6c7e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2727,6 +2727,25 @@ def int_amdgcn_udot8 : ImmArg>, ImmArg>, ImmArg>] >; +// f32 %r = llvm.amdgcn.dot4.f32.type_a.type_b (v4type_a (as i32) %a, v4type_b (as i32) %b, f32 %c) +// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c +class 
AMDGPU8bitFloatDot4Intrinsic : + ClangBuiltin, + DefaultAttrsIntrinsic< + [llvm_float_ty], // %r + [ + llvm_i32_ty, // %a + llvm_i32_ty, // %b + llvm_float_ty, // %c + ], + [IntrNoMem, IntrSpeculatable] + >; + +def int_amdgcn_dot4_f32_fp8_bf8 : AMDGPU8bitFloatDot4Intrinsic; +def int_amdgcn_dot4_f32_bf8_fp8 : AMDGPU8bitFloatDot4Intrinsic; +def int_amdgcn_dot4_f32_fp8_fp8 : AMDGPU8bitFloatDot4Intrinsic; +def int_amdgcn_dot4_f32_bf8_bf8 : AMDGPU8bitFloatDot4Intrinsic; + //===----------------------------------------------------------------------===// // gfx908 intrinsics // ===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 147b37c40ceef..64c4406ebbefc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4491,6 +4491,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_fdot2_f32_bf16: case Intrinsic::amdgcn_sudot4: case Intrinsic::amdgcn_sudot8: + case Intrinsic::amdgcn_dot4_f32_fp8_bf8: + case Intrinsic::amdgcn_dot4_f32_bf8_fp8: + case Intrinsic::amdgcn_dot4_f32_fp8_fp8: + case Intrinsic::amdgcn_dot4_f32_bf8_bf8: case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16: case Intrinsic::amdgcn_wmma_f16_16x16x16_f16: case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied: diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index ba79affe683d6..bd68054589b11 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1688,6 +1688,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool validateMIMGD16(const MCInst &Inst); bool validateMIMGMSAA(const MCInst &Inst); bool validateOpSel(const MCInst &Inst); + bool validateNeg(const MCInst &Inst, int OpName); bool validateDPP(const 
MCInst &Inst, const OperandVector &Operands); bool validateVccOperand(unsigned Reg) const; bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); @@ -4357,6 +4358,41 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { return true; } +bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { + assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); + + const unsigned Opc = Inst.getOpcode(); + uint64_t TSFlags = MII.get(Opc).TSFlags; + + // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) + if (!(TSFlags & SIInstrFlags::IsDOT)) + return true; + + int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); + if (NegIdx == -1) + return true; + + unsigned Neg = Inst.getOperand(NegIdx).getImm(); + + // Some instructions have a neg_lo or neg_hi operand where the neg modifier + // is allowed on some src operands but not on others. + // Conveniently, such instructions have no src_modifiers operand for the + // src operands that disallow neg, because those also disallow opsel. 
+ + int SrcMods[3] = {AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}; + + for (unsigned i = 0; i < 3; ++i) { + if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) { + if (Neg & (1 << i)) + return false; + } + } + + return true; +} + bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, const OperandVector &Operands) { const unsigned Opc = Inst.getOpcode(); @@ -4834,6 +4870,16 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "invalid op_sel operand"); return false; } + if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) { + Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands), + "invalid neg_lo operand"); + return false; + } + if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) { + Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands), + "invalid neg_hi operand"); + return false; + } if (!validateDPP(Inst, Operands)) { return false; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 6c7977e22599c..e73e53aa270f9 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1260,14 +1260,19 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI, int NumOps = 0; int Ops[3]; - for (int OpName : { AMDGPU::OpName::src0_modifiers, - AMDGPU::OpName::src1_modifiers, - AMDGPU::OpName::src2_modifiers }) { - int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName); - if (Idx == -1) + std::pair MOps[] = { + {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src0}, + {AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src1}, + {AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::src2}}; + int DefaultValue = (Mod == SISrcMods::OP_SEL_1); + + for (auto [SrcMod, Src] : MOps) { + if (!AMDGPU::hasNamedOperand(Opc, Src)) break; - Ops[NumOps++] = MI->getOperand(Idx).getImm(); + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, SrcMod); + Ops[NumOps++] = + (ModIdx != -1) ? 
MI->getOperand(ModIdx).getImm() : DefaultValue; } const bool HasDstSel = diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 3e3247e130f9e..0c7a08cd4bc91 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -443,6 +443,48 @@ def : GCNPat < (int_amdgcn_sdot4 i32:$src0, >; } // End SubtargetPredicate = HasDot8Insts +// Does not use opsel, no src_modifiers on src0 and src1. +// src_modifiers on src2(f32) are f32 fneg(neg_lo[2]) and f32 fabs(neg_hi[2]). +def VOP3P_DOTF8_Profile : VOP3P_Profile, + VOP3_PACKED, 1> { + let HasClamp = 0; + let HasOpSel = 0; + let HasOMod = 0; + let IsDOT = 1; + let HasSrc0Mods = 0; + let HasSrc1Mods = 0; + let HasSrc2Mods = 1; + + let InsVOP3P = (ins VSrc_b32:$src0, VSrc_b32:$src1, + PackedF16InputMods:$src2_modifiers, VSrc_f32:$src2, + neg_lo0:$neg_lo, neg_hi0:$neg_hi); + + let InsVOP3DPP8 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1, + PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2, + neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, FI:$fi); + + let InsVOP3DPP16 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1, + PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2, + neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp_ctrl:$dpp_ctrl, + row_mask:$row_mask, bank_mask:$bank_mask, + bound_ctrl:$bound_ctrl, FI:$fi); +} + +multiclass VOP3PDOTF8Inst { + defm NAME : VOP3PInst; + + let SubtargetPredicate = isGFX12Plus in + def : GCNPat <(intrinsic_node i32:$src0, i32:$src1, + (VOP3Mods f32:$src2, i32:$src2_modifiers)), + (!cast(NAME) i32:$src0, i32:$src1, + i32:$src2_modifiers, f32:$src2)>; +} + +defm V_DOT4_F32_FP8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_bf8", int_amdgcn_dot4_f32_fp8_bf8>; +defm V_DOT4_F32_BF8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_fp8", int_amdgcn_dot4_f32_bf8_fp8>; +defm V_DOT4_F32_FP8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_fp8", int_amdgcn_dot4_f32_fp8_fp8>; +defm V_DOT4_F32_BF8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_bf8", 
int_amdgcn_dot4_f32_bf8_bf8>; + def : UDot2Pat; def : SDot2Pat; @@ -1019,6 +1061,11 @@ defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_m defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>; defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>; +defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple; +defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple; +defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple; +defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple; + //===----------------------------------------------------------------------===// // GFX11 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index a923c4b71788d..df505c3365cbd 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -169,6 +169,7 @@ class VOP3_Pseudo pattern = [], class VOP3P_Pseudo pattern = []> : VOP3_Pseudo { let VOP3P = 1; + let IsDOT = P.IsDOT; } class VOP_Real { @@ -387,7 +388,7 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) - let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, ?); // op_sel_hi(2) + let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(2) let Inst{15} = !if(P.HasClamp, clamp{0}, 0); @@ -396,8 +397,8 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); - let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, ?); // op_sel_hi(0) - let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, ?); // op_sel_hi(1) + let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(0) + let Inst{60} = !if(!and(P.HasSrc1, 
P.HasOpSel), src1_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(1) let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo) let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo) let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) @@ -772,12 +773,12 @@ class VOP3P_DPPe_Common_Base op, VOPProfile P> : Enc96 { let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0) let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) - let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, ?); // op_sel_hi(2) + let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(2) let Inst{15} = !if(P.HasClamp, clamp{0}, 0); let Inst{22-16} = op; let Inst{31-23} = 0x198; // encoding - let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, ?); // op_sel_hi(0) - let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, ?); // op_sel_hi(1) + let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(0) + let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, !if(P.IsDOT, 1, ?)); // op_sel_hi(1) let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo) let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo) let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll new file mode 100644 index 0000000000000..f4a7b2024b506 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll @@ -0,0 +1,255 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1200 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s + +define float @test_amdgcn_dot4_f32_fp8_bf8(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_bf8_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_bf8_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c) + %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fabs.fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: 
test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %fneg.fabs.c = fneg float %fabs.c + %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fneg.fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_fp8(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_fp8_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_fp8_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: 
v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c) + %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fabs.fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %fneg.fabs.c = fneg float %fabs.c + %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fneg.fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_fp8(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_fp8_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_fp8_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] 
+entry: + %fneg.c = fneg float %c + %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c) + %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fabs.fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %fneg.fabs.c = fneg float %fabs.c + %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fneg.fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_bf8(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_bf8_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %ret = call float 
@llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fabs.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_bf8_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fneg.c = fneg float %c + %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c) + %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fabs.fneg.c) + ret float %ret +} + +define float @test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) { +; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX12-NEXT: s_setpc_b64 s[30:31] +entry: + %fabs.c = call float @llvm.fabs.f32(float %c) + %fneg.fabs.c = fneg float %fabs.c + %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fneg.fabs.c) + ret float %ret +} + +declare float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %c) +declare float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %c) +declare float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %c) +declare float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c) + +declare float @llvm.fabs.f32(float %a) + diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s 
b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s index a8347fb7f08bd..567fc61d0b932 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s @@ -1343,3 +1343,123 @@ v_pk_maximum_f16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] neg_lo:[0,0] n v_pk_maximum_f16 v255, 0xfe0b, vcc_hi op_sel:[0,1] op_sel_hi:[1,0] neg_lo:[1,1] neg_hi:[1,1] clamp // GFX12: [0xff,0xd3,0x1e,0xcc,0xff,0xd6,0x00,0x68,0x0b,0xfe,0x00,0x00] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x24,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, s0, v2, v3 +// GFX12: v_dot4_f32_fp8_bf8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, s0, v3 +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, s0 +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3 +// GFX12: v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3 +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0 +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_fp8_bf8 v0, v1, v2, 1 +// GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x06,0x1a] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 ; encoding: 
[0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x25,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, s0, v2, v3 +// GFX12: v_dot4_f32_bf8_fp8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, s0, v3 +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, s0 +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 +// GFX12: v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3 +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0 +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_bf8_fp8 v0, v1, v2, 1 +// GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x06,0x1a] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x26,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, s0, v2, v3 +// GFX12: v_dot4_f32_fp8_fp8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, s0, v3 +// GFX12: 
v_dot4_f32_fp8_fp8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, s0 +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 +// GFX12: v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_fp8_fp8 v0, v1, v2, 1 +// GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x06,0x1a] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x9c] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x27,0xcc,0x01,0x05,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, s0, v2, v3 +// GFX12: v_dot4_f32_bf8_bf8 v0, s0, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x00,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, s0, v3 +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, s0, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x01,0x0c,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, v2, s0 +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, s0 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x02,0x18] + +v_dot4_f32_bf8_bf8 v0, 1.0, v2, v3 +// GFX12: v_dot4_f32_bf8_bf8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0xf2,0x04,0x0e,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, 1.0, v3 +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0xe5,0x0d,0x1c] + +v_dot4_f32_bf8_bf8 v0, v1, v2, 1.0 +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, 
v2, 1.0 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0xca,0x1b] + +v_dot4_f32_bf8_bf8 v0, v1, v2, 1 +// GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x06,0x1a] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16.s index 3b4bbff1efeb1..75bd1696e10bb 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16.s @@ -12,3 +12,27 @@ v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0] row_ror:7 bank_mask:0x1 bound_ctrl:0 v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0 // GFX12: v_fma_mixhi_f16_e64_dpp v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x24,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 row_shr:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 row_shr:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x24,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1f,0x0d,0x11] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 row_shl:15 +// GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x25,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x0f,0x01,0xff] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 row_ror:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 row_ror:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x25,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x2f,0x0d,0x11] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 row_mirror +// GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x00,0x40,0x26,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x40,0x01,0xff] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 row_half_mirror row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 row_half_mirror row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x26,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x41,0x0d,0x11] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 row_share:15 +// GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x27,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x5f,0x01,0xff] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 row_xmask:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 row_xmask:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x27,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x6f,0x0d,0x11] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16_err.s new file mode 100644 index 0000000000000..b76754123207c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp16_err.s @@ -0,0 +1,24 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error: %s + +// check for error with sgpr or imm operands + +v_dot4_f32_fp8_bf8 v0, s0, v2, v3 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_bf8 v0, v1, s0, v3 row_shr:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_fp8 v0, v1, v2, s0 row_shl:15 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 row_ror:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 row_mirror +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 row_half_mirror row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_bf8 v0, v1, v2, 1 row_share:15 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8.s index 71eea18bc13dc..14cf169d4b424 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8.s @@ -16,3 +16,27 @@ v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3) op_sel:[1,0,0] op_sel_hi:[1,0,0] dpp8: v_dot2_f32_f16_e64_dpp v0, v1, v2, v3 neg_lo:[0,1,1] neg_hi:[1,0,1] dpp8:[7,6,5,4,3,2,1,0] // GFX12: encoding: [0x00,0x05,0x13,0xcc,0xe9,0x04,0x0e,0xc4,0x01,0x77,0x39,0x05] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x24,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x24,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x25,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x25,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x26,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 
dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x26,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x27,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x27,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8_err.s new file mode 100644 index 0000000000000..50d3b6aca41bc --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_dpp8_err.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error: %s + +// check for error with sgpr or imm operands + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] row_mask:0x1 +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_bf8 v0, s0, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_fp8 v0, v1, s0, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_fp8 v0, v1, v2, s0 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_bf8 v0, v1, v2, 1.0 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_bf8 v0, v1, v2, 1 dpp8:[0,1,2,3,4,5,6,7] +// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s new file mode 100644 index 0000000000000..269989a26e287 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s @@ -0,0 +1,133 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error: %s + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 clamp +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 op_sel:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 op_sel:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 op_sel:[0,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 op_sel_hi:[0,1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 op_sel_hi:[1,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 op_sel_hi:[1,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 clamp +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 op_sel:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 op_sel:[0,1,0] +// 
GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 op_sel:[0,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 op_sel_hi:[0,1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 op_sel_hi:[1,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 op_sel_hi:[1,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 clamp +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 op_sel:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 op_sel:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 op_sel:[0,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 op_sel_hi:[0,1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 op_sel_hi:[1,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 op_sel_hi:[1,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,1,0] +// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 clamp +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 op_sel:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 op_sel:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 op_sel:[0,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 op_sel_hi:[0,1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 op_sel_hi:[1,0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 op_sel_hi:[1,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_lo operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[1,0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt index 44d8995c5c436..0f6debcbd9e30 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt @@ -1341,3 +1341,123 @@ # GFX12: v_pk_minimum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x40,0x1d,0xcc,0x6a,0xf6,0x00,0x18] 
0x05,0x40,0x1d,0xcc,0x6a,0xf6,0x00,0x18 + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x9c] +0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x9c + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x24,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x44,0x24,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_bf8 v0, s1, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x04,0x0e,0x1c] +0x00,0x40,0x24,0xcc,0x01,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, s2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0c,0x1c] +0x00,0x40,0x24,0xcc,0x01,0x05,0x0c,0x1c + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, s3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x18] +0x00,0x40,0x24,0xcc,0x01,0x05,0x0e,0x18 + +# GFX12: v_dot4_f32_fp8_bf8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x24,0xcc,0xf2,0x04,0x0e,0x1c] +0x00,0x40,0x24,0xcc,0xf2,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0xe5,0x0d,0x1c] +0x00,0x40,0x24,0xcc,0x01,0xe5,0x0d,0x1c + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0xca,0x1b] +0x00,0x40,0x24,0xcc,0x01,0x05,0xca,0x1b + +# GFX12: v_dot4_f32_fp8_bf8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x24,0xcc,0x01,0x05,0x06,0x1a] +0x00,0x40,0x24,0xcc,0x01,0x05,0x06,0x1a + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x9c] +0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x9c + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x25,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x44,0x25,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_fp8 v0, s1, v2, v3 ; encoding: 
[0x00,0x40,0x25,0xcc,0x01,0x04,0x0e,0x1c] +0x00,0x40,0x25,0xcc,0x01,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, s2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0c,0x1c] +0x00,0x40,0x25,0xcc,0x01,0x05,0x0c,0x1c + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, s3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x18] +0x00,0x40,0x25,0xcc,0x01,0x05,0x0e,0x18 + +# GFX12: v_dot4_f32_bf8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x25,0xcc,0xf2,0x04,0x0e,0x1c] +0x00,0x40,0x25,0xcc,0xf2,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0xe5,0x0d,0x1c] +0x00,0x40,0x25,0xcc,0x01,0xe5,0x0d,0x1c + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0xca,0x1b] +0x00,0x40,0x25,0xcc,0x01,0x05,0xca,0x1b + +# GFX12: v_dot4_f32_bf8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x25,0xcc,0x01,0x05,0x06,0x1a] +0x00,0x40,0x25,0xcc,0x01,0x05,0x06,0x1a + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x9c] +0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x9c + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x26,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x44,0x26,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_fp8 v0, s1, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x04,0x0e,0x1c] +0x00,0x40,0x26,0xcc,0x01,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, s2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0c,0x1c] +0x00,0x40,0x26,0xcc,0x01,0x05,0x0c,0x1c + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, s3 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x18] +0x00,0x40,0x26,0xcc,0x01,0x05,0x0e,0x18 + +# GFX12: v_dot4_f32_fp8_fp8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x26,0xcc,0xf2,0x04,0x0e,0x1c] +0x00,0x40,0x26,0xcc,0xf2,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, 1.0, v3 ; 
encoding: [0x00,0x40,0x26,0xcc,0x01,0xe5,0x0d,0x1c] +0x00,0x40,0x26,0xcc,0x01,0xe5,0x0d,0x1c + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0xca,0x1b] +0x00,0x40,0x26,0xcc,0x01,0x05,0xca,0x1b + +# GFX12: v_dot4_f32_fp8_fp8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x26,0xcc,0x01,0x05,0x06,0x1a] +0x00,0x40,0x26,0xcc,0x01,0x05,0x06,0x1a + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_lo:[0,0,1] ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x9c] +0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x9c + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,0,1] ; encoding: [0x00,0x44,0x27,0xcc,0x01,0x05,0x0e,0x1c] +0x00,0x44,0x27,0xcc,0x01,0x05,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_bf8 v0, s1, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x04,0x0e,0x1c] +0x00,0x40,0x27,0xcc,0x01,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, s2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0c,0x1c] +0x00,0x40,0x27,0xcc,0x01,0x05,0x0c,0x1c + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, s3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x18] +0x00,0x40,0x27,0xcc,0x01,0x05,0x0e,0x18 + +# GFX12: v_dot4_f32_bf8_bf8 v0, 1.0, v2, v3 ; encoding: [0x00,0x40,0x27,0xcc,0xf2,0x04,0x0e,0x1c] +0x00,0x40,0x27,0xcc,0xf2,0x04,0x0e,0x1c + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, 1.0, v3 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0xe5,0x0d,0x1c] +0x00,0x40,0x27,0xcc,0x01,0xe5,0x0d,0x1c + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, 1.0 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0xca,0x1b] +0x00,0x40,0x27,0xcc,0x01,0x05,0xca,0x1b + +# GFX12: v_dot4_f32_bf8_bf8 v0, v1, v2, 1 ; encoding: [0x00,0x40,0x27,0xcc,0x01,0x05,0x06,0x1a] +0x00,0x40,0x27,0xcc,0x01,0x05,0x06,0x1a diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp16.txt index 2b902878b87f6..52fd0530681cf 100644 --- 
a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp16.txt @@ -12,3 +12,27 @@ # GFX12: v_fma_mixhi_f16_e64_dpp v0, v1, v2, v3 op_sel_hi:[1,1,1] clamp quad_perm:[0,2,3,1] row_mask:0x0 bank_mask:0xf ; encoding: [0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f] 0x00,0xc0,0x22,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x78,0x00,0x0f + +# GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x24,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] +0x00,0x40,0x24,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff + +# GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 row_shr:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x24,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1f,0x0d,0x11] +0x00,0x40,0x24,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1f,0x0d,0x11 + +# GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x25,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x0f,0x01,0xff] +0x00,0x40,0x25,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x0f,0x01,0xff + +# GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 row_ror:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x25,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x2f,0x0d,0x11] +0x00,0x40,0x25,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x2f,0x0d,0x11 + +# GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x26,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x40,0x01,0xff] +0x00,0x40,0x26,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x40,0x01,0xff + +# GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 row_half_mirror row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x26,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x41,0x0d,0x11] +0x00,0x40,0x26,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x41,0x0d,0x11 + +# GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x27,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x5f,0x01,0xff] 
+0x00,0x40,0x27,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x5f,0x01,0xff + +# GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 row_xmask:15 row_mask:0x1 bank_mask:0x1 bound_ctrl:1 fi:1 ; encoding: [0x00,0x40,0x27,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x6f,0x0d,0x11] +0x00,0x40,0x27,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x6f,0x0d,0x11 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp8.txt index 474db1bdcc0e2..688212e51c427 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p_dpp8.txt @@ -15,3 +15,27 @@ # GFX12: v_fma_mixlo_f16_e64_dpp v0, |v1|, -v2, |v3| op_sel:[1,0,0] op_sel_hi:[1,0,0] dpp8:[2,2,2,2,4,4,4,4] ; encoding: [0x00,0x0d,0x21,0xcc,0xe9,0x04,0x0e,0x4c,0x01,0x92,0x44,0x92] 0x00,0x0d,0x21,0xcc,0xe9,0x04,0x0e,0x4c,0x01,0x92,0x44,0x92 + +# GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x24,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x24,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_fp8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x24,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x24,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x25,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x25,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_bf8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x25,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x25,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x26,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x26,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_fp8_fp8_e64_dpp v0, v1, v2, v3 
dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x26,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x26,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x40,0x27,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x27,0xcc,0xe9,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa + +# GFX12: v_dot4_f32_bf8_bf8_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0x00,0x40,0x27,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa] +0x00,0x40,0x27,0xcc,0xea,0x04,0x0e,0x1c,0x01,0x88,0xc6,0xfa