diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index d51cee2b94ae0..cd9c2ec20c560 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4004,6 +4004,9 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const { } unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64; + if (!IsB32 && STI.hasTrue16BitInsts()) + Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64 + : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64; unsigned CBL = STI.getConstantBusLimit(Opc); MachineBasicBlock *MBB = MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 22447d33aad75..2fef4f029951b 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1447,34 +1447,72 @@ let SubtargetPredicate = isGFX12Plus in { } // End SubtargetPredicate = isGFX12Plus -let SubtargetPredicate = HasBitOp3Insts in { +let HasClamp = 0, HasModifiers = 1 in { +def BitOp3_B16_Profile : VOP3_BITOP3_Profile, VOP3_OPSEL>; +def BitOp3_B16_t16_Profile : VOP3_Profile_True16; +def BitOp3_B16_fake16_Profile : VOP3_Profile_Fake16; +} + +let OtherPredicates = [HasBitOp3Insts] in { let isReMaterializable = 1 in { - defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", - VOP3_BITOP3_Profile, VOP3_OPSEL>>; + let SubtargetPredicate = isGFX940Plus in + defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16", BitOp3_B16_Profile>; + let SubtargetPredicate = isGFX1250Plus in + defm V_BITOP3_B16_gfx1250 : VOP3Inst_t16_with_profiles <"v_bitop3_b16_gfx1250", BitOp3_B16_Profile, + BitOp3_B16_t16_Profile, BitOp3_B16_fake16_Profile>; defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32", VOP3_BITOP3_Profile, VOP3_REGULAR>>, VOPD_Component<0x12, "v_bitop2_b32">; } + def : GCNPat< (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, 
i32:$src2, i32:$bitop3)), (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3)) >; - def : GCNPat< - (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), - (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) - >; - def : GCNPat< (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)), (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3)) >; - def : GCNPat< - (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), - (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) - >; -} // End SubtargetPredicate = HasBitOp3Insts + let SubtargetPredicate = isGFX940Plus in { + def : GCNPat< + (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), + (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) + >; + + def : GCNPat< + (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), + (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) + >; + } // End SubtargetPredicate = isGFX940Plus + + let SubtargetPredicate = isGFX1250Plus in { + let True16Predicate = UseFakeTrue16Insts in { + def : GCNPat< + (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), + (i16 (V_BITOP3_B16_gfx1250_fake16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) + >; + + def : GCNPat< + (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), + (i16 (V_BITOP3_B16_gfx1250_fake16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0)) + >; + } + let True16Predicate = UseRealTrue16Insts in { + def : GCNPat< + (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), + (i16 (V_BITOP3_B16_gfx1250_t16_e64 0, VSrcT_b16:$src0, 0, VSrcT_b16:$src1, 0, VSrcT_b16:$src2, timm:$bitop3, 0)) + >; + + def : GCNPat< + (i16 (BITOP3_16 
i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)), + (i16 (V_BITOP3_B16_gfx1250_t16_e64 0, VSrcT_b16:$src0, 0, VSrcT_b16:$src1, 0, VSrcT_b16:$src2, timm:$bitop3, 0)) + >; + } + } // End SubtargetPredicate = isGFX1250Plus + +} // End OtherPredicates = [HasBitOp3Insts] class DivFmasPat : GCNPat< (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), @@ -1766,6 +1804,9 @@ defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_m defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>; defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>; +defm V_BITOP3_B16_gfx1250 : VOP3_Real_BITOP3_t16_and_fake16_gfx1250<0x233, "v_bitop3_b16">; +defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx1250<0x234>; + defm V_MAD_U32 : VOP3Only_Realtriple_gfx1250<0x235>; defm V_MAD_NC_U64_U32 : VOP3Only_Realtriple_gfx1250<0x2fa>; defm V_MAD_NC_I64_I32 : VOP3Only_Realtriple_gfx1250<0x2fb>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index c21e2d38398fa..badbba9487d63 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -401,6 +401,19 @@ class VOP3Interp_vi op, VOPProfile P> : VOP3e_vi { let Inst{49-41} = src0; } +class VOP3a_BITOP3_gfx12 op, VOPProfile p> : VOP3e_gfx11_gfx12 { + bits<8> bitop3; + + let Inst{60-59} = bitop3{7-6}; + let Inst{10-8} = bitop3{5-3}; + let Inst{63-61} = bitop3{2-0}; + + let Inst{11} = !if(p.HasOpSel, src0_modifiers{2}, 0); + let Inst{12} = !if(p.HasOpSel, src1_modifiers{2}, 0); + let Inst{13} = !if(p.HasOpSel, src2_modifiers{2}, 0); + let Inst{14} = !if(p.HasOpSel, src0_modifiers{3}, 0); +} + class VOP3Interp_gfx10 op, VOPProfile p> : VOP3e_gfx10 { bits<6> attr; bits<2> attrchan; @@ -1506,6 +1519,7 @@ class VOP3_Profile_Base : VO let HasFP8SrcByteSel = P.HasFP8SrcByteSel; let HasFP8DstByteSel = P.HasFP8DstByteSel; let HasOMod = P.HasOMod; + let HasBitOp3 = P.HasBitOp3; let HasModifiers = !if (Features.IsMAI, 0, @@ -1525,6 +1539,7 @@ class 
VOP3_Profile_True16 : let HasFP8SrcByteSel = P.HasFP8SrcByteSel; let HasFP8DstByteSel = P.HasFP8DstByteSel; let HasOMod = P.HasOMod; + let HasBitOp3 = P.HasBitOp3; let HasModifiers = !if (Features.IsMAI, 0, @@ -1540,6 +1555,7 @@ class VOP3_Profile_Fake16 : let HasFP8SrcByteSel = P.HasFP8SrcByteSel; let HasFP8DstByteSel = P.HasFP8DstByteSel; let HasOMod = P.HasOMod; + let HasBitOp3 = P.HasBitOp3; let HasModifiers = !if (Features.IsMAI, 0, @@ -1723,6 +1739,34 @@ class VOP3b_DPP8_Base op, VOP_Pseudo ps, string opName = ps.OpName> let Inst{14 - 8} = sdst; } +class VOP3_BITOP3_DPP16_Gen op, VOP_DPP_Pseudo p, GFXGen Gen, string asmName> + : VOP3_DPP16_Gen_t16 { + bits<8> bitop3; + + let Inst{60-59} = bitop3{7-6}; + let Inst{10-8} = bitop3{5-3}; + let Inst{63-61} = bitop3{2-0}; + + let Inst{11} = !if(p.Pfl.HasOpSel, src0_modifiers{2}, 0); + let Inst{12} = !if(p.Pfl.HasOpSel, src1_modifiers{2}, 0); + let Inst{13} = !if(p.Pfl.HasOpSel, src2_modifiers{2}, 0); + let Inst{14} = !if(p.Pfl.HasOpSel, src0_modifiers{3}, 0); +} + +class VOP3_BITOP3_DPP8 op, VOP_Pseudo p, string asmName> + : Base_VOP3_DPP8_t16 { + bits<8> bitop3; + + let Inst{60-59} = bitop3{7-6}; + let Inst{10-8} = bitop3{5-3}; + let Inst{63-61} = bitop3{2-0}; + + let Inst{11} = !if(p.Pfl.HasOpSel, src0_modifiers{2}, 0); + let Inst{12} = !if(p.Pfl.HasOpSel, src1_modifiers{2}, 0); + let Inst{13} = !if(p.Pfl.HasOpSel, src2_modifiers{2}, 0); + let Inst{14} = !if(p.Pfl.HasOpSel, src0_modifiers{3}, 0); +} + class VOP3b_DPP8_Base_t16 op, VOP_Pseudo ps, string opName = ps.OpName> : Base_VOP3_DPP8 { bits<8> sdst; @@ -1943,6 +1987,29 @@ multiclass VOP3be_Realtriple< multiclass VOP3beOnly_Realtriple op> : VOP3be_Realtriple; +multiclass VOP3_BITOP3_Real_dpp_Base op, string asmName> { + def _e64_dpp#Gen.Suffix : + VOP3_BITOP3_DPP16_Gen(NAME#"_e64"#"_dpp"), Gen, asmName>; +} + +multiclass VOP3_BITOP3_Real_dpp8_Base op, string asmName> { + defvar ps = !cast(NAME#"_e64"); + def _e64_dpp8#Gen.Suffix : VOP3_BITOP3_DPP8 { + let 
DecoderNamespace = + Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16"); + let AssemblerPredicate = Gen.AssemblerPredicate; + } +} + +multiclass VOP3_BITOP3_Real_Base op, string asmName> { + defvar ps = !cast(NAME#"_e64"); + let IsSingle = ps.Pfl.IsSingle, AsmString = asmName # ps.AsmOperands in { + def _e64#Gen.Suffix : + VOP3_Real_Gen, + VOP3a_BITOP3_gfx12; + } +} + //===----------------------------------------------------------------------===// // VOP3 GFX11 //===----------------------------------------------------------------------===// @@ -2046,6 +2113,16 @@ multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12 op, string opName, VOP3Only_Realtriple_with_name, VOP3Only_Realtriple_with_name; +multiclass VOP3_Real_BITOP3_gfx1250 op, string asmName = !cast(NAME#"_e64").Mnemonic> : + VOP3_BITOP3_Real_Base, + VOP3_BITOP3_Real_dpp_Base, + VOP3_BITOP3_Real_dpp8_Base; + +multiclass VOP3_Real_BITOP3_t16_and_fake16_gfx1250 op, string asmName = !cast(NAME#"_e64").Mnemonic> { + defm _t16 : VOP3_Real_BITOP3_gfx1250; + defm _fake16: VOP3_Real_BITOP3_gfx1250; +} + multiclass VOP3Dot_Realtriple_gfx11_gfx12 op, string asmName, bit isSingle = 0, string opName = NAME> : VOP3Dot_Realtriple, diff --git a/llvm/test/CodeGen/AMDGPU/bitop3.ll b/llvm/test/CodeGen/AMDGPU/bitop3.ll index eb149a93ee328..ba818f6ecc069 100644 --- a/llvm/test/CodeGen/AMDGPU/bitop3.ll +++ b/llvm/test/CodeGen/AMDGPU/bitop3.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-SDAG,GFX1250-SDAG-FAKE16,GFX1250-FAKE16 %s +; RUN: llc -global-isel=0 
-mtriple=amdgcn-- -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-SDAG,GFX1250-SDAG-TRUE16,GFX1250-TRUE16 %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16,GFX1250-FAKE16 %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-GISEL-TRUE16,GFX1250-TRUE16 %s ; ========= Single bit functions ========= @@ -55,6 +59,18 @@ define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) { ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: not_and_and_and: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: not_and_and_and: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %and1 = and i32 %nota, %c %and2 = and i32 %and1, %b @@ -87,6 +103,19 @@ define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) { ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: and_not_and_and: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: and_not_and_and: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_not_b32_e32 v1, v1 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %notb = xor i32 %b, -1 %and1 = and i32 %a, %c %and2 = and i32 %and1, %notb @@ -105,6 +134,18 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) { ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30 ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: and_and_not_and: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: and_and_not_and: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %notc = xor i32 %c, -1 %and1 = and i32 %a, %notc %and2 = and i32 %and1, %b @@ -113,15 +154,10 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) { -; GFX950-SDAG-LABEL: and_and_and: -; GFX950-SDAG: ; %bb.0: -; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 -; GFX950-SDAG-NEXT: ; return to shader part epilog -; -; GFX950-GISEL-LABEL: and_and_and: -; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 -; GFX950-GISEL-NEXT: ; return to shader part epilog +; GCN-LABEL: and_and_and: +; GCN: ; %bb.0: +; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 +; GCN-NEXT: ; return to shader part epilog %and1 = and i32 %a, %c %and2 = and i32 %and1, %b %ret_cast = bitcast i32 %and2 to float @@ -131,15 +167,10 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) { ; ========= Multi bit functions ========= define amdgpu_ps float @test_12(i32 %a, i32 %b) { -; GFX950-SDAG-LABEL: test_12: -; GFX950-SDAG: ; %bb.0: -; GFX950-SDAG-NEXT: v_bitop3_b32 v0, 
v0, v1, v0 bitop3:0xc -; GFX950-SDAG-NEXT: ; return to shader part epilog -; -; GFX950-GISEL-LABEL: test_12: -; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc -; GFX950-GISEL-NEXT: ; return to shader part epilog +; GCN-LABEL: test_12: +; GCN: ; %bb.0: +; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc +; GCN-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %and1 = and i32 %nota, %b %ret_cast = bitcast i32 %and1 to float @@ -158,6 +189,19 @@ define amdgpu_ps float @test_63(i32 %a, i32 %b) { ; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1 ; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: test_63: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_63: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_not_b32_e32 v0, v0 +; GFX1250-GISEL-NEXT: v_not_b32_e32 v1, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notb = xor i32 %b, -1 %or = or i32 %nota, %notb @@ -190,6 +234,19 @@ define amdgpu_ps float @test_126(i32 %a, i32 %b, i32 %c) { ; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: test_126: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_126: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, v0, v1 +; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %xor1 = xor i32 %a, %b %xor2 = xor i32 %a, %c 
%or = or i32 %xor1, %xor2 @@ -216,6 +273,21 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) { ; GFX950-GISEL-NEXT: v_and_b32_e32 v2, v3, v4 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: test_12_src_overflow: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_12_src_overflow: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_not_b32_e32 v3, v0 +; GFX1250-GISEL-NEXT: v_not_b32_e32 v4, v2 +; GFX1250-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, v3, v4 +; GFX1250-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notc = xor i32 %c, -1 %and1 = and i32 %nota, %c @@ -249,6 +321,29 @@ define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) { ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: v_or3_b32 v0, v3, v4, v0 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: test_100_src_overflow: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10 +; GFX1250-SDAG-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40 +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_or3_b32 v0, v3, v4, v0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_100_src_overflow: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_bitop3_b32 v3, v2, v0, v2 bitop3:3 +; GFX1250-GISEL-NEXT: v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX1250-GISEL-NEXT: v_not_b32_e32 v5, v2 +; GFX1250-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, v1, v3 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v5 +; GFX1250-GISEL-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %or1 = or i32 %c, %a %not1 = xor i32 %or1, -1 %and1 = and i32 %b, %not1 @@ -267,11 +362,16 @@ define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) { ; ========= Ternary logical operations take precedence ========= define amdgpu_ps float @test_xor3(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: test_xor3: -; GCN: ; %bb.0: -; GCN-NEXT: v_xor_b32_e32 v0, v0, v1 -; GCN-NEXT: v_xor_b32_e32 v0, v0, v2 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: test_xor3: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_xor_b32_e32 v0, v0, v1 +; GFX950-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: test_xor3: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_xor3_b32 v0, v0, v1, v2 +; GFX1250-NEXT: ; return to shader part epilog %xor1 = xor i32 %a, %b %xor2 = xor i32 %xor1, %c %ret_cast = bitcast i32 %xor2 to float @@ -303,12 +403,20 @@ define amdgpu_ps float @test_and_or(i32 %a, i32 %b, i32 %c) { ; ========= Uniform cases ========= define amdgpu_ps float @uniform_3_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { -; GCN-LABEL: uniform_3_op: -; GCN: ; %bb.0: -; GCN-NEXT: s_andn2_b32 s0, s2, s0 -; GCN-NEXT: s_and_b32 s0, s0, s1 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: uniform_3_op: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_andn2_b32 s0, s2, s0 +; GFX950-NEXT: s_and_b32 s0, s0, s1 +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: uniform_3_op: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_and_not1_b32 s0, s2, s0 +; GFX1250-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: s_and_b32 s0, s0, s1 +; GFX1250-NEXT: v_mov_b32_e32 v0, s0 +; GFX1250-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %and1 = and i32 %nota, %c %and2 = and i32 %and1, %b @@ -330,6 +438,21 @@ define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { ; GFX950-GISEL-NEXT: s_andn2_b32 s0, s0, s1 ; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: uniform_4_op: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, s0, s1, v0 bitop3:2 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: uniform_4_op: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_and_not1_b32 s0, s2, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_and_not1_b32 s0, s0, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notb = xor i32 %b, -1 %and1 = and i32 %nota, %c @@ -341,10 +464,30 @@ define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { ; ========= 16 bit tests ========= define amdgpu_ps half @not_and_not_and_not_and_b16(i16 %a, i16 %b, i16 %c) { -; GCN-LABEL: not_and_not_and_not_and_b16: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:1 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: not_and_not_and_not_and_b16: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:1 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: not_and_not_and_not_and_b16: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:1 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; 
GFX1250-SDAG-TRUE16-LABEL: not_and_not_and_not_and_b16: +; GFX1250-SDAG-TRUE16: ; %bb.0: +; GFX1250-SDAG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:1 +; GFX1250-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: not_and_not_and_not_and_b16: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:1 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-TRUE16-LABEL: not_and_not_and_not_and_b16: +; GFX1250-GISEL-TRUE16: ; %bb.0: +; GFX1250-GISEL-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:1 +; GFX1250-GISEL-TRUE16-NEXT: ; return to shader part epilog %nota = xor i16 %a, -1 %notb = xor i16 %b, -1 %notc = xor i16 %c, -1 @@ -355,10 +498,30 @@ define amdgpu_ps half @not_and_not_and_not_and_b16(i16 %a, i16 %b, i16 %c) { } define amdgpu_ps half @not_and_not_and_and_b16(i16 %a, i16 %b, i16 %c) { -; GCN-LABEL: not_and_not_and_and_b16: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:2 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: not_and_not_and_and_b16: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:2 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: not_and_not_and_and_b16: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:2 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-TRUE16-LABEL: not_and_not_and_and_b16: +; GFX1250-SDAG-TRUE16: ; %bb.0: +; GFX1250-SDAG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:2 +; GFX1250-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: not_and_not_and_and_b16: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:2 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-TRUE16-LABEL: not_and_not_and_and_b16: +; GFX1250-GISEL-TRUE16: ; %bb.0: +; 
GFX1250-GISEL-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:2 +; GFX1250-GISEL-TRUE16-NEXT: ; return to shader part epilog %nota = xor i16 %a, -1 %notb = xor i16 %b, -1 %and1 = and i16 %nota, %c @@ -368,10 +531,30 @@ define amdgpu_ps half @not_and_not_and_and_b16(i16 %a, i16 %b, i16 %c) { } define amdgpu_ps half @not_and_and_not_and_b16(i16 %a, i16 %b, i16 %c) { -; GCN-LABEL: not_and_and_not_and_b16: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:4 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: not_and_and_not_and_b16: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:4 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: not_and_and_not_and_b16: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:4 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-TRUE16-LABEL: not_and_and_not_and_b16: +; GFX1250-SDAG-TRUE16: ; %bb.0: +; GFX1250-SDAG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:4 +; GFX1250-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: not_and_and_not_and_b16: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:4 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-TRUE16-LABEL: not_and_and_not_and_b16: +; GFX1250-GISEL-TRUE16: ; %bb.0: +; GFX1250-GISEL-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:4 +; GFX1250-GISEL-TRUE16-NEXT: ; return to shader part epilog %nota = xor i16 %a, -1 %notc = xor i16 %c, -1 %and1 = and i16 %nota, %notc @@ -391,6 +574,21 @@ define amdgpu_ps half @test_xor3_b16(i16 %a, i16 %b, i16 %c) { ; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_xor3_b16: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: 
v_bitop3_b16 v0, v0, v2, v1 bitop3:0x96 +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-TRUE16-LABEL: test_xor3_b16: +; GFX1250-SDAG-TRUE16: ; %bb.0: +; GFX1250-SDAG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v2.l, v1.l bitop3:0x96 +; GFX1250-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_xor3_b16: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_xor3_b32 v0, v0, v1, v2 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %xor1 = xor i16 %a, %b %xor2 = xor i16 %xor1, %c %ret_cast = bitcast i16 %xor2 to half @@ -407,6 +605,21 @@ define amdgpu_ps half @test_or3_b16(i16 %a, i16 %b, i16 %c) { ; GFX950-GISEL: ; %bb.0: ; GFX950-GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_or3_b16: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_bitop3_b16 v0, v0, v2, v1 bitop3:0xfe +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-TRUE16-LABEL: test_or3_b16: +; GFX1250-SDAG-TRUE16: ; %bb.0: +; GFX1250-SDAG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v2.l, v1.l bitop3:0xfe +; GFX1250-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_or3_b16: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %or1 = or i16 %a, %b %or2 = or i16 %or1, %c %ret_cast = bitcast i16 %or2 to half @@ -423,10 +636,26 @@ define amdgpu_ps half @test_and_or_b16(i16 %a, i16 %b, i16 %c) { ; GFX950-GISEL: ; %bb.0: ; GFX950-GISEL-NEXT: v_and_or_b32 v0, v0, v1, v2 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-FAKE16-LABEL: test_and_or_b16: +; GFX1250-SDAG-FAKE16: ; %bb.0: +; GFX1250-SDAG-FAKE16-NEXT: v_bitop3_b16 v0, v0, v2, v1 bitop3:0xec +; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-TRUE16-LABEL: test_and_or_b16: +; GFX1250-SDAG-TRUE16: ; %bb.0: +; GFX1250-SDAG-TRUE16-NEXT: 
v_bitop3_b16 v0.l, v0.l, v2.l, v1.l bitop3:0xec +; GFX1250-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: test_and_or_b16: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, v0, v1, v2 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %and1 = and i16 %a, %b %or1 = or i16 %and1, %c %ret_cast = bitcast i16 %or1 to half ret half %ret_cast } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX950: {{.*}} +; GFX1250-FAKE16: {{.*}} +; GFX1250-TRUE16: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll index 546144dac6470..742d87f099ce4 100644 --- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll @@ -27,6 +27,9 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL,GFX1200-GISEL-TRUE16 %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL,GFX1200-GISEL-FAKE16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + ; Test for integer mad formation for patterns used in clpeak define i32 @clpeak_imad_pat_i32(i32 %x, i32 %y) { @@ -221,6 +224,38 @@ define i32 @clpeak_imad_pat_i32(i32 %x, i32 %y) { ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i32: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i32: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add i32 %x, 1 %add = mul i32 %y18, %y @@ -459,6 +494,37 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) { ; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i16: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 
v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i16: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %conv33 = add i16 %x, 1 %add = mul i16 %conv33, %y @@ -652,6 +718,21 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: clpeak_imad_pat_v2i16: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v0, v1, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v3, v2, v1 +; GFX1250-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v3, v0 +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <2 x i16> %x, %add = mul <2 x i16> %y18, %y @@ -998,6 +1079,54 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) { ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v3i16: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v3i16: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: 
v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y48 = add <3 x i16> %x, %add = mul <3 x i16> %y48, %y @@ -1429,6 +1558,54 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v4i16: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; 
GFX1250-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v4i16: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <4 x i16> %x, %add = mul <4 x i16> %y18, %y @@ -1662,6 +1839,37 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) { ; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_umad_pat_i16: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_umad_pat_i16: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: 
v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %conv33 = add i16 %x, 1 %add = mul i16 %conv33, %y @@ -1855,6 +2063,21 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: clpeak_umad_pat_v2i16: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v0, v1, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v3, v2, v1 +; GFX1250-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v3, v0 +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <2 x i16> %x, %add = mul <2 x i16> %y18, %y @@ -2201,6 +2424,54 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) { ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_umad_pat_v3i16: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 +; GFX1250-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_umad_pat_v3i16: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; 
GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y48 = add <3 x i16> %x, %add = mul <3 x i16> %y48, %y @@ -2632,6 +2903,54 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) { ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 ; GFX1200-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_umad_pat_v4i16: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_umad_pat_v4i16: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 
op_sel_hi:[1,0] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v4, v0, v2, v0 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v5, v1, v3, v1 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v6, v4, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v7, v5, v3 +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v6, v0 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v7, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2 +; GFX1250-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <4 x i16> %x, %add = mul <4 x i16> %y18, %y @@ -2947,6 +3266,50 @@ define <2 x i32> @clpeak_imad_pat_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v2i32: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, v4, v0 :: 
v_dual_add_nc_u32 v1, v5, v1 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-SDAG-NEXT: v_mad_u32 v2, v0, v4, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v3, v1, v5, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v2, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v3, v1, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v2i32: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, v4, v0 :: v_dual_add_nc_u32 v1, v5, v1 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v4 :: v_dual_add_nc_u32 v3, 1, v5 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <2 x i32> %x, %add = mul <2 
x i32> %y18, %y @@ -3376,6 +3739,73 @@ define <3 x i32> @clpeak_imad_pat_v3i32(<3 x i32> %x, <3 x i32> %y) { ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v3i32: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v6, v0, v3 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v7, v1, v4 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v6, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v8, v2, v5 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, v7, v1 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v2, v8, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mad_u32 v3, v0, v6, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v2, v5 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mad_u32 v4, v1, v7, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v3, v0, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mad_u32 v5, v2, v8, v2 +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v4, v1, v4 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v2, v5, v2, v5 +; GFX1250-SDAG-NEXT: 
s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v3i32: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v6, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, v7, v1 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v6 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v2, v8, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v7 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v8 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v1, v4 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v2, v5 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v3, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2 +; GFX1250-GISEL-NEXT: 
s_set_pc_i64 s[30:31] entry: %y48 = add <3 x i32> %x, %add = mul <3 x i32> %y48, %y @@ -3874,6 +4304,80 @@ define <4 x i32> @clpeak_imad_pat_v4i32(<4 x i32> %x, <4 x i32> %y) { ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v4i32: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v9, v1, v5 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v11, v3, v7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, v8, v0 :: v_dual_add_nc_u32 v1, v9, v1 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v2, v10, v2 :: v_dual_add_nc_u32 v3, v11, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v1, v5 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v3, v3, v7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_mad_u32 v4, v0, v8, v0 +; GFX1250-SDAG-NEXT: v_mad_u32 v5, v1, v9, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_mad_u32 v6, v2, v10, v2 +; 
GFX1250-SDAG-NEXT: v_mad_u32 v7, v3, v11, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v4, v0, v4 +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v5, v1, v5 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-SDAG-NEXT: v_mad_u32 v2, v6, v2, v6 +; GFX1250-SDAG-NEXT: v_mad_u32 v3, v7, v3, v7 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v4i32: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, v8, v0 :: v_dual_add_nc_u32 v1, v9, v1 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, v10, v2 :: v_dual_add_nc_u32 v3, v11, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v4, 1, v8 :: v_dual_add_nc_u32 v5, 1, v9 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v6, 1, v10 :: v_dual_add_nc_u32 v7, 1, v11 +; 
GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v4 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v5 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v6, v2, v6 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v7, v3, v7 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v4, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v5, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v6, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v7, v3 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <4 x i32> %x, %add = mul <4 x i32> %y18, %y @@ -4106,6 +4610,42 @@ define i32 @clpeak_imad_pat_i24(i32 %x, i32 %y) { ; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i24: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GFX1250-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, 
v0, v2, v0 +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i24: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24 +; GFX1250-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %shl = shl i32 %x, 8 %shr = ashr exact i32 %shl, 8 @@ -4342,6 +4882,42 @@ define i32 @clpeak_imad_pat_u24(i32 %x, i32 %y) { ; GFX1200-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_u24: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, 
v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v0, v2, v0 +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_u24: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %shl = and i32 %x, 16777215 %shl1 = and i32 %y, 16777215 @@ -4582,6 +5158,37 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) { ; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i8: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i8: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %conv33 = add i8 %x, 1 %add = mul i8 %conv33, %y @@ -5001,6 +5608,56 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v2 ; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v1, v3 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v2i8: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1250-SDAG-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1 +; GFX1250-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3 +; GFX1250-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | 
instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3 +; GFX1250-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX1250-SDAG-NEXT: v_bitop3_b16 v0, v0, v2, 0xff bitop3:0xec +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v2i8: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v1, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0 +; GFX1250-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1 +; GFX1250-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3 +; GFX1250-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1 +; GFX1250-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3 +; 
GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <2 x i8> %x, %add = mul <2 x i8> %y18, %y @@ -5508,6 +6165,44 @@ define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) { ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[1:2], null, v1, v8, v[2:3] ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i64: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_u64_e32 v[4:5], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[4:5], v[0:1] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_u64_e32 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: v_mad_nc_u64_u32 v[6:7], v2, v4, v[2:3] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v3, v4, v7 +; GFX1250-SDAG-NEXT: v_mad_u32 v7, v2, v5, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_nc_u64_u32 v[0:1], v6, v2, v[6:7] +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v7, v2, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v6, v3, v1 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i64: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[4:5], v[0:1], v[2:3] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], v[4:5], v[0:1] +; GFX1250-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[2:3] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], 1, v[4:5] +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[2:3], v[0:1], v[2:3] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[0:1], v[2:3], v[0:1] +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add i64 %x, 1 %add = mul i64 %y18, %y @@ -6416,6 +7111,68 @@ define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) { ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v2, v15, v14 ; GFX1200-GISEL-NEXT: v_mad_co_u64_u32 v[3:4], null, v3, v14, v[7:8] ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v2i64: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[2:3], 1, v[2:3] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_u64_e32 v[8:9], v[0:1], v[4:5] +; GFX1250-SDAG-NEXT: v_mul_u64_e32 v[10:11], v[2:3], v[6:7] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[8:9], v[0:1] +; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[2:3], v[10:11], v[2:3] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_u64_e32 v[4:5], v[0:1], v[4:5] +; GFX1250-SDAG-NEXT: v_mul_u64_e32 v[6:7], v[2:3], v[6:7] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_nc_u64_u32 v[12:13], v4, v8, v[4:5] +; GFX1250-SDAG-NEXT: v_mad_nc_u64_u32 v[14:15], v6, v10, v[6:7] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v5, v8, v13 +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v7, v10, v15 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v13, v4, v9, v0 +; GFX1250-SDAG-NEXT: v_mad_u32 v15, v6, v11, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_nc_u64_u32 v[0:1], v12, v4, v[12:13] +; GFX1250-SDAG-NEXT: v_mad_nc_u64_u32 v[2:3], v14, v6, v[14:15] +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v13, v4, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v3, v15, v6, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v12, v5, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v3, v14, v7, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v2i64: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], 1, v[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[8:9], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[10:11], v[2:3], v[6:7] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], v[8:9], v[0:1] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], v[10:11], v[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[2:3], v[2:3], v[6:7] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[4:5], 1, v[8:9] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[6:7], 
1, v[10:11] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[4:5], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[6:7], v[2:3], v[6:7] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], 1, v[0:1] +; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], 1, v[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[0:1], v[4:5], v[0:1] +; GFX1250-GISEL-NEXT: v_mul_u64_e32 v[2:3], v[6:7], v[2:3] +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y18 = add <2 x i64> %x, %add = mul <2 x i64> %y18, %y @@ -6673,6 +7430,50 @@ define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %ar ; GFX1200-NEXT: s_wait_storecnt 0x0 ; GFX1200-NEXT: v_add_nc_u32_e32 v0, v5, v0 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all: +; GFX1250-SDAG: ; %bb.0: ; %bb +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_add_nc_u32 v0, 1, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v4, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v3, v1, v0 +; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v2, off scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0 +; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v1, off scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0 +; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v3, off scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v3, 
v0 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: +; GFX1250-GISEL: ; %bb.0: ; %bb +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_mov_b32 v2, v3 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v3, v4 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v4 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v0 +; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v4, off scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0 +; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v1, off scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0 +; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v5, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] bb: %i = add i32 %arg, 1 %i3 = mul i32 %i, %arg1 @@ -6906,6 +7707,46 @@ define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %a ; GFX1200-NEXT: s_wait_storecnt 0x0 ; GFX1200-NEXT: v_add_nc_u32_e32 v0, v5, v1 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: +; GFX1250-SDAG: ; %bb.0: ; %bb +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_add_nc_u32 v0, 1, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v4, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; 
GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v3, v0, v1 +; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v2, off scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0 +; GFX1250-SDAG-NEXT: global_store_b32 v[4:5], v3, off scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v3, v1 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: +; GFX1250-GISEL: ; %bb.0: ; %bb +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_mov_b32 v2, v3 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v3, v4 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v4 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1 +; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v4, off scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0 +; GFX1250-GISEL-NEXT: global_store_b32 v[2:3], v5, off scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v5, v1 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] bb: %i = add i32 %arg, 1 %i3 = mul i32 %i, %arg1 @@ -7235,6 +8076,60 @@ define i32 @clpeak_imad_pat_i32_x2(i32 %x, i32 %y) { ; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i32_x2: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i32_x2: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %y38 = add i32 %x, 1 %add = mul i32 %y38, %y @@ -7806,6 +8701,84 @@ define <2 x i32> @clpeak_imad_pat_v2i32_x2(<2 x i32> %x, <2 x i32> %y) { ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 ; GFX1200-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 ; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_v2i32_x2: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v0, v4, v0 :: v_dual_add_nc_u32 v1, v5, v1 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v2, 1, v4 :: v_dual_add_nc_u32 v3, 1, 
v5 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v2, v4, v2 :: v_dual_add_nc_u32 v3, v5, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v2, 1, v4 :: v_dual_add_nc_u32 v3, 1, v5 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_dual_add_nc_u32 v2, v4, v2 :: v_dual_add_nc_u32 v3, v5, v3 +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v2, v0, v4, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v3, v1, v5, v1 +; GFX1250-SDAG-NEXT: v_mad_u32 v0, v2, v0, v2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_mad_u32 v1, v3, v1, v3 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_v2i32_x2: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, v4, v0 :: v_dual_add_nc_u32 v1, v5, v1 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v4 :: v_dual_add_nc_u32 v3, 1, v5 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, v4, v2 :: v_dual_add_nc_u32 v3, v5, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v4 :: v_dual_add_nc_u32 v3, 1, v5 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, v4, v2 :: v_dual_add_nc_u32 v3, v5, v3 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v2, 1, v4 :: v_dual_add_nc_u32 v3, 1, v5 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3 +; GFX1250-GISEL-NEXT: v_dual_add_nc_u32 v0, 1, v0 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-GISEL-NEXT: 
s_set_pc_i64 s[30:31] entry: %y38 = add <2 x i32> %x, %add = mul <2 x i32> %y38, %y @@ -8168,6 +9141,53 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) { ; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_imad_pat_i16_x2: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_imad_pat_i16_x2: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | 
instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %conv69 = add i16 %x, 1 %add = mul i16 %conv69, %y @@ -8525,6 +9545,53 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) { ; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: clpeak_umad_pat_i16_x2: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: clpeak_umad_pat_i16_x2: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v0, v0, 1 +; GFX1250-GISEL-NEXT: v_add_nc_u16 v2, v1, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1 +; GFX1250-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %conv69 = add i16 %x, 1 %add = mul i16 %conv69, %y @@ -8842,6 +9909,29 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: clpeak_imad_pat_v2i16_x2: +; GFX1250: ; %bb.0: ; %entry +; 
GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v0, v1, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v1, v2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v1, v0, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v1, v2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v1, v0, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v3, v2, v1 +; GFX1250-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v3, v0 +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %y38 = add <2 x i16> %x, %add = mul <2 x i16> %y38, %y @@ -9159,6 +10249,29 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) { ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v3, v0 ; GFX1200-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: clpeak_umad_pat_v2i16_x2: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v0, v1, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v1, v2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v1, v0, v0 +; 
GFX1250-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v1, v2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_pk_mad_u16 v2, v1, v0, v0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_pk_mul_lo_u16 v3, v2, v1 +; GFX1250-NEXT: v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v3, v0 +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %y38 = add <2 x i16> %x, %add = mul <2 x i16> %y38, %y @@ -9234,6 +10347,15 @@ define <2 x i32> @multi_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z0, i32 %z1) { ; GFX1200-NEXT: v_add_nc_u32_e32 v0, v1, v2 ; GFX1200-NEXT: v_add_nc_u32_e32 v1, v1, v3 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: multi_use_mul_mad_i32_var: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_dual_add_nc_u32 v0, v1, v2 :: v_dual_add_nc_u32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %mul = mul i32 %x, %y %add0 = add i32 %mul, %z0 @@ -9394,6 +10516,27 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) { ; GFX1200-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v2 ; GFX1200-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-SDAG-LABEL: multi_use_mul_mad_i16_var: +; GFX1250-SDAG: ; %bb.0: ; %entry +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2 +; GFX1250-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100 +; GFX1250-SDAG-NEXT: 
s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: multi_use_mul_mad_i16_var: +; GFX1250-GISEL: ; %bb.0: ; %entry +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2 +; GFX1250-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2 +; GFX1250-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] entry: %mul = mul i16 %x, %y %add0 = add i16 %mul, %z0 @@ -9465,6 +10608,17 @@ define i32 @other_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z, ptr addrspace(3) % ; GFX1200-NEXT: ds_store_b32 v3, v1 ; GFX1200-NEXT: s_wait_dscnt 0x0 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: other_use_mul_mad_i32_var: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u32_e32 v0, v1, v2 +; GFX1250-NEXT: ds_store_b32 v3, v1 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %mul = mul i32 %x, %y %add0 = add i32 %mul, %z @@ -9600,6 +10754,16 @@ define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) % ; GFX1200-GISEL-FAKE16-NEXT: ds_store_b16 v3, v4 ; GFX1200-GISEL-FAKE16-NEXT: s_wait_dscnt 0x0 ; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: other_use_mul_mad_i16_var: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u16 v4, v0, v1 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v2 +; GFX1250-NEXT: ds_store_b16 v3, v4 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %mul = mul i16 %x, %y %add0 = add i16 %mul, %z @@ -9715,6 +10879,16 @@ define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x 
i16> %y, <2 x i ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1200-NEXT: v_mov_b32_e32 v0, v2 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: multi_use_mul_mad_v2i16_var: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_mad_u16 v2, v0, v1, v2 +; GFX1250-NEXT: v_pk_mad_u16 v1, v0, v1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-NEXT: v_mov_b32_e32 v0, v2 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %mul = mul <2 x i16> %x, %y %add0 = add <2 x i16> %mul, %z0 @@ -9842,6 +11016,16 @@ define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i ; GFX1200-NEXT: ds_store_b32 v3, v4 ; GFX1200-NEXT: s_wait_dscnt 0x0 ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: other_use_mul_mad_v2i16_var: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_mul_lo_u16 v4, v0, v1 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, v1, v2 +; GFX1250-NEXT: ds_store_b32 v3, v4 +; GFX1250-NEXT: s_wait_dscnt 0x0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %mul = mul <2 x i16> %x, %y %add0 = add <2 x i16> %mul, %z @@ -9925,6 +11109,13 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) { ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; GFX1200-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3] ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mul_u24_add64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y) %add = add i64 %mul, %z ret i64 %add @@ -9985,6 +11176,16 @@ define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) { ; GFX1200-NEXT: s_wait_alu 0xfffd ; GFX1200-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo ; GFX1200-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mul_u24_zext_add64: 
+; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v5, 0 +; GFX1250-NEXT: v_mul_u32_u24_e32 v4, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], v[4:5], v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y) %mul.zext = zext i32 %mul to i64 %add = add i64 %mul.zext, %z diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll index ea8513fe0368e..c985e76422e97 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-SDAG,GFX1250-TRUE16,GFX1250-SDG-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-SDAG,GFX1250-FAKE16,GFX1250-SDG-FAKE16 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-TRUE16,GFX1250-GISEL-TRUE16 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-FAKE16,GFX1250-GISEL-FAKE16 %s declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i32) declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i32) @@ -26,23 +30,35 @@ define amdgpu_ps float @bitop3_b32_svv(i32 inreg %a, i32 %b, i32 %c) { } define amdgpu_ps float @bitop3_b32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { -; GCN-LABEL: bitop3_b32_ssv: -; GCN: ; %bb.0: -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: bitop3_b32_ssv: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v1, s1 +; GFX950-NEXT: v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: bitop3_b32_ssv: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_bitop3_b32 v0, s0, s1, v0 bitop3:0x11 +; GFX1250-NEXT: ; return to shader part epilog %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 17) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } define amdgpu_ps float @bitop3_b32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) { -; GCN-LABEL: bitop3_b32_sss: -; GCN: ; %bb.0: -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mov_b32_e32 v1, s2 -; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: bitop3_b32_sss: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-NEXT: v_mov_b32_e32 v1, s2 +; GFX950-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: bitop3_b32_sss: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_bitop3_b32 v0, s0, s1, v0 bitop3:0x12 +; GFX1250-NEXT: ; return to shader part epilog %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 18) %ret_cast = bitcast i32 %ret to float ret float %ret_cast @@ -60,6 +76,11 @@ define amdgpu_ps float @bitop3_b32_vvi(i32 %a, 
i32 %b) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x13 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: bitop3_b32_vvi: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_bitop3_b32 v0, v0, v1, 0x3e8 bitop3:0x13 +; GFX1250-NEXT: ; return to shader part epilog %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i32 19) %ret_cast = bitcast i32 %ret to float ret float %ret_cast @@ -79,6 +100,20 @@ define amdgpu_ps float @bitop3_b32_vii(i32 %a) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x14 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: bitop3_b32_vii: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_movk_i32 s0, 0x7d0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, v0, s0, 0x3e8 bitop3:0x14 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: bitop3_b32_vii: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e8 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_bitop3_b32 v0, v0, 0x7d0, v1 bitop3:0x14 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i32 20) %ret_cast = bitcast i32 %ret to float ret float %ret_cast @@ -102,49 +137,109 @@ define amdgpu_ps float @bitop3_b32_iii() { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x15 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDAG-LABEL: bitop3_b32_iii: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0x3e8 +; GFX1250-SDAG-NEXT: s_movk_i32 s0, 0xbb8 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_bitop3_b32 v0, s0, 0x7d0, v0 bitop3:0x15 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; 
GFX1250-GISEL-LABEL: bitop3_b32_iii: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, 0x7d0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e8 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_bitop3_b32 v0, 0xbb8, v0, v1 bitop3:0x15 +; GFX1250-GISEL-NEXT: ; return to shader part epilog %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i32 21) %ret_cast = bitcast i32 %ret to float ret float %ret_cast } define amdgpu_ps half @bitop3_b16_vvv(i16 %a, i16 %b, i16 %c) { -; GCN-LABEL: bitop3_b16_vvv: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: bitop3_b16_vvv: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-TRUE16-LABEL: bitop3_b16_vvv: +; GFX1250-TRUE16: ; %bb.0: +; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, v2.l bitop3:0xf +; GFX1250-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-FAKE16-LABEL: bitop3_b16_vvv: +; GFX1250-FAKE16: ; %bb.0: +; GFX1250-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf +; GFX1250-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 15) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } define amdgpu_ps half @bitop3_b16_svv(i16 inreg %a, i16 %b, i16 %c) { -; GCN-LABEL: bitop3_b16_svv: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: bitop3_b16_svv: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-TRUE16-LABEL: bitop3_b16_svv: +; GFX1250-TRUE16: ; %bb.0: +; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, s0, v0.l, v1.l bitop3:0x10 +; GFX1250-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-FAKE16-LABEL: bitop3_b16_svv: +; GFX1250-FAKE16: ; %bb.0: +; 
GFX1250-FAKE16-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10 +; GFX1250-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 16) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } define amdgpu_ps half @bitop3_b16_ssv(i16 inreg %a, i16 inreg %b, i16 %c) { -; GCN-LABEL: bitop3_b16_ssv: -; GCN: ; %bb.0: -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: bitop3_b16_ssv: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v1, s1 +; GFX950-NEXT: v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-TRUE16-LABEL: bitop3_b16_ssv: +; GFX1250-TRUE16: ; %bb.0: +; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, s0, s1, v0.l bitop3:0x11 +; GFX1250-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-FAKE16-LABEL: bitop3_b16_ssv: +; GFX1250-FAKE16: ; %bb.0: +; GFX1250-FAKE16-NEXT: v_bitop3_b16 v0, s0, s1, v0 bitop3:0x11 +; GFX1250-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 17) %ret_cast = bitcast i16 %ret to half ret half %ret_cast } define amdgpu_ps half @bitop3_b16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c) { -; GCN-LABEL: bitop3_b16_sss: -; GCN: ; %bb.0: -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mov_b32_e32 v1, s2 -; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12 -; GCN-NEXT: ; return to shader part epilog +; GFX950-LABEL: bitop3_b16_sss: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-NEXT: v_mov_b32_e32 v1, s2 +; GFX950-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12 +; GFX950-NEXT: ; return to shader part epilog +; +; GFX1250-TRUE16-LABEL: bitop3_b16_sss: +; GFX1250-TRUE16: ; %bb.0: +; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, s2 +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, s0, s1, v0.l bitop3:0x12 +; GFX1250-TRUE16-NEXT: 
; return to shader part epilog +; +; GFX1250-FAKE16-LABEL: bitop3_b16_sss: +; GFX1250-FAKE16: ; %bb.0: +; GFX1250-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-FAKE16-NEXT: v_bitop3_b16 v0, s0, s1, v0 bitop3:0x12 +; GFX1250-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 18) %ret_cast = bitcast i16 %ret to half ret half %ret_cast @@ -162,6 +257,16 @@ define amdgpu_ps half @bitop3_b16_vvi(i16 %a, i16 %b) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x13 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-TRUE16-LABEL: bitop3_b16_vvi: +; GFX1250-TRUE16: ; %bb.0: +; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, 0x3e8 bitop3:0x13 +; GFX1250-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-FAKE16-LABEL: bitop3_b16_vvi: +; GFX1250-FAKE16: ; %bb.0: +; GFX1250-FAKE16-NEXT: v_bitop3_b16 v0, v0, v1, 0x3e8 bitop3:0x13 +; GFX1250-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i32 19) %ret_cast = bitcast i16 %ret to half ret half %ret_cast @@ -181,6 +286,34 @@ define amdgpu_ps half @bitop3_b16_vii(i16 %a) { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x14 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDG-TRUE16-LABEL: bitop3_b16_vii: +; GFX1250-SDG-TRUE16: ; %bb.0: +; GFX1250-SDG-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x7d0 +; GFX1250-SDG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v1.l, 0x3e8 bitop3:0x14 +; GFX1250-SDG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDG-FAKE16-LABEL: bitop3_b16_vii: +; GFX1250-SDG-FAKE16: ; %bb.0: +; GFX1250-SDG-FAKE16-NEXT: s_movk_i32 s0, 0x7d0 +; GFX1250-SDG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDG-FAKE16-NEXT: v_bitop3_b16 
v0, v0, s0, 0x3e8 bitop3:0x14 +; GFX1250-SDG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-TRUE16-LABEL: bitop3_b16_vii: +; GFX1250-GISEL-TRUE16: ; %bb.0: +; GFX1250-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x3e8 +; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, 0x7d0, v0.h bitop3:0x14 +; GFX1250-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: bitop3_b16_vii: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0x3e8 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_bitop3_b16 v0, v0, 0x7d0, v1 bitop3:0x14 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i32 20) %ret_cast = bitcast i16 %ret to half ret half %ret_cast @@ -203,6 +336,38 @@ define amdgpu_ps half @bitop3_b16_iii() { ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8 ; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x15 ; GFX950-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-SDG-TRUE16-LABEL: bitop3_b16_iii: +; GFX1250-SDG-TRUE16: ; %bb.0: +; GFX1250-SDG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7d0 +; GFX1250-SDG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0xbb8 +; GFX1250-SDG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDG-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.h, v0.l, 0x3e8 bitop3:0x15 +; GFX1250-SDG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-SDG-FAKE16-LABEL: bitop3_b16_iii: +; GFX1250-SDG-FAKE16: ; %bb.0: +; GFX1250-SDG-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3e8 +; GFX1250-SDG-FAKE16-NEXT: s_movk_i32 s0, 0xbb8 +; GFX1250-SDG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDG-FAKE16-NEXT: v_bitop3_b16 v0, s0, 0x7d0, v0 bitop3:0x15 +; GFX1250-SDG-FAKE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-TRUE16-LABEL: bitop3_b16_iii: +; GFX1250-GISEL-TRUE16: ; %bb.0: +; 
GFX1250-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7d0 +; GFX1250-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x3e8 +; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-TRUE16-NEXT: v_bitop3_b16 v0.l, 0xbb8, v0.l, v0.h bitop3:0x15 +; GFX1250-GISEL-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-FAKE16-LABEL: bitop3_b16_iii: +; GFX1250-GISEL-FAKE16: ; %bb.0: +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, 0x7d0 +; GFX1250-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0x3e8 +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-FAKE16-NEXT: v_bitop3_b16 v0, 0xbb8, v0, v1 bitop3:0x15 +; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i32 21) %ret_cast = bitcast i16 %ret to half ret half %ret_cast diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll index d89e57245e8ea..25609e881254e 100644 --- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll +++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s ; We want to undo these canonicalizations to enable mad matching: ; (x * y) + x --> x * (y + 1) @@ -36,6 +37,13 @@ define i32 @v_mul_add_1_i32(i32 %x, i32 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, 
v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i32 %y, 1 %mul = mul i32 %x, %add ret i32 %mul @@ -67,6 +75,13 @@ define i32 @v_mul_add_1_i32_commute(i32 %x, i32 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i32_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i32 %y, 1 %mul = mul i32 %add, %x ret i32 %mul @@ -98,6 +113,13 @@ define i32 @v_mul_add_x_i32(i32 %x, i32 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_x_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i32 %x, %y %add = add i32 %x, %mul ret i32 %add @@ -131,6 +153,15 @@ define i32 @v_mul_sub_1_i32(i32 %x, i32 %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, -1, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i32 %y, 1 %mul = mul i32 %x, %sub ret i32 %mul @@ -164,6 +195,15 @@ define i32 @v_mul_sub_1_i32_commute(i32 %x, i32 %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i32_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: 
v_add_nc_u32_e32 v1, -1, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i32 %y, 1 %mul = mul i32 %sub, %x ret i32 %mul @@ -197,6 +237,15 @@ define i32 @v_mul_sub_x_i32(i32 %x, i32 %y) { ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_x_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v1, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_sub_nc_u32_e32 v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i32 %x, %y %sub = sub i32 %mul, %x ret i32 %sub @@ -230,6 +279,15 @@ define i32 @v_mul_add_2_i32(i32 %x, i32 %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 2, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_2_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i32 %y, 2 %mul = mul i32 %x, %add ret i32 %mul @@ -263,6 +321,15 @@ define i32 @v_mul_sub_2_i32(i32 %x, i32 %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, -2, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_2_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, -2, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i32 %y, 2 %mul = mul i32 %x, %sub ret i32 %mul @@ -296,6 +363,15 @@ define i32 @v_mul_add_65_i32(i32 %x, i32 %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x41, v1 ; GFX10-NEXT: v_mul_lo_u32 
v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_65_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 0x41, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i32 %y, 65 %mul = mul i32 %x, %add ret i32 %mul @@ -329,6 +405,15 @@ define i32 @v_mul_sub_65_i32(i32 %x, i32 %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_65_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i32 %y, 65 %mul = mul i32 %x, %sub ret i32 %mul @@ -362,6 +447,15 @@ define i24 @v_mul_add_1_i24_zext(i24 zeroext %x, i24 zeroext %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i24_zext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 1, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i24 %y, 1 %mul = mul i24 %x, %add ret i24 %mul @@ -395,6 +489,15 @@ define i24 @v_mul_sub_1_i24_zext(i24 zeroext %x, i24 zeroext %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i24_zext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, -1, v1 +; GFX1250-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i24 %y, 1 %mul = mul i24 %x, %sub ret i24 %mul @@ -424,6 +527,13 @@ define i24 @v_add_mul_i24_zext_1(i24 zeroext %x, i24 zeroext %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_add_mul_i24_zext_1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32_u24 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i24 %x, %y %add = add i24 %mul, %x ret i24 %add @@ -457,6 +567,15 @@ define i24 @v_mul_add_1_i24_sext(i24 signext %x, i24 signext %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i24_sext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 1, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i24 %y, 1 %mul = mul i24 %x, %add ret i24 %mul @@ -486,6 +605,13 @@ define i24 @v_add_mul_i24_sext_1(i24 signext %x, i24 signext %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_add_mul_i24_sext_1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32_u24 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i24 %x, %y %add = add i24 %mul, %x ret i24 %add @@ -519,6 +645,15 @@ define i24 @v_mul_sub_1_i24_sext(i24 signext %x, i24 signext %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: 
v_mul_sub_1_i24_sext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, -1, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i24 %y, 1 %mul = mul i24 %x, %sub ret i24 %mul @@ -550,6 +685,13 @@ define i25 @v_mul_add_1_i25_zext(i25 zeroext %x, i25 zeroext %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i25_zext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i25 %y, 1 %mul = mul i25 %x, %add ret i25 %mul @@ -583,6 +725,15 @@ define i25 @v_mul_sub_1_i25_zext(i25 zeroext %x, i25 zeroext %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i25_zext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i25 %y, 1 %mul = mul i25 %x, %sub ret i25 %mul @@ -614,6 +765,13 @@ define i25 @v_mul_add_1_i25_sext(i25 signext %x, i25 signext %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i25_sext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i25 %y, 1 %mul = mul i25 %x, %add ret i25 %mul @@ -647,6 +805,15 @@ define 
i25 @v_mul_sub_1_i25_sext(i25 signext %x, i25 signext %y) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i25_sext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 0x1ffffff, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i25 %y, 1 %mul = mul i25 %x, %sub ret i25 %mul @@ -679,6 +846,13 @@ define i16 @v_mul_add_1_i16(i16 %x, i16 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i16 %y, 1 %mul = mul i16 %x, %add ret i16 %mul @@ -713,6 +887,15 @@ define i32 @v_mul_add_1_i16_zext_result(i16 %x, i16 %y) { ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i16_zext_result: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i16 %y, 1 %mul = mul i16 %x, %add %zext = zext i16 %mul to i32 @@ -746,6 +929,13 @@ define i16 @v_mul_add_1_i16_commute(i16 %x, i16 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i16_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, 
v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i16 %y, 1 %mul = mul i16 %add, %x ret i16 %mul @@ -777,6 +967,13 @@ define i16 @v_mul_add_x_i16(i16 %x, i16 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_x_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i16 %x, %y %add = add i16 %x, %mul ret i16 %add @@ -812,6 +1009,15 @@ define i16 @v_mul_sub_1_i16(i16 %x, i16 %y) { ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u16 v1, v1, -1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i16 %y, 1 %mul = mul i16 %x, %sub ret i16 %mul @@ -847,6 +1053,15 @@ define i16 @v_mul_sub_1_i16_commute(i16 %x, i16 %y) { ; GFX10-NEXT: v_add_nc_u16 v1, v1, -1 ; GFX10-NEXT: v_mul_lo_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i16_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u16 v1, v1, -1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u16 v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i16 %y, 1 %mul = mul i16 %sub, %x ret i16 %mul @@ -882,6 +1097,15 @@ define i16 @v_mul_sub_x_i16(i16 %x, i16 %y) { ; GFX10-NEXT: v_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_sub_nc_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_x_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; 
GFX1250-NEXT: v_mul_lo_u16 v1, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_sub_nc_u16 v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i16 %x, %y %sub = sub i16 %mul, %x ret i16 %sub @@ -917,6 +1141,15 @@ define i16 @v_mul_add_2_i16(i16 %x, i16 %y) { ; GFX10-NEXT: v_add_nc_u16 v1, v1, 2 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_2_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u16 v1, v1, 2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i16 %y, 2 %mul = mul i16 %x, %add ret i16 %mul @@ -952,6 +1185,15 @@ define i16 @v_mul_sub_2_i16(i16 %x, i16 %y) { ; GFX10-NEXT: v_add_nc_u16 v1, v1, -2 ; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_2_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u16 v1, v1, -2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i16 %y, 2 %mul = mul i16 %x, %sub ret i16 %mul @@ -1012,6 +1254,18 @@ define i64 @v_mul_add_1_i64(i64 %x, i64 %y) { ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[4:5], v0, v2, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, v1, v2, v5 +; GFX1250-NEXT: v_mad_u32 v1, v0, v3, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_mov_b32_e32 v0, v4 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i64 %y, 1 
%mul = mul i64 %x, %add ret i64 %mul @@ -1072,6 +1326,18 @@ define i64 @v_mul_add_1_i64_commute(i64 %x, i64 %y) { ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i64_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[4:5], v0, v2, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, v1, v2, v5 +; GFX1250-NEXT: v_mad_u32 v1, v0, v3, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_mov_b32_e32 v0, v4 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i64 %y, 1 %mul = mul i64 %add, %x ret i64 %mul @@ -1132,6 +1398,18 @@ define i64 @v_mul_add_x_i64(i64 %x, i64 %y) { ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_x_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[4:5], v0, v2, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, v1, v2, v5 +; GFX1250-NEXT: v_mad_u32 v1, v0, v3, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_mov_b32_e32 v0, v4 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i64 %x, %y %add = add i64 %x, %mul ret i64 %add @@ -1198,6 +1476,15 @@ define i64 @v_mul_sub_1_i64(i64 %x, i64 %y) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], -1, v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[2:3] +; 
GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i64 %y, 1 %mul = mul i64 %x, %sub ret i64 %mul @@ -1264,6 +1551,15 @@ define i64 @v_mul_sub_1_i64_commute(i64 %x, i64 %y) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v2, v0, 0 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_i64_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], -1, v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[2:3], v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i64 %y, 1 %mul = mul i64 %sub, %x ret i64 %mul @@ -1328,6 +1624,15 @@ define i64 @v_mul_sub_x_i64(i64 %x, i64 %y) { ; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v2, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v1, null, v3, v1, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_x_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_u64_e32 v[2:3], v[0:1], v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i64 %x, %y %sub = sub i64 %mul, %x ret i64 %sub @@ -1394,6 +1699,15 @@ define i64 @v_mul_add_2_i64(i64 %x, i64 %y) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_2_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], 2, v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i64 %y, 2 %mul = mul i64 %x, %add ret i64 %mul @@ -1460,6 +1774,15 @@ define i64 @v_mul_sub_2_i64(i64 %x, i64 %y) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, 0 
; GFX10-NEXT: v_add3_u32 v1, v1, v3, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_2_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], -2, v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_u64_e32 v[0:1], v[0:1], v[2:3] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub i64 %y, 2 %mul = mul i64 %x, %sub ret i64 %mul @@ -1508,6 +1831,14 @@ define <2 x i32> @v_mul_add_1_i32_multiple(i32 %x, i32 %y, i32 %z) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, v3, v[2:3] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i32_multiple: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, v0 +; GFX1250-NEXT: v_mad_u32 v1, v2, v1, v2 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i32 %y, 1 %mul0 = mul i32 %x, %add %mul1 = mul i32 %z, %add @@ -1544,6 +1875,15 @@ define <2 x i32> @v_mul_add_1_i32_other_use(i32 %x, i32 %y, i32 %z) { ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i32_other_use: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 1, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i32 %y, 1 %mul0 = mul i32 %x, %add %mul1 = mul i32 %z, %add @@ -1594,6 +1934,19 @@ define i32 @v_mul_add_1_i32_chain(i32 %arg0, i32 %arg1, i32 %arg2) { ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0 ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[0:1] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i32_chain: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 
0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v1, v2, v1 +; GFX1250-NEXT: v_add_nc_u32_e32 v2, v1, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %i2 = add i32 %arg0, 1 %i3 = mul i32 %i2, %arg1 %i4 = add i32 %i3, %i2 @@ -1640,6 +1993,15 @@ define <2 x i16> @v_mul_add_1_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i16> %y, %mul = mul <2 x i16> %x, %add ret <2 x i16> %mul @@ -1683,6 +2045,15 @@ define <2 x i16> @v_mul_add_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i16_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i16> %y, %mul = mul <2 x i16> %add, %x ret <2 x i16> %mul @@ -1726,6 +2097,13 @@ define <2 x i16> @v_mul_add_x_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_pk_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: 
v_mul_add_x_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i16> %x, %y %add = add <2 x i16> %x, %mul ret <2 x i16> %add @@ -1769,6 +2147,15 @@ define <2 x i16> @v_mul_sub_1_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i16> %y, %mul = mul <2 x i16> %x, %sub ret <2 x i16> %mul @@ -1812,6 +2199,15 @@ define <2 x i16> @v_mul_sub_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_v2i16_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_sub_i16 v1, v1, 1 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i16> %y, %mul = mul <2 x i16> %sub, %x ret <2 x i16> %mul @@ -1858,6 +2254,15 @@ define <2 x i16> @v_mul_sub_x_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1 ; GFX10-NEXT: v_pk_sub_i16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_x_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_mul_lo_u16 v1, v0, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_sub_i16 v0, v1, 
v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i16> %x, %y %sub = sub <2 x i16> %mul, %x ret <2 x i16> %sub @@ -1901,6 +2306,15 @@ define <2 x i16> @v_mul_add_2_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_2_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i16> %y, %mul = mul <2 x i16> %x, %add ret <2 x i16> %mul @@ -1944,6 +2358,15 @@ define <2 x i16> @v_mul_sub_2_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX10-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0] ; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_2_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_sub_i16 v1, v1, 2 op_sel_hi:[1,0] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_pk_mul_lo_u16 v0, v0, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i16> %y, %mul = mul <2 x i16> %x, %sub ret <2 x i16> %mul @@ -1992,6 +2415,14 @@ define <2 x i32> @v_mul_add_1_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v2, v0 +; GFX1250-NEXT: v_mad_u32 v1, v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i32> %y, %mul = mul <2 x i32> %x, %add ret <2 x i32> %mul @@ -2040,6 +2471,14 @@ define <2 x i32> @v_mul_add_1_v2i32_commute(<2 x i32> %x, <2 x i32> %y) { 
; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i32_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v2, v0 +; GFX1250-NEXT: v_mad_u32 v1, v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i32> %y, %mul = mul <2 x i32> %add, %x ret <2 x i32> %mul @@ -2088,6 +2527,14 @@ define <2 x i32> @v_mul_add_x_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v1, v3, v[1:2] ; GFX10-NEXT: v_mov_b32_e32 v0, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_x_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, v2, v0 +; GFX1250-NEXT: v_mad_u32 v1, v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i32> %x, %y %add = add <2 x i32> %x, %mul ret <2 x i32> %add @@ -2129,6 +2576,16 @@ define <2 x i32> @v_mul_sub_1_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, -1, v2 :: v_dual_add_nc_u32 v3, -1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i32> %y, %mul = mul <2 x i32> %x, %sub ret <2 x i32> %mul @@ -2170,6 +2627,16 @@ define <2 x i32> @v_mul_sub_1_v2i32_commute(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_mul_lo_u32 v0, v2, v0 ; GFX10-NEXT: v_mul_lo_u32 v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_v2i32_commute: +; GFX1250: ; %bb.0: +; 
GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, -1, v2 :: v_dual_add_nc_u32 v3, -1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_lo_u32 v0, v2, v0 +; GFX1250-NEXT: v_mul_lo_u32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i32> %y, %mul = mul <2 x i32> %sub, %x ret <2 x i32> %mul @@ -2220,6 +2687,16 @@ define <2 x i32> @v_mul_sub_x_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_x_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v2, v0, v2 +; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_dual_sub_nc_u32 v0, v2, v0 :: v_dual_sub_nc_u32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i32> %x, %y %sub = sub <2 x i32> %mul, %x ret <2 x i32> %sub @@ -2261,6 +2738,16 @@ define <2 x i32> @v_mul_add_2_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_2_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, 2, v2 :: v_dual_add_nc_u32 v3, 2, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i32> %y, %mul = mul <2 x i32> %x, %add ret <2 x i32> %mul @@ -2302,6 +2789,16 @@ define <2 x i32> @v_mul_sub_2_v2i32(<2 x i32> %x, <2 x i32> %y) { ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX1250-LABEL: v_mul_sub_2_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, -2, v2 :: v_dual_add_nc_u32 v3, -2, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_lo_u32 v0, v0, v2 +; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i32> %y, %mul = mul <2 x i32> %x, %sub ret <2 x i32> %mul @@ -2343,6 +2840,16 @@ define <2 x i24> @v_mul_add_1_v2i24(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i24: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX1250-NEXT: v_mul_u32_u24_e32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i24> %y, %mul = mul <2 x i24> %x, %add ret <2 x i24> %mul @@ -2384,6 +2891,16 @@ define <2 x i24> @v_mul_add_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i24_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, 1, v2 :: v_dual_add_nc_u32 v3, 1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX1250-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i24> %y, %mul = mul <2 x i24> %add, %x ret <2 x i24> %mul @@ -2417,6 +2934,14 @@ define <2 x i24> 
@v_mul_add_x_v2i24(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mad_u32_u24 v0, v0, v2, v0 ; GFX10-NEXT: v_mad_u32_u24 v1, v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_x_v2i24: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32_u24 v0, v0, v2, v0 +; GFX1250-NEXT: v_mad_u32_u24 v1, v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i24> %x, %y %add = add <2 x i24> %x, %mul ret <2 x i24> %add @@ -2458,6 +2983,16 @@ define <2 x i24> @v_mul_sub_1_v2i24(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_v2i24: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, -1, v2 :: v_dual_add_nc_u32 v3, -1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX1250-NEXT: v_mul_u32_u24_e32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i24> %y, %mul = mul <2 x i24> %x, %sub ret <2 x i24> %mul @@ -2499,6 +3034,16 @@ define <2 x i24> @v_mul_sub_1_v2i24_commute(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v2, v0 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_1_v2i24_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, -1, v2 :: v_dual_add_nc_u32 v3, -1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v2, v0 +; GFX1250-NEXT: v_mul_u32_u24_e32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %sub = sub <2 x i24> %y, %mul = mul <2 x i24> %sub, %x ret <2 x i24> %mul @@ -2540,6 +3085,16 @@ define <2 
x i24> @v_mul_sub_x_v2i24(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_x_v2i24: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mul_u32_u24_e32 v2, v0, v2 +; GFX1250-NEXT: v_mul_u32_u24_e32 v3, v1, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_dual_sub_nc_u32 v0, v2, v0 :: v_dual_sub_nc_u32 v1, v3, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i24> %x, %y %sub = sub <2 x i24> %mul, %x ret <2 x i24> %sub @@ -2581,6 +3136,16 @@ define <2 x i24> @v_mul_add_2_v2i24(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_2_v2i24: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, 2, v2 :: v_dual_add_nc_u32 v3, 2, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX1250-NEXT: v_mul_u32_u24_e32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i24> %y, %mul = mul <2 x i24> %x, %add ret <2 x i24> %mul @@ -2622,6 +3187,16 @@ define <2 x i24> @v_mul_sub_2_v2i24(<2 x i24> %x, <2 x i24> %y) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v2 ; GFX10-NEXT: v_mul_u32_u24_e32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_sub_2_v2i24: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_add_nc_u32 v2, -2, v2 :: v_dual_add_nc_u32 v3, -2, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v2 +; GFX1250-NEXT: v_mul_u32_u24_e32 v1, v1, v3 +; GFX1250-NEXT: s_set_pc_i64 
s[30:31] %sub = sub <2 x i24> %y, %mul = mul <2 x i24> %x, %sub ret <2 x i24> %mul @@ -2653,6 +3228,13 @@ define i32 @v_mul_9_add_52_i32(i32 %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_9_add_52_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, 9, 52 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i32 %arg, 9 %add = add i32 %mul, 52 ret i32 %add @@ -2683,6 +3265,13 @@ define i16 @v_mul_9_add_52_i16(i16 %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, 9, 52 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_9_add_52_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, 9, 52 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i16 %arg, 9 %add = add i16 %mul, 52 ret i16 %add @@ -2723,6 +3312,13 @@ define <2 x i16> @v_mul_9_add_52_v2i16(<2 x i16> %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_pk_mad_u16 v0, v0, 9, 52 op_sel_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_9_add_52_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, 9, 52 op_sel_hi:[1,0,0] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i16> %arg, %add = add <2 x i16> %mul, ret <2 x i16> %add @@ -2781,6 +3377,16 @@ define i64 @v_mul_9_add_52_i64(i64 %arg) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 9, 52 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 9, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_9_add_52_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v2, v1 +; GFX1250-NEXT: v_mad_nc_u64_u32 
v[0:1], v0, 9, 52 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, v2, 9, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i64 %arg, 9 %add = add i64 %mul, 52 ret i64 %add @@ -2812,6 +3418,13 @@ define i32 @v_mul_5_add_1_i32(i32 %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_5_add_1_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u32 v0, v0, 5, 1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i32 %arg, 5 %add = add i32 %mul, 1 ret i32 %add @@ -2848,6 +3461,15 @@ define i32 @v_mul_284_add_82_i32(i32 %arg) { ; GFX10-NEXT: s_movk_i32 s4, 0x11c ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_284_add_82_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_movk_i32 s0, 0x11c +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mad_u32 v0, v0, s0, 0x52 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i32 %arg, 284 %add = add i32 %mul, 82 ret i32 %add @@ -2878,6 +3500,13 @@ define i16 @v_mul_5_add_1_i16(i16 %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, 5, 1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_5_add_1_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, 5, 1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i16 %arg, 5 %add = add i16 %mul, 1 ret i16 %add @@ -2915,6 +3544,15 @@ define i16 @v_mul_284_add_82_i16(i16 %arg) { ; GFX10-NEXT: s_movk_i32 s4, 0x11c ; GFX10-NEXT: v_mad_u16 v0, v0, s4, 0x52 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_284_add_82_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_movk_i32 s0, 0x11c +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mad_u16 v0, v0, s0, 0x52 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i16 %arg, 284 %add = add i16 %mul, 82 ret i16 %add @@ -2955,6 +3593,13 @@ define <2 x i16> @v_mul_5_add_1_v2i16(<2 x i16> %arg) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_pk_mad_u16 v0, v0, 5, 1 op_sel_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_5_add_1_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, 5, 1 op_sel_hi:[1,0,0] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i16> %arg, %add = add <2 x i16> %mul, ret <2 x i16> %add @@ -3002,6 +3647,15 @@ define <2 x i16> @v_mul_284_add_82_v2i16(<2 x i16> %arg) { ; GFX10-NEXT: s_movk_i32 s4, 0x11c ; GFX10-NEXT: v_pk_mad_u16 v0, v0, s4, 0x52 op_sel_hi:[1,0,0] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_284_add_82_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_movk_i32 s0, 0x11c +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_pk_mad_u16 v0, v0, s0, 0x52 op_sel_hi:[1,0,0] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul <2 x i16> %arg, %add = add <2 x i16> %mul, ret <2 x i16> %add @@ -3060,6 +3714,16 @@ define i64 @v_mul_5_add_1_i64(i64 %arg) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 5, 1 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v2, 5, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_5_add_1_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mov_b32_e32 v2, v1 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, 5, 1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, v2, 5, v1 +; GFX1250-NEXT: 
s_set_pc_i64 s[30:31] %mul = mul i64 %arg, 5 %add = add i64 %mul, 1 ret i64 %add @@ -3132,6 +3796,17 @@ define i64 @v_mul_284_add_82_i64(i64 %arg) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x52 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x11c, v2, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_284_add_82_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_movk_i32 s0, 0x11c +; GFX1250-NEXT: v_mov_b32_e32 v2, v1 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, s0, 0x52 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, 0x11c, v2, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i64 %arg, 284 %add = add i64 %mul, 82 ret i64 %add @@ -3204,6 +3879,17 @@ define i64 @v_mul_934584645_add_8234599_i64(i64 %arg) { ; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s4, 0x7da667 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, 0x37b4a145, v2, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_934584645_add_8234599_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_mov_b32 s0, 0x37b4a145 +; GFX1250-NEXT: v_mov_b32_e32 v2, v1 +; GFX1250-NEXT: v_mad_nc_u64_u32 v[0:1], v0, s0, 0x7da667 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_mad_u32 v1, 0x37b4a145, v2, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %mul = mul i64 %arg, 934584645 %add = add i64 %mul, 8234599 ret i64 %add @@ -3394,6 +4080,44 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) % ; GFX10-NEXT: v_add_co_ci_u32_e64 v2, null, s5, v3, vcc_lo ; GFX10-NEXT: global_store_dword v[1:2], v0, off ; GFX10-NEXT: s_endpgm +; +; GFX1250-LABEL: compute_mad: +; GFX1250: ; %bb.0: ; %bb +; GFX1250-NEXT: s_load_b96 s[8:10], s[4:5], 0x10 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_add_co_i32 s0, s10, 1 +; GFX1250-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4 +; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2 +; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff +; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2 +; GFX1250-NEXT: v_mad_u32 v0, ttmp9, s2, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2 +; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1] +; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[8:9] +; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3 +; GFX1250-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-NEXT: s_endpgm bb: %i = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 %i2 = add i32 %arg1, 1 @@ -3450,6 +4174,13 @@ define amdgpu_ps i32 @s_mul_add_1_i32(i32 inreg %x, i32 inreg %y) { ; GFX10-NEXT: s_add_i32 s1, s1, 1 ; GFX10-NEXT: s_mul_i32 s0, s0, s1 ; GFX10-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: s_mul_add_1_i32: +; GFX1250: ; %bb.0: +; 
GFX1250-NEXT: s_add_co_i32 s1, s1, 1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mul_i32 s0, s0, s1 +; GFX1250-NEXT: ; return to shader part epilog %add = add i32 %y, 1 %mul = mul i32 %x, %add ret i32 %mul @@ -3479,6 +4210,13 @@ define amdgpu_ps i32 @s_mul_add_1_i32_commute(i32 inreg %x, i32 inreg %y) { ; GFX10-NEXT: s_add_i32 s1, s1, 1 ; GFX10-NEXT: s_mul_i32 s0, s1, s0 ; GFX10-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: s_mul_add_1_i32_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_add_co_i32 s1, s1, 1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-NEXT: ; return to shader part epilog %add = add i32 %y, 1 %mul = mul i32 %add, %x ret i32 %mul @@ -3511,6 +4249,13 @@ define i8 @v_mul_add_1_i8(i8 %x, i8 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i8 %y, 1 %mul = mul i8 %x, %add ret i8 %mul @@ -3543,6 +4288,13 @@ define i8 @v_mul_add_1_i8_commute(i8 %x, i8 %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i8_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i8 %y, 1 %mul = mul i8 %add, %x ret i8 %mul @@ -3574,6 +4326,13 @@ define i8 @v_mul_add_1_i8_zext(i8 zeroext %x, i8 zeroext %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i8_zext: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: 
s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i8 %y, 1 %mul = mul i8 %x, %add ret i8 %mul @@ -3605,6 +4364,13 @@ define i8 @v_mul_add_1_i8_zext_commute(i8 zeroext %x, i8 zeroext %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mad_u16 v0, v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_i8_zext_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v0, v0, v1, v0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add i8 %y, 1 %mul = mul i8 %add, %x ret i8 %mul @@ -3656,6 +4422,18 @@ define <2 x i8> @v_mul_add_1_v2i8(<2 x i8> %x, <2 x i8> %y) { ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v1, v1, v3, v1 +; GFX1250-NEXT: v_mad_u16 v0, v0, v2, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_lshlrev_b16 v2, 8, v1 +; GFX1250-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX1250-NEXT: v_bitop3_b16 v0, v0, v2, 0xff bitop3:0xec +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i8> %y, %mul = mul <2 x i8> %x, %add ret <2 x i8> %mul @@ -3707,6 +4485,18 @@ define <2 x i8> @v_mul_add_1_v2i8_commute(<2 x i8> %x, <2 x i8> %y) { ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: v_mul_add_1_v2i8_commute: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_mad_u16 v1, v1, v3, v1 +; 
GFX1250-NEXT: v_mad_u16 v0, v0, v2, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_lshlrev_b16 v2, 8, v1 +; GFX1250-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX1250-NEXT: v_bitop3_b16 v0, v0, v2, 0xff bitop3:0xec +; GFX1250-NEXT: s_set_pc_i64 s[30:31] %add = add <2 x i8> %y, %mul = mul <2 x i8> %add, %x ret <2 x i8> %mul @@ -3749,6 +4539,17 @@ define i64 @mul_u24_with_uneven_operands(i32 %z) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mul_u24_with_uneven_operands: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 1, v0 +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v1, v0 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %c = and i32 %z, 1 %d = add nuw nsw i32 %c, 1 @@ -3792,6 +4593,17 @@ define i64 @mul_u24_with_uneven_operands_swapped(i32 %z) { ; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mul_u24_with_uneven_operands_swapped: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u32_e32 v1, 1, v0 +; GFX1250-NEXT: v_mul_u32_u24_e32 v0, v0, v1 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %c = and i32 %z, 1 %d = add nuw nsw i32 %c, 1 @@ -3836,6 +4648,17 @@ define i64 @mul_i24_with_uneven_operands(i32 %z) { ; GFX10-NEXT: v_mul_i32_i24_e32 v0, v2, v1 ; GFX10-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: 
mul_i24_with_uneven_operands: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v1, 1, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v1 +; GFX1250-NEXT: v_mul_i32_i24_e32 v0, v2, v1 +; GFX1250-NEXT: v_mul_hi_i32_i24_e32 v1, v2, v1 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %c = and i32 %z, 1 %d = add nuw nsw i32 %c, 1 @@ -3879,6 +4702,17 @@ define i64 @mul_i24_with_uneven_operands_swapped(i32 %z) { ; GFX10-NEXT: v_mul_i32_i24_e32 v0, v1, v2 ; GFX10-NEXT: v_mul_hi_i32_i24_e32 v1, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX1250-LABEL: mul_i24_with_uneven_operands_swapped: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v1, 1, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v1 +; GFX1250-NEXT: v_mul_i32_i24_e32 v0, v1, v2 +; GFX1250-NEXT: v_mul_hi_i32_i24_e32 v1, v1, v2 +; GFX1250-NEXT: s_set_pc_i64 s[30:31] entry: %c = and i32 %z, 1 %d = add nuw nsw i32 %c, 1 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index b67c6d570d217..8a00f8851d192 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -1,6 +1,108 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +v_bitop3_b32 v5, v1, v2, s3 +// GFX1250: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd6,0x01,0x05,0x0e,0x00] + +v_bitop3_b32 v5, v1, v2, s3 bitop3:161 +// GFX1250: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd6,0x01,0x05,0x0e,0x30] + 
+v_bitop3_b32 v5, v255, s2, s105 bitop3:0x27 +// GFX1250: v_bitop3_b32 v5, v255, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x34,0xd6,0xff,0x05,0xa4,0xe1] + +v_bitop3_b32 v5, s1, v255, exec_hi bitop3:100 +// GFX1250: v_bitop3_b32 v5, s1, v255, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x34,0xd6,0x01,0xfe,0xff,0x89] + +v_bitop3_b32 v5, s105, s105, exec_lo bitop3:0 +// GFX1250: v_bitop3_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x34,0xd6,0x69,0xd2,0xf8,0x01] + +v_bitop3_b32 v5, vcc_lo, ttmp15, v3 bitop3:0x15 +// GFX1250: v_bitop3_b32 v5, vcc_lo, ttmp15, v3 bitop3:0x15 ; encoding: [0x05,0x02,0x34,0xd6,0x6a,0xf6,0x0c,0xa4] + +v_bitop3_b32 v5, vcc_hi, 0xaf123456, v255 bitop3:63 +// GFX1250: v_bitop3_b32 v5, vcc_hi, 0xaf123456, v255 bitop3:0x3f ; encoding: [0x05,0x07,0x34,0xd6,0x6b,0xfe,0xfd,0xe7,0x56,0x34,0x12,0xaf] + +v_bitop3_b32 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 +// GFX1250: v_bitop3_b32 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x34,0xd6,0x7b,0xfa,0xed,0x81] + +v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 +// GFX1250: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd6,0x7d,0xe0,0xf5,0xa1] + +v_bitop3_b32 v5, exec_lo, -1, vcc_hi bitop3:6 +// GFX1250: v_bitop3_b32 v5, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x34,0xd6,0x7e,0x82,0xad,0xc1] + +v_bitop3_b32 v5, exec_hi, null, vcc_lo bitop3:77 +// GFX1250: v_bitop3_b32 v5, exec_hi, null, vcc_lo bitop3:0x4d ; encoding: [0x05,0x01,0x34,0xd6,0x7f,0xf8,0xa8,0xa9] + +v_bitop3_b32 v5, null, exec_lo, 0xaf123456 bitop3:88 +// GFX1250: v_bitop3_b32 v5, null, exec_lo, 0xaf123456 bitop3:0x58 ; encoding: [0x05,0x03,0x34,0xd6,0x7c,0xfc,0xfc,0x0b,0x56,0x34,0x12,0xaf] + +v_bitop3_b32 v5, -1, exec_hi, src_scc bitop3:99 +// GFX1250: v_bitop3_b32 v5, -1, exec_hi, src_scc bitop3:0x63 ; encoding: [0x05,0x04,0x34,0xd6,0xc1,0xfe,0xf4,0x6b] + +v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101 +// GFX1250: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: 
[0x05,0x04,0x34,0xd6,0xf0,0xfa,0xc0,0xab] + +v_bitop3_b32 v5, src_scc, vcc_lo, -1 bitop3:102 +// GFX1250: v_bitop3_b32 v5, src_scc, vcc_lo, -1 bitop3:0x66 ; encoding: [0x05,0x04,0x34,0xd6,0xfd,0xd4,0x04,0xcb] + +v_bitop3_b32 v255, 0xaf123456, vcc_hi, null bitop3:103 +// GFX1250: v_bitop3_b32 v255, 0xaf123456, vcc_hi, null bitop3:0x67 ; encoding: [0xff,0x04,0x34,0xd6,0xff,0xd6,0xf0,0xe9,0x56,0x34,0x12,0xaf] + +v_bitop3_b16 v5, v1, v2, s3 +// GFX1250: v_bitop3_b16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x33,0xd6,0x01,0x05,0x0e,0x00] + +v_bitop3_b16 v5, v1, v2, s3 bitop3:161 +// GFX1250: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0x05,0x0e,0x30] + +v_bitop3_b16 v5, v255, s2, s105 bitop3:0x27 +// GFX1250: v_bitop3_b16 v5, v255, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x33,0xd6,0xff,0x05,0xa4,0xe1] + +v_bitop3_b16 v5, s1, v255, exec_hi bitop3:100 +// GFX1250: v_bitop3_b16 v5, s1, v255, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0xfe,0xff,0x89] + +v_bitop3_b16 v5, s105, s105, exec_lo bitop3:0 +// GFX1250: v_bitop3_b16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x33,0xd6,0x69,0xd2,0xf8,0x01] + +v_bitop3_b16 v5, vcc_lo, ttmp15, v3 bitop3:0x15 +// GFX1250: v_bitop3_b16 v5, vcc_lo, ttmp15, v3 bitop3:0x15 ; encoding: [0x05,0x02,0x33,0xd6,0x6a,0xf6,0x0c,0xa4] + +v_bitop3_b16 v5, vcc_hi, 0xfe0b, v255 bitop3:63 +// GFX1250: v_bitop3_b16 v5, vcc_hi, 0xfe0b, v255 bitop3:0x3f ; encoding: [0x05,0x07,0x33,0xd6,0x6b,0xfe,0xfd,0xe7,0x0b,0xfe,0x00,0x00] + +v_bitop3_b16 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 +// GFX1250: v_bitop3_b16 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x33,0xd6,0x7b,0xfa,0xed,0x81] + +v_bitop3_b16 v5, m0, 0.5, m0 bitop3:5 +// GFX1250: v_bitop3_b16 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x33,0xd6,0x7d,0xe0,0xf5,0xa1] + +v_bitop3_b16 v5, exec_lo, -1, vcc_hi bitop3:6 +// GFX1250: v_bitop3_b16 v5, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x33,0xd6,0x7e,0x82,0xad,0xc1] + 
+v_bitop3_b16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX1250: v_bitop3_b16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x33,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bitop3_b16 v5, exec_hi, null, vcc_lo bitop3:0x77 op_sel:[1,1,1,1] +// GFX1250: v_bitop3_b16 v5, exec_hi, null, vcc_lo bitop3:0x77 op_sel:[1,1,1,1] ; encoding: [0x05,0x7e,0x33,0xd6,0x7f,0xf8,0xa8,0xe9] + +v_bitop3_b16 v5, null, exec_lo, 0xfe0b bitop3:0x88 op_sel:[0,0,0,0] +// GFX1250: v_bitop3_b16 v5, null, exec_lo, 0xfe0b bitop3:0x88 ; encoding: [0x05,0x01,0x33,0xd6,0x7c,0xfc,0xfc,0x13,0x0b,0xfe,0x00,0x00] + +v_bitop3_b16 v5, -1, exec_hi, src_scc bitop3:99 op_sel:[1,0,0,0] +// GFX1250: v_bitop3_b16 v5, -1, exec_hi, src_scc bitop3:0x63 op_sel:[1,0,0,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xc1,0xfe,0xf4,0x6b] + +v_bitop3_b16 v5, 0.5, m0, 0.5 bitop3:101 op_sel:[0,1,0,0] +// GFX1250: v_bitop3_b16 v5, 0.5, m0, 0.5 bitop3:0x65 op_sel:[0,1,0,0] ; encoding: [0x05,0x14,0x33,0xd6,0xf0,0xfa,0xc0,0xab] + +v_bitop3_b16 v5, src_scc, vcc_lo, -1 bitop3:102 op_sel:[0,0,1,0] +// GFX1250: v_bitop3_b16 v5, src_scc, vcc_lo, -1 bitop3:0x66 op_sel:[0,0,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xfd,0xd4,0x04,0xcb] + +v_bitop3_b16 v255, 0xfe0b, vcc_hi, null bitop3:103 op_sel:[0,0,0,1] +// GFX1250: v_bitop3_b16 v255, 0xfe0b, vcc_hi, null bitop3:0x67 op_sel:[0,0,0,1] ; encoding: [0xff,0x44,0x33,0xd6,0xff,0xd6,0xf0,0xe9,0x0b,0xfe,0x00,0x00] + +v_bitop3_b16 v1, v2, v3, v4 bitop3:103 op_sel:[1,1,1,1] +// GFX1250: v_bitop3_b16 v1, v2, v3, v4 bitop3:0x67 op_sel:[1,1,1,1] ; encoding: [0x01,0x7c,0x33,0xd6,0x02,0x07,0x12,0xec] + v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] // GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 5157020fcc675..d9561fadb20d0 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -1,6 +1,108 @@ // NOTE: 
Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +v_bitop3_b32 v5, v1, v2, s3 +// GFX1250: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd6,0x01,0x05,0x0e,0x00] + +v_bitop3_b32 v5, v1, v2, s3 bitop3:161 +// GFX1250: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd6,0x01,0x05,0x0e,0x30] + +v_bitop3_b32 v5, v255, s2, s105 bitop3:0x27 +// GFX1250: v_bitop3_b32 v5, v255, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x34,0xd6,0xff,0x05,0xa4,0xe1] + +v_bitop3_b32 v5, s1, v255, exec_hi bitop3:100 +// GFX1250: v_bitop3_b32 v5, s1, v255, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x34,0xd6,0x01,0xfe,0xff,0x89] + +v_bitop3_b32 v5, s105, s105, exec_lo bitop3:0 +// GFX1250: v_bitop3_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x34,0xd6,0x69,0xd2,0xf8,0x01] + +v_bitop3_b32 v5, vcc_lo, ttmp15, v3 bitop3:0x15 +// GFX1250: v_bitop3_b32 v5, vcc_lo, ttmp15, v3 bitop3:0x15 ; encoding: [0x05,0x02,0x34,0xd6,0x6a,0xf6,0x0c,0xa4] + +v_bitop3_b32 v5, vcc_hi, 0xaf123456, v255 bitop3:63 +// GFX1250: v_bitop3_b32 v5, vcc_hi, 0xaf123456, v255 bitop3:0x3f ; encoding: [0x05,0x07,0x34,0xd6,0x6b,0xfe,0xfd,0xe7,0x56,0x34,0x12,0xaf] + +v_bitop3_b32 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 +// GFX1250: v_bitop3_b32 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x34,0xd6,0x7b,0xfa,0xed,0x81] + +v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 +// GFX1250: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd6,0x7d,0xe0,0xf5,0xa1] + +v_bitop3_b32 v5, exec_lo, -1, vcc_hi bitop3:6 +// GFX1250: v_bitop3_b32 v5, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x34,0xd6,0x7e,0x82,0xad,0xc1] + +v_bitop3_b32 v5, exec_hi, null, vcc_lo bitop3:77 +// GFX1250: v_bitop3_b32 v5, exec_hi, null, vcc_lo bitop3:0x4d ; encoding: [0x05,0x01,0x34,0xd6,0x7f,0xf8,0xa8,0xa9] + +v_bitop3_b32 v5, 
null, exec_lo, 0xaf123456 bitop3:88 +// GFX1250: v_bitop3_b32 v5, null, exec_lo, 0xaf123456 bitop3:0x58 ; encoding: [0x05,0x03,0x34,0xd6,0x7c,0xfc,0xfc,0x0b,0x56,0x34,0x12,0xaf] + +v_bitop3_b32 v5, -1, exec_hi, src_scc bitop3:99 +// GFX1250: v_bitop3_b32 v5, -1, exec_hi, src_scc bitop3:0x63 ; encoding: [0x05,0x04,0x34,0xd6,0xc1,0xfe,0xf4,0x6b] + +v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:101 +// GFX1250: v_bitop3_b32 v5, 0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd6,0xf0,0xfa,0xc0,0xab] + +v_bitop3_b32 v5, src_scc, vcc_lo, -1 bitop3:102 +// GFX1250: v_bitop3_b32 v5, src_scc, vcc_lo, -1 bitop3:0x66 ; encoding: [0x05,0x04,0x34,0xd6,0xfd,0xd4,0x04,0xcb] + +v_bitop3_b32 v255, 0xaf123456, vcc_hi, null bitop3:103 +// GFX1250: v_bitop3_b32 v255, 0xaf123456, vcc_hi, null bitop3:0x67 ; encoding: [0xff,0x04,0x34,0xd6,0xff,0xd6,0xf0,0xe9,0x56,0x34,0x12,0xaf] + +v_bitop3_b16 v5.l, v1.l, v2.l, s3 +// GFX1250: v_bitop3_b16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x33,0xd6,0x01,0x05,0x0e,0x00] + +v_bitop3_b16 v5, v1, v2, s3 bitop3:161 +// GFX1250: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0x05,0x0e,0x30] + +v_bitop3_b16 v5, v255, s2, s105 bitop3:0x27 +// GFX1250: v_bitop3_b16 v5, v255, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x33,0xd6,0xff,0x05,0xa4,0xe1] + +v_bitop3_b16 v5, s1, v255, exec_hi bitop3:100 +// GFX1250: v_bitop3_b16 v5, s1, v255, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0xfe,0xff,0x89] + +v_bitop3_b16 v5, s105, s105, exec_lo bitop3:0 +// GFX1250: v_bitop3_b16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x33,0xd6,0x69,0xd2,0xf8,0x01] + +v_bitop3_b16 v5, vcc_lo, ttmp15, v3 bitop3:0x15 +// GFX1250: v_bitop3_b16 v5, vcc_lo, ttmp15, v3 bitop3:0x15 ; encoding: [0x05,0x02,0x33,0xd6,0x6a,0xf6,0x0c,0xa4] + +v_bitop3_b16 v5, vcc_hi, 0xfe0b, v255 bitop3:63 +// GFX1250: v_bitop3_b16 v5, vcc_hi, 0xfe0b, v255 bitop3:0x3f ; encoding: [0x05,0x07,0x33,0xd6,0x6b,0xfe,0xfd,0xe7,0x0b,0xfe,0x00,0x00] + +v_bitop3_b16 
v5, ttmp15, src_scc, ttmp15 bitop3:0x24 +// GFX1250: v_bitop3_b16 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x33,0xd6,0x7b,0xfa,0xed,0x81] + +v_bitop3_b16 v5, m0, 0.5, m0 bitop3:5 +// GFX1250: v_bitop3_b16 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x33,0xd6,0x7d,0xe0,0xf5,0xa1] + +v_bitop3_b16 v5, exec_lo, -1, vcc_hi bitop3:6 +// GFX1250: v_bitop3_b16 v5, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x33,0xd6,0x7e,0x82,0xad,0xc1] + +v_bitop3_b16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] +// GFX1250: v_bitop3_b16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x33,0xd6,0x7f,0xf8,0xa8,0x01] + +v_bitop3_b16 v5.h, exec_hi, null, vcc_lo bitop3:0x77 op_sel:[1,1,1,1] +// GFX1250: v_bitop3_b16 v5.h, exec_hi, null, vcc_lo bitop3:0x77 op_sel:[1,1,1,1] ; encoding: [0x05,0x7e,0x33,0xd6,0x7f,0xf8,0xa8,0xe9] + +v_bitop3_b16 v5.l, null, exec_lo, 0xfe0b bitop3:0x88 +// GFX1250: v_bitop3_b16 v5.l, null, exec_lo, 0xfe0b bitop3:0x88 ; encoding: [0x05,0x01,0x33,0xd6,0x7c,0xfc,0xfc,0x13,0x0b,0xfe,0x00,0x00] + +v_bitop3_b16 v5.l, -1, exec_hi, src_scc bitop3:99 +// GFX1250: v_bitop3_b16 v5.l, -1, exec_hi, src_scc bitop3:0x63 ; encoding: [0x05,0x04,0x33,0xd6,0xc1,0xfe,0xf4,0x6b] + +v_bitop3_b16 v5.l, 0.5, m0, 0.5 bitop3:101 op_sel:[0,1,0,0] +// GFX1250: v_bitop3_b16 v5.l, 0.5, m0, 0.5 bitop3:0x65 op_sel:[0,1,0,0] ; encoding: [0x05,0x14,0x33,0xd6,0xf0,0xfa,0xc0,0xab] + +v_bitop3_b16 v5.l, src_scc, vcc_lo, -1 bitop3:102 op_sel:[0,0,1,0] +// GFX1250: v_bitop3_b16 v5.l, src_scc, vcc_lo, -1 bitop3:0x66 op_sel:[0,0,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xfd,0xd4,0x04,0xcb] + +v_bitop3_b16 v255.h, 0xfe0b, vcc_hi, null bitop3:103 op_sel:[0,0,0,1] +// GFX1250: v_bitop3_b16 v255.h, 0xfe0b, vcc_hi, null bitop3:0x67 op_sel:[0,0,0,1] ; encoding: [0xff,0x44,0x33,0xd6,0xff,0xd6,0xf0,0xe9,0x0b,0xfe,0x00,0x00] + +v_bitop3_b16 v1.h, v2.h, v3.h, v4.h bitop3:103 +// GFX1250: v_bitop3_b16 v1.h, v2.h, v3.h, v4.h bitop3:0x67 op_sel:[1,1,1,1] ; encoding: 
[0x01,0x7c,0x33,0xd6,0x02,0x07,0x12,0xec] + v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] // GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s index bc910b9dd18e9..d9c7645543db2 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s @@ -2,6 +2,150 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:161 quad_perm:[0,1,2,3] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:100 row_half_mirror +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xfe,0x8f,0x01,0x41,0x01,0xff] +// GFX12-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0 row_shl:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x34,0xd6,0xfa,0x04,0xae,0xa1,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:63 row_shr:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x34,0xd6,0xfa,0x04,0xaa,0xe1,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:6 row_ror:15 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:77 
row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x34,0xd6,0xfa,0x04,0xf2,0xa9,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:88 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x34,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:99 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xc2,0x6b,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:101 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x04,0x34,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:161 quad_perm:[0,1,2,3] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not 
supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:100 row_half_mirror +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x8c,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:0 row_shl:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x16 row_shl:15 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x16 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x33,0xd6,0xfa,0x04,0xa6,0xc1,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:63 row_shr:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x33,0xd6,0xfa,0x04,0xae,0xe1,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0x24 row_shr:15 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:5 row_ror:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, 
v1, v2, ttmp15 bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0xa1,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:6 row_ror:15 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0xc1,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:77 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x33,0xd6,0xfa,0x04,0xfa,0xa9,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:88 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:99 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x06,0x6b,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:101 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x04,0x33,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:102 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x66 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0xfe,0xc9,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:103 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x67 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x33,0xd6,0xfa,0x04,0xfa,0xe9,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:104 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:0x68 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:104 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x68 op_sel:[0,0,1,0] row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x25,0x33,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x60,0x01,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:104 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x68 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x45,0x33,0xd6,0xfa,0xfe,0xf7,0x0b,0xff,0x6f,0x0d,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:102 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s index 3bb84e264cf76..ccf50b207878b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s @@ -2,6 +2,150 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:161 quad_perm:[0,1,2,3] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:100 row_half_mirror +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xfe,0x8f,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0 row_shl:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x34,0xd6,0xfa,0x04,0xae,0xa1,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:63 row_shr:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x34,0xd6,0xfa,0x04,0xaa,0xe1,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: 
error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:6 row_ror:15 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:77 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x34,0xd6,0xfa,0x04,0xf2,0xa9,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:88 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x34,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:99 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xc2,0x6b,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: 
error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:101 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x04,0x34,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:161 quad_perm:[0,1,2,3] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:100 row_half_mirror +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x8c,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:0 row_shl:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on 
this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x16 row_shl:15 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x16 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x33,0xd6,0xfa,0x04,0xa6,0xc1,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:63 row_shr:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x33,0xd6,0xfa,0x04,0xae,0xe1,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0x24 row_shr:15 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:5 row_ror:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0xa1,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:6 row_ror:15 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0xc1,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:77 row_share:0 row_mask:0xf 
bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x33,0xd6,0xfa,0x04,0xfa,0xa9,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:88 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:99 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x06,0x6b,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:101 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x04,0x33,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:102 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:0x66 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x7c,0x33,0xd6,0xfa,0x04,0xfe,0xc9,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:103 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:0x67 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x33,0xd6,0xfa,0x04,0xfa,0xe9,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null bitop3:104 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null bitop3:0x68 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:104 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x68 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x25,0x33,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x60,0x01,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:104 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:0x68 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x45,0x33,0xd6,0xfa,0xfe,0xf7,0x0b,0xff,0x6f,0x0d,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, v3.h bitop3:102 quad_perm:[0,1,2,3] +// GFX1250: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, v3.h bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff] +// 
GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s index f48445f84aa31..40d27c8515d6a 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s @@ -2,6 +2,126 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_bitop3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:161 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:100 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 
vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x15 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd6,0xe9,0x04,0xee,0xa1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:63 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x34,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] +// GFX12-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:77 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x4d dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x01,0x34,0xd6,0xea,0x04,0xc2,0xab,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:88 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x58 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x34,0xd6,0xe9,0xfe,0xf7,0x0b,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:161 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:100 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0 
dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:15 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:0xf dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd6,0xe9,0x04,0xee,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:63 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x33,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x33,0xd6,0xea,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:77 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x4d dpp8:[0,0,0,0,0,0,0,0] ; 
encoding: [0xff,0x01,0x33,0xd6,0xe9,0xfe,0xf7,0xab,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x33,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:88 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x58 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7b,0x33,0xd6,0xe9,0x04,0xfe,0x09,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:99 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x63 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xe9,0x04,0xfa,0x69,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x33,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:102 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x66 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xe9,0x04,0x06,0xcb,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:103 op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v255, v255, v255, 
src_scc bitop3:0x67 op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x44,0x33,0xd6,0xea,0xfe,0xf7,0xeb,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:102 op_sel:[1,1,1,1] dpp8:[0,0,0,0,0,0,0,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x7c,0x33,0xd6,0xe9,0x04,0x0e,0xcc,0x01,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s index d7a95f42aaecc..fb5593d9a5d93 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s @@ -2,6 +2,126 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s +v_bitop3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:161 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + 
+v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:100 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x15 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd6,0xe9,0x04,0xee,0xa1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:63 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x34,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, null 
bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:77 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x4d dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x01,0x34,0xd6,0xea,0x04,0xc2,0xab,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:88 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x58 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x34,0xd6,0xe9,0xfe,0xf7,0x0b,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:161 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: 
:[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:100 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:15 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:0xf dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd6,0xe9,0x04,0xee,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:63 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x33,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:6 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x33,0xd6,0xea,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:77 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x4d dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x01,0x33,0xd6,0xe9,0xfe,0xf7,0xab,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x33,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:88 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:0x58 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7b,0x33,0xd6,0xe9,0x04,0xfe,0x09,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:99 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:0x63 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xe9,0x04,0xfa,0x69,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x33,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.l, v1.l, 
v2.l, -1 bitop3:102 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x66 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xe9,0x04,0x06,0xcb,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:103 dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX1250: v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:0x67 op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x44,0x33,0xd6,0xea,0xfe,0xf7,0xeb,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, v3.h bitop3:102 dpp8:[0,0,0,0,0,0,0,0] +// GFX1250: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, v3.h bitop3:0x66 op_sel:[1,1,1,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x7c,0x33,0xd6,0xe9,0x04,0x0e,0xcc,0x01,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index c81d89df8c903..335e2ed34ed6e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -2,6 +2,126 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xff,0x44,0x33,0xd6,0xff,0xd6,0xf0,0xe9,0x0b,0xfe,0x00,0x00 +# GFX1250-REAL16: 
v_bitop3_b16 v255.h, 0xfe0b, vcc_hi, null bitop3:0x67 op_sel:[0,0,0,1] ; encoding: [0xff,0x44,0x33,0xd6,0xff,0xd6,0xf0,0xe9,0x0b,0xfe,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16 v255, 0xfe0b, vcc_hi, null bitop3:0x67 op_sel:[0,0,0,1] ; encoding: [0xff,0x44,0x33,0xd6,0xff,0xd6,0xf0,0xe9,0x0b,0xfe,0x00,0x00] + +0x05,0x0c,0x33,0xd6,0xc1,0xfe,0xf4,0x6b +# GFX1250-REAL16: v_bitop3_b16 v5.l, -1, exec_hi, src_scc bitop3:0x63 op_sel:[1,0,0,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xc1,0xfe,0xf4,0x6b] +# GFX1250-FAKE16: v_bitop3_b16 v5, -1, exec_hi, src_scc bitop3:0x63 op_sel:[1,0,0,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xc1,0xfe,0xf4,0x6b] + +0x05,0x14,0x33,0xd6,0xff,0xfa,0xfc,0xab,0x00,0x38,0x00,0x00 +# GFX1250-REAL16: v_bitop3_b16 v5.l, 0x3800, m0, 0x3800 bitop3:0x65 op_sel:[0,1,0,0] ; encoding: [0x05,0x14,0x33,0xd6,0xff,0xfa,0xfc,0xab,0x00,0x38,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16 v5, 0x3800, m0, 0x3800 bitop3:0x65 op_sel:[0,1,0,0] ; encoding: [0x05,0x14,0x33,0xd6,0xff,0xfa,0xfc,0xab,0x00,0x38,0x00,0x00] + +0x05,0x7e,0x33,0xd6,0x7f,0xf8,0xa8,0xe9 +# GFX1250-REAL16: v_bitop3_b16 v5.h, exec_hi, null, vcc_lo bitop3:0x77 op_sel:[1,1,1,1] ; encoding: [0x05,0x7e,0x33,0xd6,0x7f,0xf8,0xa8,0xe9] +# GFX1250-FAKE16: v_bitop3_b16 v5, exec_hi, null, vcc_lo bitop3:0x77 op_sel:[1,1,1,1] ; encoding: [0x05,0x7e,0x33,0xd6,0x7f,0xf8,0xa8,0xe9] + +0x05,0x78,0x33,0xd6,0x7f,0xf8,0xa8,0x01 +# GFX1250-REAL16: v_bitop3_b16 v5.h, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x33,0xd6,0x7f,0xf8,0xa8,0x01] +# GFX1250-FAKE16: v_bitop3_b16 v5, exec_hi, null, vcc_lo op_sel:[1,1,1,1] ; encoding: [0x05,0x78,0x33,0xd6,0x7f,0xf8,0xa8,0x01] + +0x05,0x00,0x33,0xd6,0x7e,0x82,0xad,0xc1 +# GFX1250-REAL16: v_bitop3_b16 v5.l, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x33,0xd6,0x7e,0x82,0xad,0xc1] +# GFX1250-FAKE16: v_bitop3_b16 v5, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x33,0xd6,0x7e,0x82,0xad,0xc1] + +0x05,0x00,0x33,0xd6,0x7d,0xfe,0xf5,0xa1,0x00,0x38,0x00,0x00 +# 
GFX1250-REAL16: v_bitop3_b16 v5.l, m0, 0x3800, m0 bitop3:5 ; encoding: [0x05,0x00,0x33,0xd6,0x7d,0xfe,0xf5,0xa1,0x00,0x38,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16 v5, m0, 0x3800, m0 bitop3:5 ; encoding: [0x05,0x00,0x33,0xd6,0x7d,0xfe,0xf5,0xa1,0x00,0x38,0x00,0x00] + +0x05,0x01,0x33,0xd6,0x7c,0xfc,0xfc,0x13,0x0b,0xfe,0x00,0x00 +# GFX1250-REAL16: v_bitop3_b16 v5.l, null, exec_lo, 0xfe0b bitop3:0x88 ; encoding: [0x05,0x01,0x33,0xd6,0x7c,0xfc,0xfc,0x13,0x0b,0xfe,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16 v5, null, exec_lo, 0xfe0b bitop3:0x88 ; encoding: [0x05,0x01,0x33,0xd6,0x7c,0xfc,0xfc,0x13,0x0b,0xfe,0x00,0x00] + +0x05,0x04,0x33,0xd6,0x01,0xfe,0xff,0x89 +# GFX1250-REAL16: v_bitop3_b16 v5.l, s1, v255.l, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0xfe,0xff,0x89] +# GFX1250-FAKE16: v_bitop3_b16 v5, s1, v255, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0xfe,0xff,0x89] + +0x05,0x00,0x33,0xd6,0x69,0xd2,0xf8,0x01 +# GFX1250-REAL16: v_bitop3_b16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x33,0xd6,0x69,0xd2,0xf8,0x01] +# GFX1250-FAKE16: v_bitop3_b16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x33,0xd6,0x69,0xd2,0xf8,0x01] + +0x05,0x24,0x33,0xd6,0xfd,0xd4,0x04,0xcb +# GFX1250-REAL16: v_bitop3_b16 v5.l, src_scc, vcc_lo, -1 bitop3:0x66 op_sel:[0,0,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xfd,0xd4,0x04,0xcb] +# GFX1250-FAKE16: v_bitop3_b16 v5, src_scc, vcc_lo, -1 bitop3:0x66 op_sel:[0,0,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xfd,0xd4,0x04,0xcb] + +0x05,0x04,0x33,0xd6,0x7b,0xfa,0xed,0x81 +# GFX1250-REAL16: v_bitop3_b16 v5.l, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x33,0xd6,0x7b,0xfa,0xed,0x81] +# GFX1250-FAKE16: v_bitop3_b16 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x33,0xd6,0x7b,0xfa,0xed,0x81] + +0x05,0x00,0x33,0xd6,0x01,0x05,0x0e,0x00 +# GFX1250-REAL16: v_bitop3_b16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x33,0xd6,0x01,0x05,0x0e,0x00] +# GFX1250-FAKE16: v_bitop3_b16 v5, v1, v2, s3 ; 
encoding: [0x05,0x00,0x33,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x04,0x33,0xd6,0x01,0x05,0x0e,0x30 +# GFX1250-REAL16: v_bitop3_b16 v5.l, v1.l, v2.l, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0x05,0x0e,0x30] +# GFX1250-FAKE16: v_bitop3_b16 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x33,0xd6,0x01,0x05,0x0e,0x30] + +0x05,0x04,0x33,0xd6,0xff,0x05,0xa4,0xe1 +# GFX1250-REAL16: v_bitop3_b16 v5.l, v255.l, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x33,0xd6,0xff,0x05,0xa4,0xe1] +# GFX1250-FAKE16: v_bitop3_b16 v5, v255, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x33,0xd6,0xff,0x05,0xa4,0xe1] + +0x05,0x07,0x33,0xd6,0x6b,0xfe,0xfd,0xe7,0x0b,0xfe,0x00,0x00 +# GFX1250-REAL16: v_bitop3_b16 v5.l, vcc_hi, 0xfe0b, v255.l bitop3:0x3f ; encoding: [0x05,0x07,0x33,0xd6,0x6b,0xfe,0xfd,0xe7,0x0b,0xfe,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16 v5, vcc_hi, 0xfe0b, v255 bitop3:0x3f ; encoding: [0x05,0x07,0x33,0xd6,0x6b,0xfe,0xfd,0xe7,0x0b,0xfe,0x00,0x00] + +0x05,0x02,0x33,0xd6,0x6a,0xf6,0x0c,0xa4 +# GFX1250-REAL16: v_bitop3_b16 v5.l, vcc_lo, ttmp15, v3.l bitop3:0x15 ; encoding: [0x05,0x02,0x33,0xd6,0x6a,0xf6,0x0c,0xa4] +# GFX1250-FAKE16: v_bitop3_b16 v5, vcc_lo, ttmp15, v3 bitop3:0x15 ; encoding: [0x05,0x02,0x33,0xd6,0x6a,0xf6,0x0c,0xa4] + +0x01,0x7c,0x33,0xd6,0x02,0x07,0x12,0xec +# GFX1250-REAL16: v_bitop3_b16 v1.h, v2.h, v3.h, v4.h bitop3:0x67 op_sel:[1,1,1,1] ; encoding: [0x01,0x7c,0x33,0xd6,0x02,0x07,0x12,0xec] +# GFX1250-FAKE16: v_bitop3_b16 v1, v2, v3, v4 bitop3:0x67 op_sel:[1,1,1,1] ; encoding: [0x01,0x7c,0x33,0xd6,0x02,0x07,0x12,0xec] + +0xff,0x04,0x34,0xd6,0xff,0xd6,0xf0,0xe9,0x56,0x34,0x12,0xaf +# GFX1250: v_bitop3_b32 v255, 0xaf123456, vcc_hi, null bitop3:0x67 ; encoding: [0xff,0x04,0x34,0xd6,0xff,0xd6,0xf0,0xe9,0x56,0x34,0x12,0xaf] + +0x05,0x04,0x34,0xd6,0xc1,0xfe,0xf4,0x6b +# GFX1250: v_bitop3_b32 v5, -1, exec_hi, src_scc bitop3:0x63 ; encoding: [0x05,0x04,0x34,0xd6,0xc1,0xfe,0xf4,0x6b] + +0x05,0x04,0x34,0xd6,0xf0,0xfa,0xc0,0xab +# GFX1250: v_bitop3_b32 v5, 
0.5, m0, 0.5 bitop3:0x65 ; encoding: [0x05,0x04,0x34,0xd6,0xf0,0xfa,0xc0,0xab] + +0x05,0x01,0x34,0xd6,0x7f,0xf8,0xa8,0xa9 +# GFX1250: v_bitop3_b32 v5, exec_hi, null, vcc_lo bitop3:0x4d ; encoding: [0x05,0x01,0x34,0xd6,0x7f,0xf8,0xa8,0xa9] + +0x05,0x00,0x34,0xd6,0x7e,0x82,0xad,0xc1 +# GFX1250: v_bitop3_b32 v5, exec_lo, -1, vcc_hi bitop3:6 ; encoding: [0x05,0x00,0x34,0xd6,0x7e,0x82,0xad,0xc1] + +0x05,0x00,0x34,0xd6,0x7d,0xe0,0xf5,0xa1 +# GFX1250: v_bitop3_b32 v5, m0, 0.5, m0 bitop3:5 ; encoding: [0x05,0x00,0x34,0xd6,0x7d,0xe0,0xf5,0xa1] + +0x05,0x03,0x34,0xd6,0x7c,0xfc,0xfc,0x0b,0x56,0x34,0x12,0xaf +# GFX1250: v_bitop3_b32 v5, null, exec_lo, 0xaf123456 bitop3:0x58 ; encoding: [0x05,0x03,0x34,0xd6,0x7c,0xfc,0xfc,0x0b,0x56,0x34,0x12,0xaf] + +0x05,0x04,0x34,0xd6,0x01,0xfe,0xff,0x89 +# GFX1250: v_bitop3_b32 v5, s1, v255, exec_hi bitop3:0x64 ; encoding: [0x05,0x04,0x34,0xd6,0x01,0xfe,0xff,0x89] + +0x05,0x00,0x34,0xd6,0x69,0xd2,0xf8,0x01 +# GFX1250: v_bitop3_b32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x34,0xd6,0x69,0xd2,0xf8,0x01] + +0x05,0x04,0x34,0xd6,0xfd,0xd4,0x04,0xcb +# GFX1250: v_bitop3_b32 v5, src_scc, vcc_lo, -1 bitop3:0x66 ; encoding: [0x05,0x04,0x34,0xd6,0xfd,0xd4,0x04,0xcb] + +0x05,0x04,0x34,0xd6,0x7b,0xfa,0xed,0x81 +# GFX1250: v_bitop3_b32 v5, ttmp15, src_scc, ttmp15 bitop3:0x24 ; encoding: [0x05,0x04,0x34,0xd6,0x7b,0xfa,0xed,0x81] + +0x05,0x00,0x34,0xd6,0x01,0x05,0x0e,0x00 +# GFX1250: v_bitop3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x34,0xd6,0x01,0x05,0x0e,0x00] + +0x05,0x04,0x34,0xd6,0x01,0x05,0x0e,0x30 +# GFX1250: v_bitop3_b32 v5, v1, v2, s3 bitop3:0xa1 ; encoding: [0x05,0x04,0x34,0xd6,0x01,0x05,0x0e,0x30] + +0x05,0x04,0x34,0xd6,0xff,0x05,0xa4,0xe1 +# GFX1250: v_bitop3_b32 v5, v255, s2, s105 bitop3:0x27 ; encoding: [0x05,0x04,0x34,0xd6,0xff,0x05,0xa4,0xe1] + +0x05,0x07,0x34,0xd6,0x6b,0xfe,0xfd,0xe7,0x56,0x34,0x12,0xaf +# GFX1250: v_bitop3_b32 v5, vcc_hi, 0xaf123456, v255 bitop3:0x3f ; encoding: 
[0x05,0x07,0x34,0xd6,0x6b,0xfe,0xfd,0xe7,0x56,0x34,0x12,0xaf] + +0x05,0x02,0x34,0xd6,0x6a,0xf6,0x0c,0xa4 +# GFX1250: v_bitop3_b32 v5, vcc_lo, ttmp15, v3 bitop3:0x15 ; encoding: [0x05,0x02,0x34,0xd6,0x6a,0xf6,0x0c,0xa4] + 0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04 # GFX1250: v_lshl_add_u64 v[2:3], s[4:5], v7, v[8:9] ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0e,0x22,0x04] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt index dec73b74afc8d..f8d9afe00e4d8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt @@ -2,6 +2,136 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xff,0x04,0x33,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x0d,0x30 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v255.l, v255.l, v255.l, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x04,0x33,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x0d,0x30] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x04,0x33,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x0d,0x30] + +0xff,0x45,0x33,0xd6,0xfa,0xfe,0xf7,0x0b,0xff,0x6f,0x0d,0x30 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:0x68 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x45,0x33,0xd6,0xfa,0xfe,0xf7,0x0b,0xff,0x6f,0x0d,0x30] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x68 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x45,0x33,0xd6,0xfa,0xfe,0xf7,0x0b,0xff,0x6f,0x0d,0x30] +
+0x05,0x04,0x33,0xd6,0xfa,0x04,0x06,0x6b,0x01,0x60,0x09,0x13 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x06,0x6b,0x01,0x60,0x09,0x13] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x06,0x6b,0x01,0x60,0x09,0x13] + +0x05,0x25,0x33,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x60,0x01,0x13 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x68 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x25,0x33,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x60,0x01,0x13] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x68 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x25,0x33,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x60,0x01,0x13] + +0x05,0x7c,0x33,0xd6,0xfa,0x04,0xfe,0xc9,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:0x66 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0xfe,0xc9,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x66 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0xfe,0xc9,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0xc1,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_hi bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0xc1,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0xc1,0x01,0x2f,0x01,0xff] + +0x05,0x78,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x78,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x01,0x33,0xd6,0xfa,0x04,0xfa,0xa9,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x33,0xd6,0xfa,0x04,0xfa,0xa9,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x33,0xd6,0xfa,0x04,0xfa,0xa9,0x01,0x50,0x01,0xff] + +0x05,0x0c,0x33,0xd6,0xfa,0x04,0xfa,0xe9,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:0x67 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x33,0xd6,0xfa,0x04,0xfa,0xe9,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x67 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x33,0xd6,0xfa,0x04,0xfa,0xe9,0x01,0x50,0x01,0xff] + +0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x03,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, null bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x03,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] + +0x05,0x15,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null bitop3:0x68 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:0x68 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] + +0x05,0x02,0x33,0xd6,0xfa,0x04,0xa6,0xc1,0x01,0x0f,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s105 bitop3:0x16 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x33,0xd6,0xfa,0x04,0xa6,0xc1,0x01,0x0f,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x16 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x33,0xd6,0xfa,0x04,0xa6,0xc1,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0xa1,0x01,0x21,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0xa1,0x01,0x21,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0xa1,0x01,0x21,0x01,0xff] + +0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] + +0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x8c,0x01,0x41,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x8c,0x01,0x41,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x8c,0x01,0x41,0x01,0xff] + +0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x07,0x33,0xd6,0xfa,0x04,0xae,0xe1,0x01,0x11,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x33,0xd6,0xfa,0x04,0xae,0xe1,0x01,0x11,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x33,0xd6,0xfa,0x04,0xae,0xe1,0x01,0x11,0x01,0xff] + 
+0x05,0x04,0x33,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, v3.h bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0x0e,0xcc,0x01,0xe4,0x00,0xff] + +0xff,0x04,0x34,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x0d,0x30 +# GFX1250: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x04,0x34,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x0d,0x30] + +0x05,0x03,0x34,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x5f,0x01,0x01 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x34,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x5f,0x01,0x01] + +0x05,0x04,0x34,0xd6,0xfa,0x04,0xc2,0x6b,0x01,0x60,0x09,0x13 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xc2,0x6b,0x01,0x60,0x09,0x13] + +0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +0x05,0x00,0x34,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp 
v5, v1, v2, exec_lo bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +0x05,0x01,0x34,0xd6,0xfa,0x04,0xf2,0xa9,0x01,0x50,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x34,0xd6,0xfa,0x04,0xf2,0xa9,0x01,0x50,0x01,0xff] + +0x05,0x00,0x34,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x04,0x34,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x04,0x34,0xd6,0xfa,0x04,0xfe,0x8f,0x01,0x41,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xfe,0x8f,0x01,0x41,0x01,0xff] + +0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] + +0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x02,0x34,0xd6,0xfa,0x04,0xae,0xa1,0x01,0x0f,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x02,0x34,0xd6,0xfa,0x04,0xae,0xa1,0x01,0x0f,0x01,0xff] + +0x05,0x07,0x34,0xd6,0xfa,0x04,0xaa,0xe1,0x01,0x11,0x01,0xff +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x34,0xd6,0xfa,0x04,0xaa,0xe1,0x01,0x11,0x01,0xff] + 0xff,0x81,0x6d,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30 # GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6d,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt index db211f9061dca..44726a1de3762 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt @@ -2,6 +2,114 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s +0xff,0x01,0x33,0xd6,0xe9,0xfe,0xf7,0xab,0xff,0x00,0x00,0x00 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v255.l, v255.l, v255.l, src_scc bitop3:0x4d dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x01,0x33,0xd6,0xe9,0xfe,0xf7,0xab,0xff,0x00,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x4d dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x01,0x33,0xd6,0xe9,0xfe,0xf7,0xab,0xff,0x00,0x00,0x00] + +0xff,0x44,0x33,0xd6,0xea,0xfe,0xf7,0xeb,0xff,0x00,0x00,0x00 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:0x67 op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x44,0x33,0xd6,0xea,0xfe,0xf7,0xeb,0xff,0x00,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v255, v255, v255, src_scc bitop3:0x67 op_sel:[0,0,0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding:
[0xff,0x44,0x33,0xd6,0xea,0xfe,0xf7,0xeb,0xff,0x00,0x00,0x00] + +0x05,0x24,0x33,0xd6,0xe9,0x04,0x06,0xcb,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x66 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xe9,0x04,0x06,0xcb,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:0x66 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x24,0x33,0xd6,0xe9,0x04,0x06,0xcb,0x01,0x77,0x39,0x05] + +0x05,0x00,0x33,0xd6,0xea,0x04,0x06,0xc3,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x33,0xd6,0xea,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x33,0xd6,0xea,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] + +0x05,0x07,0x33,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x33,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x33,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] + +0x05,0x7b,0x33,0xd6,0xe9,0x04,0xfe,0x09,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:0x58 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7b,0x33,0xd6,0xe9,0x04,0xfe,0x09,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi bitop3:0x58 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7b,0x33,0xd6,0xe9,0x04,0xfe,0x09,0x01,0x77,0x39,0x05] + +0x05,0x78,0x33,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x33,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] +# 
GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x33,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] + +0x05,0x04,0x33,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] + +0x05,0x0c,0x33,0xd6,0xe9,0x04,0xfa,0x69,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:0x63 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xe9,0x04,0xfa,0x69,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, exec_lo bitop3:0x63 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x33,0xd6,0xe9,0x04,0xfa,0x69,0x01,0x77,0x39,0x05] + +0x05,0x00,0x33,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x10,0x33,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x33,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x33,0xd6,0xe9,0x04,0xf2,0x01,0x01,0x77,0x39,0x05] + +0x05,0x04,0x33,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x04,0x33,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] + +0x05,0x01,0x33,0xd6,0xe9,0x04,0xee,0xe1,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 bitop3:0xf dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd6,0xe9,0x04,0xee,0xe1,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, ttmp15 bitop3:0xf dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd6,0xe9,0x04,0xee,0xe1,0x01,0x77,0x39,0x05] + +0x05,0x04,0x33,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.l bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v255 bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] + +0x05,0x00,0x33,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x04,0x33,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x33,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] + +0x05,0x00,0x33,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x33,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +0x05,0x7c,0x33,0xd6,0xe9,0x04,0x0e,0xcc,0x01,0x00,0x00,0x00 +# GFX1250-REAL16: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, v3.h bitop3:0x66 op_sel:[1,1,1,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x7c,0x33,0xd6,0xe9,0x04,0x0e,0xcc,0x01,0x00,0x00,0x00] +# GFX1250-FAKE16: v_bitop3_b16_e64_dpp v5, v1, v2, v3 bitop3:0x66 op_sel:[1,1,1,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x7c,0x33,0xd6,0xe9,0x04,0x0e,0xcc,0x01,0x00,0x00,0x00] + +0xff,0x03,0x34,0xd6,0xe9,0xfe,0xf7,0x0b,0xff,0x00,0x00,0x00 +# GFX1250: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x58 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x34,0xd6,0xe9,0xfe,0xf7,0x0b,0xff,0x00,0x00,0x00] + +0x05,0x00,0x34,0xd6,0xe9,0x04,0x06,0xc3,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0x06,0xc3,0x01,0x77,0x39,0x05] + +0x05,0x01,0x34,0xd6,0xea,0x04,0xc2,0xab,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x4d dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x01,0x34,0xd6,0xea,0x04,0xc2,0xab,0x01,0x77,0x39,0x05] + +0x05,0x07,0x34,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:0x3f dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x07,0x34,0xd6,0xe9,0x04,0xfe,0xe1,0x01,0x77,0x39,0x05] + +0x05,0x04,0x34,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:0x24 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xfa,0x81,0x01,0x77,0x39,0x05] + +0x05,0x00,0x34,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0xf2,0xa1,0x01,0x77,0x39,0x05] + 
+0x05,0x04,0x34,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0x27 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xa6,0xe1,0x01,0x77,0x39,0x05] + +0x05,0x02,0x34,0xd6,0xe9,0x04,0xee,0xa1,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd6,0xe9,0x04,0xee,0xa1,0x01,0x77,0x39,0x05] + +0x05,0x04,0x34,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0xa1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xfe,0x37,0x01,0x77,0x39,0x05] + +0x05,0x00,0x34,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x04,0x34,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x64 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x34,0xd6,0xe9,0x04,0xae,0x89,0x01,0x77,0x39,0x05] + +0x05,0x00,0x34,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 +# GFX1250: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + 0xff,0x81,0x6d,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 # GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6d,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00]