diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 087ca1f954464..42a1ffb8a26d4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2149,6 +2149,8 @@ class getAsmVOP3P { @@ -2733,6 +2736,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>; def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; +def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index cdc3214924961..bc93f723490c7 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1661,8 +1661,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12 op> : VOP3Only_Realtriple, VOP3Only_Realtriple; multiclass VOP3Only_Realtriple_t16_gfx11_gfx12 op, string asmName, string OpName = NAME> : - VOP3Only_Realtriple_t16, - VOP3Only_Realtriple_t16; + VOP3_Realtriple_t16_gfx11, + VOP3_Realtriple_t16_gfx12; multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12 op, string asmName, string OpName = NAME> { defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 78ca7a2f258cb..34ecdb56e8689 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, getAsmVOP3OpSel<3, HasClamp, HasOMod, HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret); - let AsmVOP3DPP16 = !subst(", $src2_modifiers", "", - getAsmVOP3DPP16.ret>.ret); - let AsmVOP3DPP8 = !subst(", $src2_modifiers", "", - getAsmVOP3DPP8.ret>.ret); + let AsmVOP3Base = !subst(", $src2_modifiers", "", + getAsmVOP3Base.ret); } class VOP3_CVT_SR_F8_ByteSel_Profile : @@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile, AMDGPUfmaximum3>; } // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 -defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile>; -defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile>; +defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>; +defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>; defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile>; defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile>; @@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag (or_oneuse i64:$src0, i64:$src1), i64:$src2), (i32 (EXTRACT_SUBREG $src1, sub1)), (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>; +} // End SubtargetPredicate = isGFX9Plus + // FIXME: Probably should hardcode clamp bit in pseudo and avoid this. class OpSelBinOpClampPat : GCNPat< @@ -760,9 +756,14 @@ class OpSelBinOpClampPat; -def : OpSelBinOpClampPat; -def : OpSelBinOpClampPat; -} // End SubtargetPredicate = isGFX9Plus +let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in { + def : OpSelBinOpClampPat; + def : OpSelBinOpClampPat; +} // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts +let True16Predicate = UseFakeTrue16Insts in { + def : OpSelBinOpClampPat; + def : OpSelBinOpClampPat; +} // End True16Predicate = UseFakeTrue16Insts multiclass IMAD32_Pats { def : GCNPat < @@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in { def : PermlanePat; } - defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile, add>; - defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile, sub>; - - def : OpSelBinOpClampPat; - def : OpSelBinOpClampPat; - - // Undo sub x, c -> add x, -c canonicalization since c is more likely - // an inline immediate than -c. - def : GCNPat< - (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), - (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0) - >; + defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>; + defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>; } // End SubtargetPredicate = isGFX10Plus +let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in { + def : OpSelBinOpClampPat; + def : OpSelBinOpClampPat; + // Undo sub x, c -> add x, -c canonicalization since c is more likely + // an inline immediate than -c. + def : GCNPat< + (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), + (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0) + >; +} // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus + +let True16Predicate = UseFakeTrue16Insts in { + def : OpSelBinOpClampPat; + def : OpSelBinOpClampPat; + def : GCNPat< + (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), + (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0) + >; +} // End True16Predicate = UseFakeTrue16Insts + let SubtargetPredicate = isGFX12Plus in { let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>; @@ -1104,6 +1115,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12 op, string opName, multiclass VOP3Dot_Realtriple_gfx11_gfx12 op> : VOP3Dot_Realtriple, VOP3Dot_Realtriple; +multiclass VOP3_Realtriple_t16_gfx11_gfx12 op, string asmName, string opName = NAME, + string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3_Realtriple_with_name, + VOP3_Realtriple_with_name; + +multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 op, string asmName, string opName = NAME, + string pseudo_mnemonic = "", bit isSingle = 0> { + defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12; + defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12; +} + multiclass VOP3be_Real_gfx11_gfx12 op, string opName, string asmName> : VOP3be_Real, VOP3be_Real; @@ -1189,8 +1211,8 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", " defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">; -defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>; -defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>; +defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">; +defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">; defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">; defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>; defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>; @@ -1198,8 +1220,8 @@ defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30 defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">; defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">; defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">; -defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">; -defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">; +defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">; +defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">; defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>; defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >; defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 05a7d907d237a..aab5dc7465d93 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -111,7 +111,7 @@ class VOP3_Pseudo pattern = [], bit HasFP8DstByteSel = P.HasFP8DstByteSel; - let AsmOperands = !if(isVop3OpSel, + let AsmOperands = !if(!and(!not(P.IsTrue16), isVop3OpSel), P.AsmVOP3OpSel, !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64)); @@ -178,6 +178,7 @@ class VOP3_Real : Enc64 { let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); } +// To avoid having different version of every type of operand depending on if +// they are part of a True16 instruction or not, the operand encoding should be +// the same for SGPR, imm, and VGPR_32 whether the instruction is True16 or not. +class VOP3a_t16 : Enc64 { + bits<11> vdst; + bits<4> src0_modifiers; + bits<11> src0; + bits<3> src1_modifiers; + bits<11> src1; + bits<3> src2_modifiers; + bits<11> src2; + bits<1> clamp; + bits<2> omod; + + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); + let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); + let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); + // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix + let Inst{11} = !if(P.HasSrc0Mods, src0_modifiers{2}, 0); + let Inst{12} = !if(P.HasSrc1Mods, src1_modifiers{2}, 0); + let Inst{13} = !if(P.HasSrc2Mods, src2_modifiers{2}, 0); + let Inst{14} = !if(!and(P.HasDst, P.HasSrc0Mods), src0_modifiers{3}, 0); + let Inst{15} = !if(P.HasClamp, clamp{0}, 0); + + let Inst{31-26} = 0x35; + let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0); + let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0); + let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0); + let Inst{60-59} = !if(P.HasOMod, omod, 0); + let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); + let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); + let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); +} + class VOP3a_gfx6_gfx7 op, VOPProfile p> : VOP3a

{ let Inst{11} = !if(p.HasClamp, clamp{0}, 0); let Inst{25-17} = op; @@ -272,6 +308,10 @@ class VOP3e_gfx10 op, VOPProfile p> : VOP3a_gfx10 { class VOP3e_gfx11_gfx12 op, VOPProfile p> : VOP3e_gfx10; +class VOP3e_t16_gfx11_gfx12 op, VOPProfile p> : VOP3a_t16

{ + let Inst{25-16} = op; +} + class VOP3e_vi op, VOPProfile P> : VOP3a_vi { bits<8> vdst; let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); @@ -736,7 +776,12 @@ class VOP3_DPPe_Fields : VOP3_DPPe_Fields_Base { bits<8> src0; } +class VOP3_DPPe_Fields_t16 : VOP3_DPPe_Fields_Base { + bits<11> src0; +} + // Common refers to common between DPP and DPP8 +// Base refers to a shared base between T16 and regular instructions class VOP3_DPPe_Common_Base op, VOPProfile P> : Enc96 { bits<4> src0_modifiers; bits<3> src1_modifiers; @@ -748,7 +793,7 @@ class VOP3_DPPe_Common_Base op, VOPProfile P> : Enc96 { let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); - // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs. + // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix let Inst{11} = !if(P.HasOpSel, !if(P.HasSrc0Mods, src0_modifiers{2}, 0), !if(P.IsFP8SrcByteSel, byte_sel{1}, ?)); let Inst{12} = !if(P.HasOpSel, !if(P.HasSrc1Mods, src1_modifiers{2}, 0), @@ -777,6 +822,16 @@ class VOP3_DPPe_Common op, VOPProfile P> : VOP3_DPPe_Common_Base let Inst{58-50} = !if(P.HasSrc2, src2, 0); } +class VOP3_DPPe_Common_t16 op, VOPProfile P> : VOP3_DPPe_Common_Base { + bits<11> vdst; + bits<11> src1; + bits<11> src2; + + let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0); + let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0); +} + class VOP3P_DPPe_Common_Base op, VOPProfile P> : Enc96 { bits<4> src0_modifiers; bits<4> src1_modifiers; @@ -786,6 +841,7 @@ class VOP3P_DPPe_Common_Base op, VOPProfile P> : Enc96 { let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0 let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1 let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2 + // OPSEL must be set such that the low result only uses low inputs, and the high result only uses high inputs. let Inst{11} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{2}, 0); // op_sel(0) let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) @@ -810,6 +866,16 @@ class VOP3P_DPPe_Common op, VOPProfile P> : VOP3P_DPPe_Common_Base op, VOPProfile P> : VOP3P_DPPe_Common_Base { + bits<11> vdst; + bits<11> src1; + bits<11> src2; + + let Inst{7-0} = vdst{7-0}; + let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0); + let Inst{58-50} = !if(P.HasSrc2, src2{8-0}, 0); +} + class VOP_DPP_Pseudo pattern=[], dag Ins = P.InsDPP, string asmOps = P.AsmDPP> : VOP_Pseudo { @@ -870,6 +936,7 @@ class VOP_DPP_Real : // Copy relevant pseudo op flags let isConvergent = ps.isConvergent; let SubtargetPredicate = ps.SubtargetPredicate; + let True16Predicate = ps.True16Predicate; let AssemblerPredicate = ps.AssemblerPredicate; let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; @@ -928,11 +995,29 @@ class VOP3_DPP_Base op, VOPProfile P, bit IsDPP16> : + VOP3_DPPe_Common, + VOP3_DPPe_Fields { + + let Inst{40-32} = 0xfa; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{80-72} = dpp_ctrl; + let Inst{82} = !if(IsDPP16, fi, ?); + let Inst{83} = bound_ctrl; + + // Inst{87-84} ignored by hw + let Inst{91-88} = bank_mask; + let Inst{95-92} = row_mask; +} + class VOP3_DPP op, string OpName, VOPProfile P, bit IsDPP16, dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP), string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> : - VOP3_DPP_Base, VOP3_DPPe_Common, - VOP3_DPPe_Fields { + VOP3_DPP_Base, VOP3_DPP_Enc; + +class VOP3_DPP_Enc_t16 op, VOPProfile P, bit IsDPP16 > + : VOP3_DPPe_Common_t16, + VOP3_DPPe_Fields_t16 { let Inst{40-32} = 0xfa; let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); @@ -945,6 +1030,13 @@ class VOP3_DPP op, string OpName, VOPProfile P, bit IsDPP16, let Inst{95-92} = row_mask; } +class VOP3_DPP_t16 op, string OpName, VOPProfile P, bit IsDPP16, + dag InsDPP = !if (IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP), + string AsmDPP = !if (IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> + : VOP3_DPP_Base, + VOP3_DPP_Enc_t16 { +} + class VOP3P_DPP op, string OpName, VOPProfile P, bit IsDPP16, dag InsDPP = !if(IsDPP16, P.InsVOP3DPP16, P.InsVOP3DPP), string AsmDPP = !if(IsDPP16, P.AsmVOP3DPP16, P.AsmVOP3DPP)> : @@ -979,6 +1071,12 @@ class VOP3_DPP8e_Fields { bits<9> fi; } +class VOP3_DPP8e_Fields_t16 { + bits<11> src0; + bits<24> dpp8; + bits<9> fi; +} + class VOP_DPP8_Base : InstSI { @@ -1011,16 +1109,28 @@ class VOP3_DPP8_Base : let Size = 12; } +class VOP3_DPP8_Enc op, VOPProfile P> : + VOP3_DPPe_Common, + VOP3_DPP8e_Fields { + let Inst{40-32} = fi; + let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{95-72} = dpp8{23-0}; +} class VOP3_DPP8 op, string OpName, VOPProfile P> : - VOP3_DPP8_Base, VOP3_DPPe_Common, - VOP3_DPP8e_Fields { + VOP3_DPP8_Base, VOP3_DPP8_Enc; +class VOP3_DPP8_Enc_t16 op, VOPProfile P> : + VOP3_DPPe_Common_t16, + VOP3_DPP8e_Fields_t16 { let Inst{40-32} = fi; let Inst{71-64} = !if(P.HasSrc0, src0{7-0}, 0); let Inst{95-72} = dpp8{23-0}; } +class VOP3_DPP8_t16 op, string OpName, VOPProfile P> : + VOP3_DPP8_Base, VOP3_DPP8_Enc_t16; + class VOP3P_DPP8 op, string OpName, VOPProfile P> : VOP3_DPP8_Base, VOP3P_DPPe_Common, VOP3_DPP8e_Fields { @@ -1273,6 +1383,30 @@ class VOP3_Profile : VOP3_Pr } +class VOP3_Profile_True16 : VOPProfile_True16

{ + let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); + let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); + let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); + let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + + let HasModifiers = + !if (Features.IsMAI, 0, + !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers)); + let IsSingle = 1; +} + +class VOP3_Profile_Fake16 : VOPProfile_Fake16

{ + let HasClamp = !if(Features.HasClamp, 1, P.HasClamp); + let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel); + let IsMAI = !if(Features.IsMAI, 1, P.IsMAI); + let IsPacked = !if(Features.IsPacked, 1, P.IsPacked); + + let HasModifiers = + !if (Features.IsMAI, 0, + !or(Features.IsPacked, Features.HasOpSel, P.HasModifiers)); + let IsSingle = 1; +} + // consistently gives instructions a _e64 suffix multiclass VOP3Inst_Pseudo_Wrapper pattern = [], bit VOP3Only = 0> { def _e64 : VOP3_Pseudo; @@ -1325,11 +1459,33 @@ multiclass VOP3PseudoScalarInst; } +multiclass VOP3Inst_t16_with_profiles { + let True16Predicate = NotHasTrue16BitInsts in { + defm NAME : VOP3Inst; + } + let True16Predicate = UseRealTrue16Insts in { + defm _t16 : VOP3Inst; + } + let True16Predicate = UseFakeTrue16Insts in { + defm _fake16 : VOP3Inst; + } +} + +multiclass VOP3Inst_t16 + : VOP3Inst_t16_with_profiles, + VOP3_Profile_True16, VOP3_Profile_Fake16, + node, node_t16>; + //===----------------------------------------------------------------------===// // VOP3 DPP //===----------------------------------------------------------------------===// -class Base_VOP3_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> +class VOP3_DPP16_Helper op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOP3_DPP { let VOP3_OPSEL = ps.Pfl.HasOpSel; let IsDOT = ps.IsDOT; @@ -1342,17 +1498,43 @@ class Base_VOP3_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> let OtherPredicates = ps.OtherPredicates; } +class VOP3_DPP16_t16_Helper op, VOP_DPP_Pseudo ps, + string opName = ps.OpName> + : VOP3_DPP_t16 { + let VOP3_OPSEL = ps.Pfl.HasOpSel; + let IsDOT = ps.IsDOT; + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + let AssemblerPredicate = HasDPP16; + let SubtargetPredicate = HasDPP16; + let OtherPredicates = ps.OtherPredicates; +} + class VOP3_DPP16 op, VOP_DPP_Pseudo ps, int subtarget, string opName = ps.OpName> - : Base_VOP3_DPP16, SIMCInstr; + : VOP3_DPP16_Helper, SIMCInstr; + +class VOP3_DPP16_t16 op, VOP_DPP_Pseudo ps, int subtarget, + string opName = ps.OpName> + : VOP3_DPP16_t16_Helper, SIMCInstr; class VOP3_DPP16_Gen op, VOP_DPP_Pseudo ps, GFXGen Gen, - string opName = ps.OpName> : - VOP3_DPP16 { + string opName = ps.OpName> + : VOP3_DPP16 { let AssemblerPredicate = Gen.AssemblerPredicate; - let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); - let DecoderNamespace = Gen.DecoderNamespace# - !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); + let DecoderNamespace = Gen.DecoderNamespace; +} + +class VOP3_DPP16_Gen_t16 op, VOP_DPP_Pseudo ps, GFXGen Gen, + string opName = ps.OpName> + : VOP3_DPP16_t16 { + let True16Predicate = + !if (ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let DecoderNamespace = + Gen.DecoderNamespace #!if (ps.Pfl.IsRealTrue16, "", "_FAKE16"); } class Base_VOP3_DPP8 op, VOP_Pseudo ps, string opName = ps.OpName> @@ -1366,11 +1548,25 @@ class Base_VOP3_DPP8 op, VOP_Pseudo ps, string opName = ps.OpName> let SubtargetPredicate = ps.SubtargetPredicate; let OtherPredicates = ps.OtherPredicates; + let True16Predicate = ps.True16Predicate; +} + +class Base_VOP3_DPP8_t16 op, VOP_Pseudo ps, string opName = ps.OpName> + : VOP3_DPP8_t16 { + let VOP3_OPSEL = ps.Pfl.HasOpSel; + let IsDOT = ps.IsDOT; + let hasSideEffects = ps.hasSideEffects; + let Defs = ps.Defs; + let SchedRW = ps.SchedRW; + let Uses = ps.Uses; + + let OtherPredicates = ps.OtherPredicates; + let True16Predicate = ps.True16Predicate; } class Base_VOP3b_DPP16 op, VOP_DPP_Pseudo ps, string opName = ps.OpName> - : Base_VOP3_DPP16 { + : VOP3_DPP16_Helper { bits<7> sdst; let Inst{14 - 8} = sdst; } @@ -1381,6 +1577,12 @@ class VOP3b_DPP8_Base op, VOP_Pseudo ps, string opName = ps.OpName> let Inst{14 - 8} = sdst; } +class VOP3b_DPP8_Base_t16 op, VOP_Pseudo ps, string opName = ps.OpName> + : Base_VOP3_DPP8 { + bits<8> sdst; + let Inst{14 - 8} = sdst{7-1}; +} + //===----------------------------------------------------------------------===// // VOP3 GFX11, GFX12 //===----------------------------------------------------------------------===// @@ -1420,10 +1622,11 @@ multiclass VOP3Dot_Real_Base op, string opName = NAME, } multiclass VOP3_Real_with_name op, string opName, - string asmName, bit isSingle = 0> { + string asmName, string pseudo_mnemonic = "", bit isSingle = 0> { defvar ps = !cast(opName#"_e64"); let AsmString = asmName # ps.AsmOperands, IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + // FIXME-TRUE16 support FP8 instructions properly if ps.Pfl.IsFP8SrcByteSel then { def _e64#Gen.Suffix : VOP3_Real_Gen, @@ -1432,17 +1635,27 @@ multiclass VOP3_Real_with_name op, string opName, def _e64#Gen.Suffix : VOP3_Real_Gen, VOP3FP8OpSel_dst_bytesel_gfx11_gfx12; - } else if ps.Pfl.HasOpSel then { - def _e64#Gen.Suffix : - VOP3_Real_Gen, - VOP3OpSel_gfx11_gfx12; } else { - def _e64#Gen.Suffix : - VOP3_Real_Gen, - VOP3e_gfx11_gfx12; + if ps.Pfl.IsRealTrue16 then { + def _e64#Gen.Suffix : + VOP3_Real_Gen, + VOP3e_t16_gfx11_gfx12; + } else { + if ps.Pfl.HasOpSel then { + def _e64#Gen.Suffix : + VOP3_Real_Gen, + VOP3OpSel_gfx11_gfx12; + } else { + def _e64#Gen.Suffix : + VOP3_Real_Gen, + VOP3e_gfx11_gfx12; + } + } } } - def Gen.Suffix#"_VOP3_alias" : LetDummies, AMDGPUMnemonicAlias { + def Gen.Suffix#"_VOP3_alias" : LetDummies, + AMDGPUMnemonicAlias { let AssemblerPredicate = Gen.AssemblerPredicate; } } @@ -1456,8 +1669,13 @@ multiclass VOP3_Real_No_Suffix op, string opName = NAME> { } multiclass VOP3_Real_dpp_Base op, string opName = NAME> { - def _e64_dpp#Gen.Suffix : - VOP3_DPP16_Gen(opName#"_e64"#"_dpp"), Gen>; + defvar ps = !cast(opName#"_e64"#"_dpp"); + if ps.Pfl.IsTrue16 then + def _e64_dpp#Gen.Suffix : + VOP3_DPP16_Gen_t16; + else + def _e64_dpp#Gen.Suffix : + VOP3_DPP16_Gen; } multiclass VOP3Dot_Real_dpp_Base op, string opName = NAME> { @@ -1552,18 +1770,14 @@ multiclass VOP3Only_Realtriple op> : VOP3_Realtriple; multiclass VOP3_Realtriple_with_name op, string opName, - string asmName, bit isSingle = 0> : - VOP3_Real_with_name, + string asmName, string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3_Real_with_name, VOP3_Real_dpp_with_name, VOP3_Real_dpp8_with_name; multiclass VOP3Only_Realtriple_with_name op, string opName, string asmName> : - VOP3_Realtriple_with_name; - -multiclass VOP3Only_Realtriple_t16 op, string asmName, - string opName = NAME> - : VOP3Only_Realtriple_with_name; + VOP3_Realtriple_with_name; multiclass VOP3be_Realtriple< GFXGen Gen, bits<10> op, bit isSingle = 0, string opName = NAME, @@ -1579,6 +1793,16 @@ multiclass VOP3beOnly_Realtriple op> : // VOP3 GFX11 //===----------------------------------------------------------------------===// +// VOP1 and VOP2 depend on these triple defs + +multiclass VOP3_Realtriple_t16_gfx11 op, string asmName, string opName = NAME, + string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3_Realtriple_with_name; + +multiclass VOP3Only_Realtriple_t16_gfx11 op, string asmName, + string opName = NAME, string pseudo_mnemonic = ""> + : VOP3_Realtriple_t16_gfx11; + multiclass VOP3be_Real_gfx11 op, string opName, string asmName, bit isSingle = 0> : VOP3be_Real; @@ -1591,10 +1815,6 @@ multiclass VOP3_Realtriple_gfx11 op, bit isSingle = 0, string opName = NAME> : VOP3_Realtriple; -multiclass VOP3Only_Realtriple_t16_gfx11 op, string asmName, - string opName = NAME> - : VOP3Only_Realtriple_with_name; - //===----------------------------------------------------------------------===// // VOP3 GFX12 //===----------------------------------------------------------------------===// @@ -1610,6 +1830,16 @@ multiclass VOP3Only_Real_Base_gfx12 op> : multiclass VOP3Only_Realtriple_t16_gfx12 op> : VOP3Only_Realtriple; +multiclass VOP3_Realtriple_t16_gfx12 op, string asmName, string opName = NAME, + string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3_Realtriple_with_name; + +multiclass VOP3_Realtriple_t16_and_fake16_gfx12 op, string asmName, string opName = NAME, + string pseudo_mnemonic = "", bit isSingle = 0> { + defm opName#"_t16":VOP3_Realtriple_t16_gfx12; + defm opName#"_fake16":VOP3_Realtriple_t16_gfx12; +} + multiclass VOP3be_Real_with_name_gfx12 op, string opName, string asmName, bit isSingle = 0> { defvar ps = !cast(opName#"_e64"); @@ -1624,18 +1854,14 @@ multiclass VOP3be_Real_with_name_gfx12 op, string opName, } multiclass VOP3_Realtriple_with_name_gfx12 op, string opName, - string asmName, bit isSingle = 0> : - VOP3_Realtriple_with_name; + string asmName, string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3_Realtriple_with_name; multiclass VOP3Only_Realtriple_with_name_gfx11_gfx12 op, string opName, string asmName> : VOP3Only_Realtriple_with_name, VOP3Only_Realtriple_with_name; -multiclass VOP3Only_Realtriple_with_name_t16_gfx12 op, string asmName, - string opName = NAME> - : VOP3Only_Realtriple_with_name; - //===----------------------------------------------------------------------===// include "VOPCInstructions.td" @@ -1705,4 +1931,4 @@ def VOPTrue16Table : GenericTable { let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getTrue16OpcodeHelper"; -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir index 11411c691c390..1971cd80d5686 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.s16.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s # Note: 16-bit instructions generally produce a 0 result in the high 16-bits on GFX8 and GFX9 and preserve high 16 bits on GFX10+ @@ -23,6 +23,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] + ; ; GFX10-LABEL: name: add_s16 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -30,6 +31,14 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_e64_]] + ; + ; GFX11-LABEL: name: add_s16 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_ADD_NC_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -56,6 +65,7 @@ body: | ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_U16_e64_]] + ; ; GFX10-LABEL: name: add_s16_zext_to_s32 ; GFX10: liveins: $vgpr0, $vgpr1 ; GFX10-NEXT: {{ $}} @@ -65,6 +75,16 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; + ; GFX11-LABEL: name: add_s16_zext_to_s32 + ; GFX11: liveins: $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX11-NEXT: [[V_ADD_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_fake16_e64_]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s16) = G_TRUNC %0 @@ -91,12 +111,20 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] + ; ; GFX10-LABEL: name: add_s16_neg_inline_const_64 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_e64_]] + ; + ; GFX11-LABEL: name: add_s16_neg_inline_const_64 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_SUB_NC_U16_fake16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_CONSTANT i16 -64 @@ -121,6 +149,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 [[COPY]], 64, 0, implicit $exec ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_U16_e64_]] + ; ; GFX10-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 ; GFX10: liveins: $vgpr0 ; GFX10-NEXT: {{ $}} @@ -129,6 +158,15 @@ body: | ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_e64_]], implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; + ; GFX11-LABEL: name: add_s16_neg_inline_const_64_zext_to_s32 + ; GFX11: liveins: $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX11-NEXT: [[V_SUB_NC_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_fake16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec + ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535 + ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_fake16_e64_]], implicit $exec + ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s16) = G_CONSTANT i16 -64 diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir index 1151bde02ef62..41b61f2e09a3d 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir @@ -82,9 +82,9 @@ body: | # Regression test for src_modifiers on base u16 opcode # GCN-LABEL: name: vop3_u16 -# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec -# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec -# GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec +# GCN: %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec +# GCN: %7:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec +# GCN: %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec name: vop3_u16 tracksRegLiveness: true body: | @@ -96,11 +96,11 @@ body: | %2:vgpr_32 = COPY $vgpr2 %3:vgpr_32 = IMPLICIT_DEF %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec - %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec + %5:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, %4, 0, %3, 0, 0, implicit $exec %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec - %7:vgpr_32 = V_ADD_NC_U16_e64 1, %6, 2, %5, 0, 0, implicit $exec + %7:vgpr_32 = V_ADD_NC_U16_fake16_e64 1, %6, 2, %5, 0, 0, implicit $exec %8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec - %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec + %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec ... name: vop3p @@ -880,11 +880,11 @@ body: | # Check op_sel is all 0s when combining # GCN-LABEL: name: opsel_vop3 -# GCN: %4:vgpr_32 = V_ADD_I16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec -# GCN: %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec -# GCN: %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec -# GCN: %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec -# GCN: %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec +# GCN: %4:vgpr_32 = V_ADD_I16_fake16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec +# GCN: %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec +# GCN: %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec +# GCN: %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec name: opsel_vop3 tracksRegLiveness: true body: | @@ -897,23 +897,23 @@ body: | ; Combine for op_sel:[0,0,0] %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec - %4:vgpr_32 = V_ADD_I16_e64 0, %3, 0, %1, 0, 0, implicit $exec + %4:vgpr_32 = V_ADD_I16_fake16_e64 0, %3, 0, %1, 0, 0, implicit $exec ; Do not combine for op_sel:[1,0,0] %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec - %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec + %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec ; Do not combine for op_sel:[0,1,0] %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec - %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec + %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec ; Do not combine for op_sel:[1,1,0] %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec - %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec + %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec ; Do not combine for op_sel:[0,0,1] (dst_op_sel only) %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec - %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec + %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec ... # Check op_sel is all 0s and op_sel_hi is all 1s when combining diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll index c62b4e565078e..2e2a1094ba99a 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll @@ -996,7 +996,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $exec + ; GISEL-GFX11-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY2]], 0, [[COPY1]], 0, 0, implicit $exec ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; GISEL-GFX11-NEXT: FLAT_STORE_SHORT [[COPY3]], [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`) ; GISEL-GFX11-NEXT: S_ENDPGM 0 @@ -1020,7 +1020,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg ; DAGISEL-GFX11-WF32-NEXT: {{ $}} ; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; DAGISEL-GFX11-WF32-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; DAGISEL-GFX11-WF32-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; DAGISEL-GFX11-WF32-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`) @@ -1032,7 +1032,7 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_i16(i16 inreg ; DAGISEL-GFX11-WF64-NEXT: {{ $}} ; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; DAGISEL-GFX11-WF64-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; DAGISEL-GFX11-WF64-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec ; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; DAGISEL-GFX11-WF64-NEXT: FLAT_STORE_SHORT killed [[COPY2]], killed [[V_ADD_NC_U16_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into `ptr poison`) diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 67038f4c8eec0..210d55898367d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -244,49 +244,67 @@ v_add_lshl_u32 v5, src_scc, vcc_lo, -1 v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_add_nc_i16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +v_add_nc_i16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] -v_add_nc_i16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] +v_add_nc_i16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] -v_add_nc_i16 v5, s1, s2 +v_add_nc_i16 v5.l, s1, s2 // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] -v_add_nc_i16 v5, s105, s105 +v_add_nc_i16 v5.l, s105, s105 // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] -v_add_nc_i16 v5, vcc_lo, ttmp15 +v_add_nc_i16 v5.l, vcc_lo, ttmp15 // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] -v_add_nc_i16 v5, vcc_hi, 0xfe0b +v_add_nc_i16 v5.l, vcc_hi, 0xfe0b // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_add_nc_i16 v5, ttmp15, src_scc +v_add_nc_i16 v5.l, ttmp15, src_scc // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] -v_add_nc_i16 v5, m0, 0.5 +v_add_nc_i16 v5.l, m0, 0.5 // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00] -v_add_nc_i16 v5, exec_lo, -1 +v_add_nc_i16 v5.l, exec_lo, -1 // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] -v_add_nc_i16 v5, exec_hi, null +v_add_nc_i16 v5.l, exec_hi, null // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] -v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +v_add_nc_i16 v5.l, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_i16 v5.l, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX11: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00] -v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] -v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX11: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00] -v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] -v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i16 v5.l, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_i16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp // GFX11: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_add_nc_i32 v5, v1, v2 @@ -334,49 +352,67 @@ v_add_nc_i32 v5, src_scc, vcc_lo v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_add_nc_u16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +v_add_nc_u16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] -v_add_nc_u16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] +v_add_nc_u16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] -v_add_nc_u16 v5, s1, s2 +v_add_nc_u16 v5.l, s1, s2 // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] -v_add_nc_u16 v5, s105, s105 +v_add_nc_u16 v5.l, s105, s105 // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] -v_add_nc_u16 v5, vcc_lo, ttmp15 +v_add_nc_u16 v5.l, vcc_lo, ttmp15 // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] -v_add_nc_u16 v5, vcc_hi, 0xfe0b +v_add_nc_u16 v5.l, vcc_hi, 0xfe0b // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_add_nc_u16 v5, ttmp15, src_scc +v_add_nc_u16 v5.l, ttmp15, src_scc // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] -v_add_nc_u16 v5, m0, 0.5 +v_add_nc_u16 v5.l, m0, 0.5 // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00] -v_add_nc_u16 v5, exec_lo, -1 +v_add_nc_u16 v5.l, exec_lo, -1 // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] -v_add_nc_u16 v5, exec_hi, null +v_add_nc_u16 v5.l, exec_hi, null // GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] -v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +v_add_nc_u16 v5.l, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] + +v_add_nc_u16 v5.l, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] + +v_add_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX11: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00] -v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +v_add_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX11: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] -v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +v_add_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX11: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00] -v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX11: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] -v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_u16 v5.l, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00] + +v_add_nc_u16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_u16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp // GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_alignbit_b32 v5, v1, v2, s3 @@ -5801,49 +5837,67 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_sub_nc_i16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +v_sub_nc_i16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] -v_sub_nc_i16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] +v_sub_nc_i16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] -v_sub_nc_i16 v5, s1, s2 +v_sub_nc_i16 v5.l, s1, s2 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] -v_sub_nc_i16 v5, s105, s105 +v_sub_nc_i16 v5.l, s105, s105 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] -v_sub_nc_i16 v5, vcc_lo, ttmp15 +v_sub_nc_i16 v5.l, vcc_lo, ttmp15 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] -v_sub_nc_i16 v5, vcc_hi, 0xfe0b +v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_sub_nc_i16 v5, ttmp15, src_scc +v_sub_nc_i16 v5.l, ttmp15, src_scc // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] -v_sub_nc_i16 v5, m0, 0.5 +v_sub_nc_i16 v5.l, m0, 0.5 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00] -v_sub_nc_i16 v5, exec_lo, -1 +v_sub_nc_i16 v5.l, exec_lo, -1 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] -v_sub_nc_i16 v5, exec_hi, null +v_sub_nc_i16 v5.l, exec_hi, null // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] -v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +v_sub_nc_i16 v5.l, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_i16 v5.l, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX11: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00] -v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] -v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX11: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00] -v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] -v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i16 v5.l, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_i16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp // GFX11: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_sub_nc_i32 v5, v1, v2 @@ -5891,49 +5945,67 @@ v_sub_nc_i32 v5, src_scc, vcc_lo v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_sub_nc_u16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +v_sub_nc_u16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] -v_sub_nc_u16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] +v_sub_nc_u16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] -v_sub_nc_u16 v5, s1, s2 +v_sub_nc_u16 v5.l, s1, s2 // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] -v_sub_nc_u16 v5, s105, s105 +v_sub_nc_u16 v5.l, s105, s105 // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] -v_sub_nc_u16 v5, vcc_lo, ttmp15 +v_sub_nc_u16 v5.l, vcc_lo, ttmp15 // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] -v_sub_nc_u16 v5, vcc_hi, 0xfe0b +v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_sub_nc_u16 v5, ttmp15, src_scc +v_sub_nc_u16 v5.l, ttmp15, src_scc // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] -v_sub_nc_u16 v5, m0, 0.5 +v_sub_nc_u16 v5.l, m0, 0.5 // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00] -v_sub_nc_u16 v5, exec_lo, -1 +v_sub_nc_u16 v5.l, exec_lo, -1 // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] -v_sub_nc_u16 v5, exec_hi, null +v_sub_nc_u16 v5.l, exec_hi, null // GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] -v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +v_sub_nc_u16 v5.l, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] + +v_sub_nc_u16 v5.l, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] + +v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX11: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00] -v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX11: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] -v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX11: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00] -v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX11: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] -v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_u16 v5.l, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00] + +v_sub_nc_u16 v5.l, v1.h, v2.l +// GFX11: encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_u16 v5.l, v255.l, v255.h +// GFX11: encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp // GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_subrev_co_u32 v5, s6, v1, v2 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 3c693c556194e..c82b61e21edf6 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -194,47 +194,47 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -278,47 +278,47 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4116,47 +4116,47 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4200,47 +4200,47 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4475,30 +4475,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX11: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] @@ -4724,30 +4700,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 // GFX11: encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 79709278bc0c7..73369685a0e6d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -114,14 +114,23 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -132,14 +141,23 @@ v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2601,14 +2619,23 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2619,14 +2646,23 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2748,30 +2784,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] @@ -2997,30 +3009,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] // GFX11: encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index f28933ec3a894..1ae1eaf1ceead 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -208,49 +208,58 @@ v_add_lshl_u32 v5, src_scc, vcc_lo, -1 v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null // GFX12: encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_add_nc_i16 v5, v1, v2 +v_add_nc_i16 v5.l, v1.l, v2.l // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] -v_add_nc_i16 v5, v255, v255 +v_add_nc_i16 v5.l, v1.h, v2.l +// GFX12: encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_i16 v5.l, v255.l, v255.l // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] -v_add_nc_i16 v5, s1, s2 +v_add_nc_i16 v5.l, v255.l, v255.h +// GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_i16 v5.l, s1, s2 // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] -v_add_nc_i16 v5, s105, s105 +v_add_nc_i16 v5.l, s105, s105 // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] -v_add_nc_i16 v5, vcc_lo, ttmp15 +v_add_nc_i16 v5.l, vcc_lo, ttmp15 // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] -v_add_nc_i16 v5, vcc_hi, 0xfe0b +v_add_nc_i16 v5.l, vcc_hi, 0xfe0b // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_add_nc_i16 v5, ttmp15, src_scc +v_add_nc_i16 v5.l, ttmp15, src_scc // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] -v_add_nc_i16 v5, m0, 0.5 +v_add_nc_i16 v5.l, m0, 0.5 // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00] -v_add_nc_i16 v5, exec_lo, -1 +v_add_nc_i16 v5.l, exec_lo, -1 // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] -v_add_nc_i16 v5, exec_hi, null +v_add_nc_i16 v5.l, exec_hi, null // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] -v_add_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +v_add_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX12: encoding: [0x05,0x58,0x0d,0xd7,0x7c,0xfc,0x00,0x00] -v_add_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +v_add_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX12: encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] -v_add_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +v_add_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX12: encoding: [0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00] -v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX12: encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] -v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_i16 v255.h, 0xfe0b, vcc_hi clamp // GFX12: encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_add_nc_i32 v5, v1, v2 @@ -298,49 +307,58 @@ v_add_nc_i32 v5, src_scc, vcc_lo v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp // GFX12: encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_add_nc_u16 v5, v1, v2 +v_add_nc_u16 v5.l, v1.l, v2.l // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] -v_add_nc_u16 v5, v255, v255 +v_add_nc_u16 v5.l, v1.h, v2.l +// GFX12: encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] + +v_add_nc_u16 v5.l, v255.l, v255.l // GFX12: encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] -v_add_nc_u16 v5, s1, s2 +v_add_nc_u16 v5.l, v255.l, v255.h +// GFX12: encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] + +v_add_nc_u16 v5.l, s1, s2 // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] -v_add_nc_u16 v5, s105, s105 +v_add_nc_u16 v5.l, s105, s105 // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] -v_add_nc_u16 v5, vcc_lo, ttmp15 +v_add_nc_u16 v5.l, vcc_lo, ttmp15 // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] -v_add_nc_u16 v5, vcc_hi, 0xfe0b +v_add_nc_u16 v5.l, vcc_hi, 0xfe0b // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_add_nc_u16 v5, ttmp15, src_scc +v_add_nc_u16 v5.l, ttmp15, src_scc // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] -v_add_nc_u16 v5, m0, 0.5 +v_add_nc_u16 v5.l, m0, 0.5 // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00] -v_add_nc_u16 v5, exec_lo, -1 +v_add_nc_u16 v5.l, exec_lo, -1 // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] -v_add_nc_u16 v5, exec_hi, null +v_add_nc_u16 v5.l, exec_hi, null // GFX12: encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] -v_add_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +v_add_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX12: encoding: [0x05,0x58,0x03,0xd7,0x7c,0xfc,0x00,0x00] -v_add_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +v_add_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX12: encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] -v_add_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +v_add_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX12: encoding: [0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00] -v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX12: encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] -v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_add_nc_u16 v255.h, 0xfe0b, vcc_hi clamp // GFX12: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_alignbit_b32 v5, v1, v2, s3 @@ -5696,49 +5714,58 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX12: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_sub_nc_i16 v5, v1, v2 +v_sub_nc_i16 v5.l, v1.l, v2.l // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] -v_sub_nc_i16 v5, v255, v255 +v_sub_nc_i16 v5.l, v1.h, v2.l +// GFX12: encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_i16 v5.l, v255.l, v255.l // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] -v_sub_nc_i16 v5, s1, s2 +v_sub_nc_i16 v5.l, v255.l, v255.h +// GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_i16 v5.l, s1, s2 // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] -v_sub_nc_i16 v5, s105, s105 +v_sub_nc_i16 v5.l, s105, s105 // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] -v_sub_nc_i16 v5, vcc_lo, ttmp15 +v_sub_nc_i16 v5.l, vcc_lo, ttmp15 // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] -v_sub_nc_i16 v5, vcc_hi, 0xfe0b +v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_sub_nc_i16 v5, ttmp15, src_scc +v_sub_nc_i16 v5.l, ttmp15, src_scc // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] -v_sub_nc_i16 v5, m0, 0.5 +v_sub_nc_i16 v5.l, m0, 0.5 // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00] -v_sub_nc_i16 v5, exec_lo, -1 +v_sub_nc_i16 v5.l, exec_lo, -1 // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] -v_sub_nc_i16 v5, exec_hi, null +v_sub_nc_i16 v5.l, exec_hi, null // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] -v_sub_nc_i16 v5, null, exec_lo op_sel:[1,1,1] +v_sub_nc_i16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX12: encoding: [0x05,0x58,0x0e,0xd7,0x7c,0xfc,0x00,0x00] -v_sub_nc_i16 v5, -1, exec_hi op_sel:[0,0,0] +v_sub_nc_i16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX12: encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] -v_sub_nc_i16 v5, 0.5, m0 op_sel:[1,0,0] +v_sub_nc_i16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX12: encoding: [0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00] -v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX12: encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] -v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi clamp // GFX12: encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_sub_nc_i32 v5, v1, v2 @@ -5786,49 +5813,58 @@ v_sub_nc_i32 v5, src_scc, vcc_lo v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp // GFX12: encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_sub_nc_u16 v5, v1, v2 +v_sub_nc_u16 v5.l, v1.l, v2.l // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] -v_sub_nc_u16 v5, v255, v255 +v_sub_nc_u16 v5.l, v1.h, v2.l +// GFX12: encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] + +v_sub_nc_u16 v5.l, v255.l, v255.l // GFX12: encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] -v_sub_nc_u16 v5, s1, s2 +v_sub_nc_u16 v5.l, v255.l, v255.h +// GFX12: encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] + +v_sub_nc_u16 v5.l, s1, s2 // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] -v_sub_nc_u16 v5, s105, s105 +v_sub_nc_u16 v5.l, s105, s105 // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] -v_sub_nc_u16 v5, vcc_lo, ttmp15 +v_sub_nc_u16 v5.l, vcc_lo, ttmp15 // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] -v_sub_nc_u16 v5, vcc_hi, 0xfe0b +v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_sub_nc_u16 v5, ttmp15, src_scc +v_sub_nc_u16 v5.l, ttmp15, src_scc // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] -v_sub_nc_u16 v5, m0, 0.5 +v_sub_nc_u16 v5.l, m0, 0.5 // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00] -v_sub_nc_u16 v5, exec_lo, -1 +v_sub_nc_u16 v5.l, exec_lo, -1 // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] -v_sub_nc_u16 v5, exec_hi, null +v_sub_nc_u16 v5.l, exec_hi, null // GFX12: encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] -v_sub_nc_u16 v5, null, exec_lo op_sel:[1,1,1] +v_sub_nc_u16 v5.h, null, exec_lo op_sel:[1,1,1] // GFX12: encoding: [0x05,0x58,0x04,0xd7,0x7c,0xfc,0x00,0x00] -v_sub_nc_u16 v5, -1, exec_hi op_sel:[0,0,0] +v_sub_nc_u16 v5.l, -1, exec_hi op_sel:[0,0,0] // GFX12: encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] -v_sub_nc_u16 v5, 0.5, m0 op_sel:[1,0,0] +v_sub_nc_u16 v5.l, 0.5, m0 op_sel:[1,0,0] // GFX12: encoding: [0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00] -v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] +v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] // GFX12: encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] -v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp +// GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi clamp // GFX12: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_subrev_co_u32 v5, s6, v1, v2 diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index adf37901fc85f..56bd0ee4b4746 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -214,47 +214,71 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf // GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_add_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX12: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_add_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -298,47 +322,71 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf // GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX12: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4622,47 +4670,71 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf // GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_sub_nc_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX12: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_sub_nc_i16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4706,47 +4778,71 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 // GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf // GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX12: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -5001,30 +5097,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 // GFX12: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] @@ -5250,30 +5322,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 ban v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 // GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 1be122faccbc9..6331d22c6976d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -134,14 +134,38 @@ v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0x00,0x47,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: [0xff,0x80,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x10,0x0d,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xc0,0x0d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -152,14 +176,38 @@ v_add_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0x80,0x26,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x10,0x03,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xc0,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3043,14 +3091,38 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_i16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: [0xff,0x80,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x10,0x0e,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xc0,0x0e,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3061,14 +3133,38 @@ v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0x80,0x25,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x05,0x10,0x04,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: [0xff,0xc0,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3210,30 +3306,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] @@ -3459,30 +3531,6 @@ v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] // GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 07058a6451592..365caa5f9b6d0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -189,49 +189,112 @@ # GFX11: v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_add_nc_i16 v5, m0, 0x3800 +# W32-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] -0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00 +# W32-REAL16: v_add_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0d,0xd7,0xc1,0xfe,0x00,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x0d,0xd7,0xf0,0xfa,0x00,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0d,0xd7,0xfd,0xd4,0x00,0x00 + +# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX11: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] @@ -279,49 +342,112 @@ # GFX11: v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_add_nc_u16 v5, m0, 0x3800 +# W32-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] -0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00 +# W32-REAL16: v_add_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x03,0xd7,0xc1,0xfe,0x00,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x03,0xd7,0xf0,0xfa,0x00,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x03,0xd7,0xfd,0xd4,0x00,0x00 + +# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX11: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] @@ -5871,49 +5997,112 @@ # GFX11: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_sub_nc_i16 v5, m0, 0x3800 +# W32-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] -0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00 +# W32-REAL16: v_sub_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0e,0xd7,0xc1,0xfe,0x00,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x0e,0xd7,0xf0,0xfa,0x00,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0e,0xd7,0xfd,0xd4,0x00,0x00 + +# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX11: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] @@ -5961,49 +6150,112 @@ # GFX11: v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_sub_nc_u16 v5, m0, 0x3800 +# W32-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] -0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00 +# W32-REAL16: v_sub_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x04,0xd7,0xc1,0xfe,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +0x05,0x00,0x04,0xd7,0xf0,0xfa,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x04,0xd7,0xfd,0xd4,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 4ae8b053f0e0f..d0bd6398ad10a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -3824,88 +3824,220 @@ # W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4664,88 +4796,220 @@ # GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index b44dba748666c..cbf5a3d11e50b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -2168,34 +2168,112 @@ # W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2840,34 +2918,112 @@ # GFX11: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX11: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index af04a31423b6f..c87c8855f5cdc 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -153,49 +153,112 @@ # GFX12: v_add_lshl_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x47,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX12: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0d,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x0d,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_add_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x0d,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_add_nc_i16 v5, m0, 0x3800 +# W32-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0d,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_add_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x0d,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x0d,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x58,0x0d,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] +# W32-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x08,0x0d,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x10,0x0d,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x0d,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x0d,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x0d,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_add_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd7,0x01,0x05,0x02,0x00] @@ -243,49 +306,112 @@ # GFX12: v_add_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x80,0x26,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX12: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x03,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x03,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x03,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x03,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_add_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x03,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_add_nc_u16 v5, m0, 0x3800 +# W32-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x03,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_add_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x03,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x03,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x03,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x58,0x03,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] +# W32-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x08,0x03,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x10,0x03,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x03,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_add_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_add_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x03,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_add_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] @@ -5797,49 +5923,112 @@ # GFX12: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX12: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 ; encoding: [0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x0e,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, s1, s2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, s105, s105 ; encoding: [0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x0e,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_sub_nc_i16 v5, m0, 0x3800 +# W32-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x0e,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x0e,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, exec_hi, null ; encoding: [0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, null, exec_lo ; encoding: [0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x0e,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x58,0x0e,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] +# W32-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x08,0x0e,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x10,0x0e,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x0e,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_i16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x0e,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x0e,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # GFX12: v_sub_nc_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd7,0x01,0x05,0x02,0x00] @@ -5887,49 +6076,112 @@ # GFX12: v_sub_nc_i32 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x80,0x25,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX12: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x04,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0x04,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00] 0x05,0x00,0x04,0xd7,0x69,0xd2,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00] 0x05,0x00,0x04,0xd7,0x6a,0xf6,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x04,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00] 0x05,0x00,0x04,0xd7,0x7b,0xfa,0x01,0x00 -# GFX12: v_sub_nc_u16 v5, m0, 0x3800 +# W32-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, m0, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x7d,0xfe,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x04,0xd7,0x7d,0xe0,0x01,0x00 -# GFX12: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00] 0x05,0x00,0x04,0xd7,0x7e,0x82,0x01,0x00 -# GFX12: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, exec_hi, null ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00] 0x05,0x00,0x04,0xd7,0x7f,0xf8,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00] 0x05,0x00,0x04,0xd7,0x7c,0xfc,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.h, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, -1, exec_hi op_sel:[1,1,1] ; encoding: [0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00] 0x05,0x58,0x04,0xd7,0xc1,0xfe,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] +# W32-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, 0x3800, m0 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] 0x05,0x08,0x04,0xd7,0xf0,0xfa,0x00,0x00 -# GFX12: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00] 0x05,0x10,0x04,0xd7,0xfd,0xd4,0x00,0x00 -# GFX12: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00] +0x05,0x08,0x04,0xd7,0x01,0x05,0x02,0x00 + +# W32-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_sub_nc_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_sub_nc_u16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00] +0x05,0x10,0x04,0xd7,0xff,0xff,0x03,0x00 + +# W32-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_sub_nc_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 # W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 65cfdd5ef7de0..5081b9811e43e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4115,88 +4115,268 @@ # W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -5000,88 +5180,268 @@ # GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 # GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index 4640b967cbc07..77f05027d1cfd 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -2393,34 +2393,160 @@ # W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x0d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x03,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3113,34 +3239,160 @@ # GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0xff,0x13,0x11,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x0e,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x58,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x08,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x10,0x04,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xc0,0x04,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] ; encoding: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92]