diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 41a03bb1e73c9..5461c645e608f 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -104,14 +104,14 @@ class VOP1_DPP_Pseudo pattern=[]> : VOP_DPP_Pseudo { } -class getVOP1Pat64 : LetDummies { +class getVOP1Pat : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))], !if(P.HasOMod, [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$vdst, (node P.Src0VT:$src0))] + [(set P.DstVT:$vdst, (node P.Src0RC32:$src0))] ) ); } @@ -233,35 +233,18 @@ let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; } // End isMoveImm = 1 -// FIXME: Specify SchedRW for READFIRSTLANE_B32 -// TODO: Make profile for this, there is VOP3 encoding also -def V_READFIRSTLANE_B32 : - InstSI <(outs SReg_32:$vdst), - (ins VRegOrLdsSrc_32:$src0), - "v_readfirstlane_b32 $vdst, $src0", - [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLdsSrc_32:$src0)))]>, - Enc32 { - - let isCodeGenOnly = 0; - let UseNamedOperandTable = 1; - - let Size = 4; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; +def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped, untyped]> { + let DstRC = RegisterOperand; + let Src0RC32 = VRegOrLdsSrc_32; + let Asm32 = " $vdst, $src0"; +} - let VOP1 = 1; - let VALU = 1; - let Uses = [EXEC]; +// FIXME: Specify SchedRW for READFIRSTLANE_B32 +// TODO: There is VOP3 encoding also +def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE, + getVOP1Pat.ret, 1> { let isConvergent = 1; - - bits<8> vdst; - bits<9> src0; - - let Inst{8-0} = src0; - let Inst{16-9} = 0x2; - let Inst{24-17} = vdst; - let Inst{31-25} = 0x3f; //encoding } let isReMaterializable = 1 in { @@ -726,8 +709,8 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1 let SubtargetPredicate = isGFX11Plus in { // Restrict src0 to be VGPR def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, - getVOP1Pat64.ret, + getVOP1Pat.ret, /*VOP1Only=*/ 1>; defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16>; defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>; @@ -1109,6 +1092,11 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast(NAME#"_e64").Pfl>; } + multiclass VOP1Only_Real_gfx6_gfx7 op> { + def _gfx6_gfx7 : + VOP1_Real(NAME), SIEncodingFamily.SI>, + VOP1e(NAME).Pfl>; + } } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" multiclass VOP1_Real_gfx6_gfx7 op> : @@ -1125,6 +1113,9 @@ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 op> : VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_NO_DPP, VOP1_Real_NO_DPP; +multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op> : + VOP1Only_Real_gfx6_gfx7, VOP1Only_Real_gfx10_gfx11_gfx12; + defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; @@ -1135,6 +1126,7 @@ defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>; defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>; +defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>; defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>; defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>; defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>; @@ -1238,6 +1230,7 @@ multiclass VOP1_Real_vi op> { defm V_NOP : VOP1_Real_vi <0x0>; defm V_MOV_B32 : VOP1_Real_vi <0x1>; +defm V_READFIRSTLANE_B32 : VOP1Only_Real_vi <0x2>; defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>; defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>; defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;