Skip to content

Commit 967b64b

Browse files
committed
[AMDGPU] Split dot2-insts feature
Split out some of the instructions predicated on the dot2-insts target feature (v_dot2_f32_f16, v_dot4_u32_u8 and v_dot8_u32_u4) into a new dot7-insts target feature, in preparation for subtargets that have some but not all of these instructions. NFCI (no functional change intended). Differential Revision: https://reviews.llvm.org/D98717
1 parent 6718ce4 commit 967b64b

File tree

9 files changed

+67
-28
lines changed

9 files changed

+67
-28
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,13 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
193193
// Deep learning builtins.
194194
//===----------------------------------------------------------------------===//
195195

196-
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot2-insts")
196+
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot7-insts")
197197
TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts")
198198
TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts")
199199
TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts")
200-
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot2-insts")
200+
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot7-insts")
201201
TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts")
202-
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts")
202+
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts")
203203

204204
//===----------------------------------------------------------------------===//
205205
// GFX10+ only builtins.

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
183183
Features["dot2-insts"] = true;
184184
Features["dot5-insts"] = true;
185185
Features["dot6-insts"] = true;
186+
Features["dot7-insts"] = true;
186187
Features["dl-insts"] = true;
187188
Features["flat-address-space"] = true;
188189
Features["16-bit-insts"] = true;
@@ -200,6 +201,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
200201
Features["dot2-insts"] = true;
201202
Features["dot5-insts"] = true;
202203
Features["dot6-insts"] = true;
204+
Features["dot7-insts"] = true;
203205
LLVM_FALLTHROUGH;
204206
case GK_GFX1010:
205207
Features["dl-insts"] = true;
@@ -227,6 +229,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
227229
Features["dl-insts"] = true;
228230
Features["dot1-insts"] = true;
229231
Features["dot2-insts"] = true;
232+
Features["dot7-insts"] = true;
230233
LLVM_FALLTHROUGH;
231234
case GK_GFX90C:
232235
case GK_GFX909:

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,17 @@
5050
// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
5151
// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
5252
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
53-
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
54-
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
53+
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
54+
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
5555
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
56-
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
56+
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
5757
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
5858
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
59-
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
60-
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
61-
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
62-
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
63-
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
64-
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
59+
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
60+
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
61+
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
62+
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
63+
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
64+
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
6565

6666
kernel void test() {}

clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ kernel void builtins_amdgcn_dl_insts_err(
1313
half2 v2hA, half2 v2hB, float fC,
1414
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
1515
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
16-
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
17-
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
16+
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
17+
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
1818

1919
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
2020
siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
@@ -25,12 +25,12 @@ kernel void builtins_amdgcn_dl_insts_err(
2525
siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
2626
siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
2727

28-
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
29-
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
28+
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot7-insts}}
29+
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot7-insts}}
3030

3131
siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
3232
siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
3333

34-
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
35-
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
34+
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot7-insts}}
35+
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot7-insts}}
3636
}

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
480480
def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
481481
"HasDot2Insts",
482482
"true",
483-
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
483+
"Has v_dot2_i32_i16, v_dot2_u32_u16 instructions"
484484
>;
485485

486486
def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
@@ -507,6 +507,12 @@ def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
507507
"Has v_dot4c_i32_i8 instruction"
508508
>;
509509

510+
def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
511+
"HasDot7Insts",
512+
"true",
513+
"Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
514+
>;
515+
510516
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
511517
"HasMAIInsts",
512518
"true",
@@ -902,6 +908,7 @@ def FeatureISAVersion9_0_6 : FeatureSet<
902908
FeatureDLInsts,
903909
FeatureDot1Insts,
904910
FeatureDot2Insts,
911+
FeatureDot7Insts,
905912
FeatureSupportsSRAMECC,
906913
FeatureImageGather4D16Bug]>;
907914

@@ -920,6 +927,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
920927
FeatureDot4Insts,
921928
FeatureDot5Insts,
922929
FeatureDot6Insts,
930+
FeatureDot7Insts,
923931
FeatureMAIInsts,
924932
FeaturePkFmacF16Inst,
925933
FeatureAtomicFaddInsts,
@@ -948,6 +956,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
948956
FeatureDot4Insts,
949957
FeatureDot5Insts,
950958
FeatureDot6Insts,
959+
FeatureDot7Insts,
951960
Feature64BitDPP,
952961
FeaturePackedFP32Ops,
953962
FeatureMAIInsts,
@@ -1008,6 +1017,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
10081017
FeatureDot2Insts,
10091018
FeatureDot5Insts,
10101019
FeatureDot6Insts,
1020+
FeatureDot7Insts,
10111021
FeatureNSAEncoding,
10121022
FeatureWavefrontSize32,
10131023
FeatureScalarStores,
@@ -1028,6 +1038,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
10281038
FeatureDot2Insts,
10291039
FeatureDot5Insts,
10301040
FeatureDot6Insts,
1041+
FeatureDot7Insts,
10311042
FeatureNSAEncoding,
10321043
FeatureWavefrontSize32,
10331044
FeatureScalarStores,
@@ -1049,6 +1060,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
10491060
FeatureDot2Insts,
10501061
FeatureDot5Insts,
10511062
FeatureDot6Insts,
1063+
FeatureDot7Insts,
10521064
FeatureNSAEncoding,
10531065
FeatureWavefrontSize32,
10541066
FeatureShaderCyclesRegister]>;
@@ -1373,6 +1385,9 @@ def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
13731385
def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
13741386
AssemblerPredicate<(all_of FeatureDot6Insts)>;
13751387

1388+
def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
1389+
AssemblerPredicate<(all_of FeatureDot7Insts)>;
1390+
13761391
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
13771392
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
13781393

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
267267
HasDot4Insts(false),
268268
HasDot5Insts(false),
269269
HasDot6Insts(false),
270+
HasDot7Insts(false),
270271
HasMAIInsts(false),
271272
HasPkFmacF16Inst(false),
272273
HasAtomicFaddInsts(false),

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
150150
bool HasDot4Insts;
151151
bool HasDot5Insts;
152152
bool HasDot6Insts;
153+
bool HasDot7Insts;
153154
bool HasMAIInsts;
154155
bool HasPkFmacF16Inst;
155156
bool HasAtomicFaddInsts;
@@ -687,6 +688,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
687688
return HasDot6Insts;
688689
}
689690

691+
bool hasDot7Insts() const {
692+
return HasDot7Insts;
693+
}
694+
690695
bool hasMAIInsts() const {
691696
return HasMAIInsts;
692697
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10486,7 +10486,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
1048610486
EVT VT = N->getValueType(0);
1048710487
SDLoc SL(N);
1048810488

10489-
if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
10489+
if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
1049010490
return SDValue();
1049110491

1049210492
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -287,19 +287,24 @@ class SDot2Pat<Instruction Inst> : GCNPat <
287287
let IsDOT = 1 in {
288288
let SubtargetPredicate = HasDot2Insts in {
289289

290-
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
291-
VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
292-
AMDGPUfdot2, 1/*ExplicitClamp*/>;
293290
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
294291
VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
295292
def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
296293
VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
294+
295+
} // End SubtargetPredicate = HasDot2Insts
296+
297+
let SubtargetPredicate = HasDot7Insts in {
298+
299+
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
300+
VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
301+
AMDGPUfdot2, 1/*ExplicitClamp*/>;
297302
def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
298303
VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
299304
def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
300305
VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
301306

302-
} // End SubtargetPredicate = HasDot2Insts
307+
} // End SubtargetPredicate = HasDot7Insts
303308

304309
let SubtargetPredicate = HasDot1Insts in {
305310

@@ -564,13 +569,18 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
564569

565570
let SubtargetPredicate = HasDot2Insts in {
566571

567-
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
568572
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
569573
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
574+
575+
} // End SubtargetPredicate = HasDot2Insts
576+
577+
let SubtargetPredicate = HasDot7Insts in {
578+
579+
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
570580
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
571581
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
572582

573-
} // End SubtargetPredicate = HasDot2Insts
583+
} // End SubtargetPredicate = HasDot7Insts
574584

575585
let SubtargetPredicate = HasDot1Insts in {
576586

@@ -657,13 +667,18 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>;
657667

658668
let SubtargetPredicate = HasDot2Insts in {
659669

660-
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
661670
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
662671
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
672+
673+
} // End SubtargetPredicate = HasDot2Insts
674+
675+
let SubtargetPredicate = HasDot7Insts in {
676+
677+
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
663678
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>;
664679
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>;
665680

666-
} // End SubtargetPredicate = HasDot2Insts
681+
} // End SubtargetPredicate = HasDot7Insts
667682

668683
let SubtargetPredicate = HasDot1Insts in {
669684

0 commit comments

Comments
 (0)