Skip to content

Commit 67a22cc

Browse files
committed
Added neg and abs modifier for src2
Fixed review comments
1 parent f33f712 commit 67a22cc

File tree

19 files changed

+1096
-174
lines changed

19 files changed

+1096
-174
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -438,8 +438,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-conversion-
438438
//===----------------------------------------------------------------------===//
439439
// GFX950 only builtins.
440440
//===----------------------------------------------------------------------===//
441-
TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4, "V4fV8ZiV8ZiV4fIiIiIiiIii", "nc", "gfx950-insts")
442-
TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4, "V16fV8ZiV8ZiV16fIiIiIiiIii", "nc", "gfx950-insts")
441+
TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4, "V4fV8ZiV8ZiV4fIiIiIbIbIiiIii", "nc", "gfx950-insts")
442+
TARGET_BUILTIN(__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4, "V16fV8ZiV8ZiV16fIiIiIbIbIiiIii", "nc", "gfx950-insts")
443443

444444
TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_f16, "V4fV8hV8hV4fIiIiIi", "nc", "gfx950-insts")
445445
TARGET_BUILTIN(__builtin_amdgcn_mfma_f32_16x16x32_bf16, "V4fV8yV8yV4fIiIiIi", "nc", "gfx950-insts")

clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -435,18 +435,18 @@ v16f test_mfma_f32_32x32x16_bf16(v8bf16 a, v8bf16 b, v16f c) {
435435

436436
// CHECK-GFX950-LABEL: @test_mfma_scale_f32_16x16x128_f8f6f4
437437
// CHECK-GFX950: [[EXTRACT_A:%.+]] = shufflevector <8 x i32> %a, <8 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
438-
// CHECK-GFX950: call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v6i32.v8i32(<6 x i32> [[EXTRACT_A]], <8 x i32> %b, <4 x float> %c, i32 3, i32 1, i32 2, i32 %scale_a, i32 3, i32 %scale_b)
438+
// CHECK-GFX950: call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v6i32.v8i32(<6 x i32> [[EXTRACT_A]], <8 x i32> %b, <4 x float> %c, i32 3, i32 1, i1 false, i1 false, i32 2, i32 %scale_a, i32 3, i32 %scale_b)
439439
void test_mfma_scale_f32_16x16x128_f8f6f4(global v4f* out, v8i a, v8i b, v4f c, int scale_a, int scale_b)
440440
{
441-
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 3, 1, 2, scale_a, 3, scale_b);
441+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 3, 1, false, false, 2, scale_a, 3, scale_b);
442442
}
443443

444444
// CHECK-GFX950-LABEL: @test_mfma_scale_f32_32x32x64_f8f6f4
445445
// CHECK-GFX950: [[EXTRACT_A:%.+]] = shufflevector <8 x i32> %a, <8 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
446-
// CHECK-GFX950: call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v6i32.v8i32(<6 x i32> [[EXTRACT_A]], <8 x i32> %b, <16 x float> %c, i32 3, i32 1, i32 2, i32 %scale_a, i32 3, i32 %scale_b)
446+
// CHECK-GFX950: call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v6i32.v8i32(<6 x i32> [[EXTRACT_A]], <8 x i32> %b, <16 x float> %c, i32 3, i32 1, i1 false, i1 false, i32 2, i32 %scale_a, i32 3, i32 %scale_b)
447447
void test_mfma_scale_f32_32x32x64_f8f6f4(global v16f* out, v8i a, v8i b, v16f c, int scale_a, int scale_b)
448448
{
449-
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 3, 1, 2, scale_a, 3, scale_b);
449+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 3, 1, false, false, 2, scale_a, 3, scale_b);
450450
}
451451

452452
// CHECK-GFX950-LABEL: @test_mfma_i32_16x16x64_i8(

clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950-param.cl

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,21 @@ void test_mfma_f32_32x32x16_bf16(__global float16* out, bfloat8 a, bfloat8 b, fl
3838
}
3939

4040
void test_mfma_scale_f32_16x16x128_f8f6f4(__global float4* out, int8 a, int8 b, float4 c, int X, int Y) {
41-
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, X, 0, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
42-
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, X, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
43-
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, 0, X, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
44-
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, 0, 0, Y, X, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
41+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, X, 0, false, false, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
42+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, X, false, false, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
43+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, 0, X, false, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
44+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, 0, false, X, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
45+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, 0, false, false, X, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
46+
*out = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a, b, c, 0, 0, false, false, 0, Y, X, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' must be a constant integer}}
4547
}
4648

4749
void test_mfma_scale_f32_32x32x64_f8f6f4(__global float16* out, int8 a, int8 b, float16 c, int X, int Y) {
48-
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, X, 0, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
49-
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, X, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
50-
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, 0, X, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
51-
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, 0, 0, Y, X, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
50+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, X, 0, false, false, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
51+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, X, false, false, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
52+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, 0, X, false, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
53+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, 0, false, X, 1, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
54+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, 0, false, false, X, Y, 2, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
55+
*out = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a, b, c, 0, 0, false, false, 0, Y, X, Y); // expected-error{{argument to '__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' must be a constant integer}}
5256
}
5357

5458
void test_mfma_i32_16x16x64_i8(__global int4* out, int4 a, int4 b, int4 c, int X) {

clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ void test(__global float4* out0, half8 a0, half8 b0, float4 c0,
5050
*out13 = __builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_bf8_fp8' needs target feature gfx950-insts}}
5151
*out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_bf8' needs target feature gfx950-insts}}
5252
*out13 = __builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8(a13, b13, c13, 0, 0, 0); // expected-error{{'__builtin_amdgcn_smfmac_f32_32x32x64_fp8_fp8' needs target feature gfx950-insts}}
53-
*out14 = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a14, b14, c14, 0, 0, 0, d14, 0, e14); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' needs target feature gfx950-insts}}
54-
*out15 = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a15, b15, c15, 0, 0, 0, d15, 0, e15); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' needs target feature gfx950-insts}}
53+
*out14 = __builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4(a14, b14, c14, 0, 0, false, false, 0, d14, 0, e14); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4' needs target feature gfx950-insts}}
54+
*out15 = __builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4(a15, b15, c15, 0, 0, false, false, 0, d15, 0, e15); // expected-error{{'__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4' needs target feature gfx950-insts}}
5555
*out16 = __builtin_amdgcn_permlane16_swap(a16, b16, false, false); // expected-error{{'__builtin_amdgcn_permlane16_swap' needs target feature permlane16-swap}}
5656
*out16 = __builtin_amdgcn_permlane32_swap(a16, b16, false, false); // expected-error{{'__builtin_amdgcn_permlane32_swap' needs target feature permlane32-swap}}
5757
*out17 = __builtin_amdgcn_cvt_scalef32_sr_bf8_bf16(*out17, a17, b17, c17, 0); // expected-error{{'__builtin_amdgcn_cvt_scalef32_sr_bf8_bf16' needs target feature bf8-cvt-scale-insts}}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3239,17 +3239,17 @@ class AMDGPUMfmaScaleIntrinsic<LLVMType DestTy> :
32393239
[llvm_anyvector_ty, llvm_anyvector_ty, DestTy,
32403240
llvm_i32_ty, // cbsz
32413241
llvm_i32_ty, // blgp
3242-
// llvm_i1_ty, // TODO: neg_src2
3243-
// llvm_i1_ty, // TODO: abs_src2
3244-
// llvm_i1_ty, // TODO: clamp
3242+
llvm_i1_ty, // neg_src2
3243+
llvm_i1_ty, // abs_src2
32453244
llvm_i32_ty, // op_sel (A matrix scale, 2-bits) // TODO: Make i2?
32463245
llvm_i32_ty, // v_mfma_ld_scale_b32 src0 (A matrix scale)
32473246
llvm_i32_ty, // op_sel (B matrix scale, 2-bits) // TODO: Make i2?
32483247
llvm_i32_ty // v_mfma_ld_scale_b32 src1 (B matrix scale)
32493248
],
32503249
[IntrConvergent, IntrNoMem,
32513250
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
3252-
ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<7>>
3251+
ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
3252+
ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<9>>
32533253
]>;
32543254

32553255
defset list<Intrinsic> AMDGPUMFMAIntrinsics908 = {

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4890,8 +4890,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48904890
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
48914891
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
48924892

4893-
OpdsMapping[8] = getVGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
48944893
OpdsMapping[10] = getVGPROpMapping(MI.getOperand(10).getReg(), MRI, *TRI);
4894+
OpdsMapping[12] = getVGPROpMapping(MI.getOperand(12).getReg(), MRI, *TRI);
48954895
break;
48964896
}
48974897
case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
152152
ImmTyOpSelHi,
153153
ImmTyNegLo,
154154
ImmTyNegHi,
155+
ImmTyNegLoSrc2,
156+
ImmTyNegHiSrc2,
155157
ImmTyIndexKey8bit,
156158
ImmTyIndexKey16bit,
157159
ImmTyDPP8,
@@ -416,6 +418,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
416418
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
417419
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
418420
bool isNegHi() const { return isImmTy(ImmTyNegHi); }
421+
bool isNegHiSrc2() const { return isImmTy(ImmTyNegHiSrc2); }
422+
bool isNegLoSrc2() const { return isImmTy(ImmTyNegLoSrc2); }
419423
bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
420424

421425
bool isRegOrImm() const {
@@ -1138,6 +1142,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
11381142
case ImmTyHigh: OS << "High"; break;
11391143
case ImmTyBLGP: OS << "BLGP"; break;
11401144
case ImmTyCBSZ: OS << "CBSZ"; break;
1145+
case ImmTyNegLoSrc2: OS << "NegSrc2"; break;
1146+
case ImmTyNegHiSrc2: OS << "AbsSrc2"; break;
11411147
case ImmTyABID: OS << "ABID"; break;
11421148
case ImmTyEndpgm: OS << "Endpgm"; break;
11431149
case ImmTyWaitVDST: OS << "WaitVDST"; break;
@@ -1632,7 +1638,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
16321638
ParseStatus parseOperandArrayWithPrefix(
16331639
const char *Prefix, OperandVector &Operands,
16341640
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1635-
bool (*ConvertResult)(int64_t &) = nullptr);
1641+
std::function<bool(int64_t &)> ConvertResult = nullptr);
16361642

16371643
ParseStatus
16381644
parseNamedBit(StringRef Name, OperandVector &Operands,
@@ -1687,6 +1693,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
16871693
ParseStatus parseFlatOffset(OperandVector &Operands);
16881694
ParseStatus parseR128A16(OperandVector &Operands);
16891695
ParseStatus parseBLGP(OperandVector &Operands);
1696+
ParseStatus parseNegHiSrc2(OperandVector &Operands);
1697+
ParseStatus parseNegLoSrc2(OperandVector &Operands);
16901698
bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
16911699
bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
16921700

@@ -6560,15 +6568,15 @@ ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
65606568

65616569
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
65626570
const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6563-
bool (*ConvertResult)(int64_t &)) {
6571+
std::function<bool(int64_t &)> ConvertResult) {
65646572
SMLoc S = getLoc();
65656573
if (!trySkipId(Prefix, AsmToken::Colon))
65666574
return ParseStatus::NoMatch;
65676575

65686576
if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
65696577
return ParseStatus::Failure;
65706578

6571-
unsigned Val = 0;
6579+
int64_t Val = 0;
65726580
const unsigned MaxSize = 4;
65736581

65746582
// FIXME: How to verify the number of elements matches the number of src
@@ -6593,7 +6601,9 @@ ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
65936601
if (!skipToken(AsmToken::Comma, "expected a comma"))
65946602
return ParseStatus::Failure;
65956603
}
6596-
6604+
if (ConvertResult && !ConvertResult(Val)) {
6605+
Error(S, "invalid " + StringRef(Prefix) + " value.");
6606+
}
65976607
Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
65986608
return ParseStatus::Success;
65996609
}
@@ -7163,6 +7173,23 @@ ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
71637173
return Res;
71647174
}
71657175

7176+
/// Reduce a packed modifier value to the single bit stored at bit index 2.
///
/// The callers in this file pass this as the result converter when parsing
/// "neg_lo:[...]" / "neg_hi:[...]" and want only element 2 of the array.
/// The value is shifted right by two and then masked to one bit, so the
/// result is always 0 or 1. A plain shift would let any higher set bit
/// survive and yield 2 or 3, which does not fit a one-bit operand.
///
/// \param Neg  in: packed value; out: 0 or 1 (bit 2 of the input).
/// \returns true always; every input is accepted.
static bool RightShift2Bits(int64_t &Neg) {
  Neg = (Neg >> 2) & 1;
  return true;
}
7180+
7181+
/// Parse a "neg_lo:[...]" operand array and add it to \p Operands as an
/// ImmTyNegLoSrc2 immediate.
///
/// All parsing is delegated to parseOperandArrayWithPrefix; RightShift2Bits
/// is supplied as the result converter so that only neg_lo[2] is extracted.
///
/// \returns the status reported by parseOperandArrayWithPrefix.
ParseStatus AMDGPUAsmParser::parseNegLoSrc2(OperandVector &Operands) {
  const char *const Prefix = "neg_lo";
  const AMDGPUOperand::ImmTy Ty = AMDGPUOperand::ImmTyNegLoSrc2;
  return parseOperandArrayWithPrefix(Prefix, Operands, Ty, RightShift2Bits);
}
7186+
7187+
/// Parse a "neg_hi:[...]" operand array and add it to \p Operands as an
/// ImmTyNegHiSrc2 immediate.
///
/// All parsing is delegated to parseOperandArrayWithPrefix; RightShift2Bits
/// is supplied as the result converter so that only neg_hi[2] is extracted.
///
/// \returns the status reported by parseOperandArrayWithPrefix.
ParseStatus AMDGPUAsmParser::parseNegHiSrc2(OperandVector &Operands) {
  const char *const Prefix = "neg_hi";
  const AMDGPUOperand::ImmTy Ty = AMDGPUOperand::ImmTyNegHiSrc2;
  return parseOperandArrayWithPrefix(Prefix, Operands, Ty, RightShift2Bits);
}
7192+
71667193
//===----------------------------------------------------------------------===//
71677194
// Exp
71687195
//===----------------------------------------------------------------------===//
@@ -8863,6 +8890,12 @@ void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
88638890
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyBLGP,
88648891
0, InsertPos);
88658892

8893+
// add neg and abs for src2
8894+
addOptionalImmOperand(Inst, Operands, OptionalIdx,
8895+
AMDGPUOperand::ImmTyNegLoSrc2, 0);
8896+
addOptionalImmOperand(Inst, Operands, OptionalIdx,
8897+
AMDGPUOperand::ImmTyNegHiSrc2, 0);
8898+
88668899
// Add dummy src_modifiers
88678900
Inst.addOperand(MCOperand::createImm(0));
88688901
Inst.addOperand(MCOperand::createImm(0));
@@ -8886,9 +8919,9 @@ void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
88868919
for (unsigned J = 0; J < 2; ++J) {
88878920
unsigned ModVal = 0;
88888921
if (OpSel & (1 << J))
8889-
ModVal |= SISrcMods::OP_SEL_0;
8922+
ModVal |= SISrcMods::OP_SEL_0; // 3rd bit is from opsel
88908923
if (OpSelHi & (1 << J))
8891-
ModVal |= SISrcMods::OP_SEL_1;
8924+
ModVal |= SISrcMods::OP_SEL_1; // 4th bit is from opsel_hi
88928925

88938926
const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
88948927
Inst.getOperand(ModIdx).setImm(ModVal);

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,6 +1300,20 @@ void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
13001300
printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
13011301
}
13021302

1303+
void AMDGPUInstPrinter::printNegLoSrc2(const MCInst *MI, unsigned OpNo,
1304+
const MCSubtargetInfo &STI,
1305+
raw_ostream &O) {
1306+
if (unsigned NegLo = !!(MI->getOperand(OpNo).getImm()))
1307+
O << " neg_lo:[0,0," << NegLo << ']';
1308+
}
1309+
1310+
void AMDGPUInstPrinter::printNegHiSrc2(const MCInst *MI, unsigned OpNo,
1311+
const MCSubtargetInfo &STI,
1312+
raw_ostream &O) {
1313+
if (unsigned NegHi = !!(MI->getOperand(OpNo).getImm()))
1314+
O << " neg_hi:[0,0," << NegHi << ']';
1315+
}
1316+
13031317
void AMDGPUInstPrinter::printIndexKey8bit(const MCInst *MI, unsigned OpNo,
13041318
const MCSubtargetInfo &STI,
13051319
raw_ostream &O) {

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ class MCInstrDesc;
1919

2020
class AMDGPUInstPrinter : public MCInstPrinter {
2121
public:
22-
AMDGPUInstPrinter(const MCAsmInfo &MAI,
23-
const MCInstrInfo &MII, const MCRegisterInfo &MRI)
24-
: MCInstPrinter(MAI, MII, MRI) {}
22+
AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
23+
const MCRegisterInfo &MRI)
24+
: MCInstPrinter(MAI, MII, MRI) {}
2525

2626
// Autogenerated by tblgen
2727
std::pair<const char *, uint64_t>
@@ -50,7 +50,7 @@ class AMDGPUInstPrinter : public MCInstPrinter {
5050
raw_ostream &O);
5151

5252
void printSMRDOffset8(const MCInst *MI, unsigned OpNo,
53-
const MCSubtargetInfo &STI, raw_ostream &O);
53+
const MCSubtargetInfo &STI, raw_ostream &O);
5454
void printSMEMOffset(const MCInst *MI, unsigned OpNo,
5555
const MCSubtargetInfo &STI, raw_ostream &O);
5656
void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
@@ -62,7 +62,7 @@ class AMDGPUInstPrinter : public MCInstPrinter {
6262
void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
6363
raw_ostream &O);
6464
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
65-
raw_ostream &O);
65+
raw_ostream &O);
6666
void printFORMAT(const MCInst *MI, unsigned OpNo,
6767
const MCSubtargetInfo &STI, raw_ostream &O);
6868
void printSymbolicFormat(const MCInst *MI,
@@ -126,6 +126,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
126126
const MCSubtargetInfo &STI, raw_ostream &O);
127127
void printNegHi(const MCInst *MI, unsigned OpNo,
128128
const MCSubtargetInfo &STI, raw_ostream &O);
129+
void printNegLoSrc2(const MCInst *MI, unsigned OpNo,
130+
const MCSubtargetInfo &STI, raw_ostream &O);
131+
void printNegHiSrc2(const MCInst *MI, unsigned OpNo,
132+
const MCSubtargetInfo &STI, raw_ostream &O);
129133
void printIndexKey8bit(const MCInst *MI, unsigned OpNo,
130134
const MCSubtargetInfo &STI, raw_ostream &O);
131135
void printIndexKey16bit(const MCInst *MI, unsigned OpNo,

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
310310
class UnscaledMFMAOptimizationPat<SDPatternOperator intrin> : PatFrag<
311311
(ops node:$srca, node:$srcb, node:$srcc,
312312
node:$cbsz, node:$blgp),
313-
(intrin $srca, $srcb, $srcc, $cbsz, $blgp,
313+
(intrin $srca, $srcb, $srcc, $cbsz, $blgp, 0, 0,
314314
srcvalue, 0, srcvalue, 0)
315315
>;
316316

@@ -1244,6 +1244,9 @@ def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
12441244
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
12451245
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
12461246

1247+
// i1-typed operands for the src2-only forms of the neg_lo / neg_hi modifiers.
// NOTE(review): the "NegLoSrc2" / "NegHiSrc2" strings presumably select the
// matching parse*/print*/is* routines in the asm parser and inst printer;
// confirm against the CustomOperand class definition.
def neg_lo_src2 : CustomOperand<i1, 1, "NegLoSrc2">;
1248+
def neg_hi_src2 : CustomOperand<i1, 1, "NegHiSrc2">;
1249+
12471250
def IndexKey16bit : CustomOperand<i32, 1>;
12481251
def IndexKey8bit : CustomOperand<i32, 1>;
12491252

0 commit comments

Comments
 (0)