Skip to content

Commit f7392f4

Browse files
authored
[AArch64] Add intrinsics for bfloat16 min/max/minnm/maxnm (#90105)
According to the specification in [ARM-software/acle/pull/309](ARM-software/acle#309), add the following intrinsics:

```
// svmax single,multi
svbfloat16x2_t svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

```
// svmin single,multi
svbfloat16x2_t svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

```
// svmaxnm single,multi
svbfloat16x2_t svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svmaxnm_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svmaxnm_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

```
// svminnm single,multi
svbfloat16x2_t svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svminnm_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svminnm_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

- Variations other than bfloat16 are already supported.
1 parent 44eded3 commit f7392f4

File tree

8 files changed

+875
-62
lines changed

8 files changed

+875
-62
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2095,7 +2095,7 @@ def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_
20952095
multiclass MinMaxIntr<string i, string zm, string mul, string t> {
20962096
def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
20972097
def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
2098-
def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
2098+
def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "bhfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
20992099
}
21002100

21012101
let TargetGuard = "sme2" in {
@@ -2113,11 +2113,11 @@ let TargetGuard = "sme2" in {
21132113
}
21142114

21152115
multiclass SInstMinMaxByVector<string name> {
2116-
def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
2117-
def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
2116+
def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
2117+
def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
21182118

2119-
def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
2120-
def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
2119+
def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
2120+
def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "bhfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
21212121
}
21222122

21232123
let TargetGuard = "sme2" in {

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c

Lines changed: 145 additions & 5 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c

Lines changed: 145 additions & 5 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c

Lines changed: 145 additions & 5 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c

Lines changed: 145 additions & 5 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 39 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1675,6 +1675,7 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
16751675
return 0;
16761676

16771677
EVT EltVT = VT.getVectorElementType();
1678+
unsigned Key = VT.getVectorMinNumElements();
16781679
switch (Kind) {
16791680
case SelectTypeKind::AnyType:
16801681
break;
@@ -1688,14 +1689,17 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
16881689
return 0;
16891690
break;
16901691
case SelectTypeKind::FP:
1691-
if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)
1692+
if (EltVT == MVT::bf16)
1693+
Key = 16;
1694+
else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1695+
EltVT != MVT::f64)
16921696
return 0;
16931697
break;
16941698
}
16951699

16961700
unsigned Offset;
1697-
switch (VT.getVectorMinNumElements()) {
1698-
case 16: // 8-bit
1701+
switch (Key) {
1702+
case 16: // 8-bit or bf16
16991703
Offset = 0;
17001704
break;
17011705
case 8: // 16-bit
@@ -5482,8 +5486,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
54825486
case Intrinsic::aarch64_sve_fmax_single_x2:
54835487
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
54845488
Node->getValueType(0),
5485-
{0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S,
5486-
AArch64::FMAX_VG2_2ZZ_D}))
5489+
{AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5490+
AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
54875491
SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
54885492
return;
54895493
case Intrinsic::aarch64_sve_smax_single_x4:
@@ -5503,8 +5507,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
55035507
case Intrinsic::aarch64_sve_fmax_single_x4:
55045508
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
55055509
Node->getValueType(0),
5506-
{0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S,
5507-
AArch64::FMAX_VG4_4ZZ_D}))
5510+
{AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5511+
AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
55085512
SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
55095513
return;
55105514
case Intrinsic::aarch64_sve_smin_single_x2:
@@ -5524,8 +5528,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
55245528
case Intrinsic::aarch64_sve_fmin_single_x2:
55255529
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
55265530
Node->getValueType(0),
5527-
{0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S,
5528-
AArch64::FMIN_VG2_2ZZ_D}))
5531+
{AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5532+
AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
55295533
SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
55305534
return;
55315535
case Intrinsic::aarch64_sve_smin_single_x4:
@@ -5545,8 +5549,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
55455549
case Intrinsic::aarch64_sve_fmin_single_x4:
55465550
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
55475551
Node->getValueType(0),
5548-
{0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S,
5549-
AArch64::FMIN_VG4_4ZZ_D}))
5552+
{AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5553+
AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
55505554
SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
55515555
return;
55525556
case Intrinsic::aarch64_sve_smax_x2:
@@ -5566,8 +5570,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
55665570
case Intrinsic::aarch64_sve_fmax_x2:
55675571
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
55685572
Node->getValueType(0),
5569-
{0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
5570-
AArch64::FMAX_VG2_2Z2Z_D}))
5573+
{AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5574+
AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
55715575
SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
55725576
return;
55735577
case Intrinsic::aarch64_sve_smax_x4:
@@ -5587,8 +5591,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
55875591
case Intrinsic::aarch64_sve_fmax_x4:
55885592
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
55895593
Node->getValueType(0),
5590-
{0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
5591-
AArch64::FMAX_VG4_4Z4Z_D}))
5594+
{AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5595+
AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
55925596
SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
55935597
return;
55945598
case Intrinsic::aarch64_sve_smin_x2:
@@ -5608,8 +5612,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
56085612
case Intrinsic::aarch64_sve_fmin_x2:
56095613
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56105614
Node->getValueType(0),
5611-
{0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
5612-
AArch64::FMIN_VG2_2Z2Z_D}))
5615+
{AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5616+
AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
56135617
SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
56145618
return;
56155619
case Intrinsic::aarch64_sve_smin_x4:
@@ -5629,64 +5633,64 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
56295633
case Intrinsic::aarch64_sve_fmin_x4:
56305634
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56315635
Node->getValueType(0),
5632-
{0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
5633-
AArch64::FMIN_VG4_4Z4Z_D}))
5636+
{AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5637+
AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
56345638
SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
56355639
return;
56365640
case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
56375641
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56385642
Node->getValueType(0),
5639-
{0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
5640-
AArch64::FMAXNM_VG2_2ZZ_D}))
5643+
{AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5644+
AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
56415645
SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
56425646
return;
56435647
case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
56445648
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56455649
Node->getValueType(0),
5646-
{0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S,
5647-
AArch64::FMAXNM_VG4_4ZZ_D}))
5650+
{AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5651+
AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
56485652
SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
56495653
return;
56505654
case Intrinsic::aarch64_sve_fminnm_single_x2:
56515655
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56525656
Node->getValueType(0),
5653-
{0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S,
5654-
AArch64::FMINNM_VG2_2ZZ_D}))
5657+
{AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5658+
AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
56555659
SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
56565660
return;
56575661
case Intrinsic::aarch64_sve_fminnm_single_x4:
56585662
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56595663
Node->getValueType(0),
5660-
{0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S,
5661-
AArch64::FMINNM_VG4_4ZZ_D}))
5664+
{AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5665+
AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
56625666
SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
56635667
return;
56645668
case Intrinsic::aarch64_sve_fmaxnm_x2:
56655669
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56665670
Node->getValueType(0),
5667-
{0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S,
5668-
AArch64::FMAXNM_VG2_2Z2Z_D}))
5671+
{AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5672+
AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
56695673
SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
56705674
return;
56715675
case Intrinsic::aarch64_sve_fmaxnm_x4:
56725676
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56735677
Node->getValueType(0),
5674-
{0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S,
5675-
AArch64::FMAXNM_VG4_4Z4Z_D}))
5678+
{AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5679+
AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
56765680
SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
56775681
return;
56785682
case Intrinsic::aarch64_sve_fminnm_x2:
56795683
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56805684
Node->getValueType(0),
5681-
{0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S,
5682-
AArch64::FMINNM_VG2_2Z2Z_D}))
5685+
{AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5686+
AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
56835687
SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
56845688
return;
56855689
case Intrinsic::aarch64_sve_fminnm_x4:
56865690
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
56875691
Node->getValueType(0),
5688-
{0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S,
5689-
AArch64::FMINNM_VG4_4Z4Z_D}))
5692+
{AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5693+
AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
56905694
SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
56915695
return;
56925696
case Intrinsic::aarch64_sve_fcvtzs_x2:

0 commit comments

Comments
 (0)