diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index db10077895ee51..b7dafecf3ce16b 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -244,6 +244,7 @@ namespace clang { bool isAppendSVALL() const { return Flags & IsAppendSVALL; } bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; } bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; } + bool isReverseUSDOT() const { return Flags & ReverseUSDOT; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 4b77b05756376c..caf141532fcfff 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -68,6 +68,7 @@ // a: scalar of element type (splat to vector type) // R: scalar of 1/2 width element type (splat to vector type) // r: scalar of 1/4 width element type (splat to vector type) +// @: unsigned scalar of 1/4 width element type (splat to vector type) // e: 1/2 width unsigned elements, 2x element count // b: 1/4 width unsigned elements, 4x element count // h: 1/2 width elements, 2x element count @@ -196,6 +197,7 @@ def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the s def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. def IsGatherPrefetch : FlagType<0x10000000>; def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. +def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
@@ -1240,6 +1242,14 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_INT8)" in {
 def SVMLLA_S32 : SInst<"svmmla[_s32]", "ddqq","i", MergeNone, "aarch64_sve_smmla">;
 def SVMLLA_U32 : SInst<"svmmla[_u32]", "ddqq","Ui", MergeNone, "aarch64_sve_ummla">;
 def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i", MergeNone, "aarch64_sve_usmmla">;
+
+def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot">;
+def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot">;
+def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>;
+def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>;
+
+def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
+def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
 }
 
 let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP32)" in {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 524924e366383d..b58f167d9ed24e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8040,6 +8040,9 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   if (TypeFlags.isReverseCompare())
     std::swap(Ops[1], Ops[2]);
 
+  if (TypeFlags.isReverseUSDOT())
+    std::swap(Ops[1], Ops[2]);
+
   // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
   if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
     llvm::Type *OpndTy = Ops[1]->getType();
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
new file mode 100644
index 00000000000000..d678851bad5868
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svsudot_s32(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_s32
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %z, <vscale x 16 x i8> %y)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot, _s32, , )(x, y, z);
+}
+
+svint32_t test_svsudot_n_s32(svint32_t x, svint8_t y, uint8_t z) {
+  // CHECK-LABEL: test_svsudot_n_s32
+  // CHECK: %[[SPLAT:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %z)
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %[[SPLAT]], <vscale x 16 x i8> %y)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot, _n_s32, , )(x, y, z);
+}
+
+svint32_t test_svsudot_lane_s32_0(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_0
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 0)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 0);
+}
+
+svint32_t
test_svsudot_lane_s32_1(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_1
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 1)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 1);
+}
+
+svint32_t test_svsudot_lane_s32_2(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_2
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 2)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 2);
+}
+
+svint32_t test_svsudot_lane_s32_3(svint32_t x, svint8_t y, svuint8_t z) {
+  // CHECK-LABEL: test_svsudot_lane_s32_3
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 3);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
new file mode 100644
index 00000000000000..1639cbb2b86f39
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_MATMUL_INT8 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svusdot_s32(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_s32
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot, _s32, , )(x, y, z);
+}
+
+svint32_t test_svusdot_n_s32(svint32_t x, svuint8_t y, int8_t z) {
+  // CHECK-LABEL: test_svusdot_n_s32
+  // CHECK: %[[SPLAT:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %z)
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %[[SPLAT]])
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot, _n_s32, , )(x, y, z);
+}
+
+svint32_t test_svusdot_lane_s32_0(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_0
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 0)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 0);
+}
+
+svint32_t test_svusdot_lane_s32_1(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_1
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 1)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 1);
+}
+
+svint32_t test_svusdot_lane_s32_2(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_2
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 2)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 2);
+}
+
+svint32_t test_svusdot_lane_s32_3(svint32_t x, svuint8_t y, svint8_t z) {
+  // CHECK-LABEL: test_svusdot_lane_s32_3
+  // CHECK: %[[RET:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> %x, <vscale x 16 x i8> %y, <vscale x 16 x i8> %z, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[RET]]
+  return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 3);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp
b/clang/utils/TableGen/SveEmitter.cpp index ae1d938eed01f8..7d99e39f9d03e1 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -213,13 +213,13 @@ class Intrinsic { /// Return true if the intrinsic takes a splat operand. bool hasSplat() const { // These prototype modifiers are described in arm_sve.td. - return Proto.find_first_of("ajfrKLR") != std::string::npos; + return Proto.find_first_of("ajfrKLR@") != std::string::npos; } /// Return the parameter index of the splat operand. unsigned getSplatIdx() const { // These prototype modifiers are described in arm_sve.td. - auto Idx = Proto.find_first_of("ajfrKLR"); + auto Idx = Proto.find_first_of("ajfrKLR@"); assert(Idx != std::string::npos && Idx > 0 && "Prototype has no splat operand"); return Idx - 1; @@ -541,6 +541,12 @@ void SVEType::applyModifier(char Mod) { ElementBitwidth /= 4; NumVectors = 0; break; + case '@': + Signed = false; + Float = false; + ElementBitwidth /= 4; + NumVectors = 0; + break; case 'K': Signed = true; Float = false;