Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME2] Add SME2 MLA/MLS builtins. #75584

Merged
merged 7 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
213 changes: 213 additions & 0 deletions clang/include/clang/Basic/arm_sme.td

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10318,6 +10318,28 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return nullptr;
}

static void swapCommutativeSMEOperands(unsigned BuiltinID,
SmallVectorImpl<Value *> &Ops) {
unsigned MultiVec;
switch (BuiltinID) {
default:
return;
case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
MultiVec = 1;
break;
case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
MultiVec = 2;
break;
case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
MultiVec = 4;
break;
}

if (MultiVec > 0)
for (unsigned I = 0; I < MultiVec; ++I)
std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
}

Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
Expand All @@ -10340,6 +10362,9 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
BuiltinID == SME::BI__builtin_sme_svstr_za)
return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);

// Handle builtins which require their multi-vector operands to be swapped
swapCommutativeSMEOperands(BuiltinID, Ops);

// Should not happen!
if (Builtin->LLVMIntrinsic == 0)
return nullptr;
Expand Down
292 changes: 292 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c

Large diffs are not rendered by default.

696 changes: 696 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c

Large diffs are not rendered by default.

1,790 changes: 1,790 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c

Large diffs are not rendered by default.

292 changes: 292 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c

Large diffs are not rendered by default.

696 changes: 696 additions & 0 deletions clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,47 @@ void test_bfmlslb_bad_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm) __
svbfmlslb_lane_f32(zda, zn, zm, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svbfmlslt_lane_f32(zda, zn, zm, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}

void test_multiply_add_sub_long(uint32_t base, svint8_t s8, svuint8_t u8,
svint16_t s16, svuint16_t u16, svint8x2_t s8x2,
svuint8x2_t u8x2, svint16x2_t s16x2, svuint16x2_t u16x2,
svint8x4_t s8x4, svuint8x4_t u8x4, svint16x4_t s16x4, svuint16x4_t u16x4) __arm_streaming __arm_shared_za {

svmla_lane_za32_s8_vg4x1(base, s8, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmla_lane_za32_u8_vg4x1(base, u8, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmla_lane_za64_s16_vg4x1(base, s16, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svmla_lane_za64_u16_vg4x1(base, u16, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

svmla_lane_za32_s8_vg4x2(base, s8x2, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmla_lane_za32_u8_vg4x2(base, u8x2, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmla_lane_za64_s16_vg4x2(base, s16x2, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svmla_lane_za64_u16_vg4x2(base, u16x2, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

svmla_lane_za32_s8_vg4x4(base, s8x4, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmla_lane_za32_u8_vg4x4(base, u8x4, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmla_lane_za64_s16_vg4x4(base, s16x4, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svmla_lane_za64_u16_vg4x4(base, u16x4, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

svmls_lane_za32_s8_vg4x1(base, s8, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmls_lane_za32_u8_vg4x1(base, u8, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmls_lane_za64_s16_vg4x1(base, s16, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svmls_lane_za64_u16_vg4x1(base, u16, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

svmls_lane_za32_s8_vg4x2(base, s8x2, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmls_lane_za32_u8_vg4x2(base, u8x2, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmls_lane_za64_s16_vg4x2(base, s16x2, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svmls_lane_za64_u16_vg4x2(base, u16x2, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

svmls_lane_za32_s8_vg4x4(base, s8x4, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmls_lane_za32_u8_vg4x4(base, u8x4, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svmls_lane_za64_s16_vg4x4(base, s16x4, s16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
svmls_lane_za64_u16_vg4x4(base, u16x4, u16, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}

svsumla_lane_za32_s8_vg4x1(base, s8, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svsumla_lane_za32_s8_vg4x2(base, s8x2, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svsumla_lane_za32_s8_vg4x4(base, s8x4, u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}

svusmla_lane_za32_u8_vg4x1(base, u8, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svusmla_lane_za32_u8_vg4x2(base, u8x2, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
svusmla_lane_za32_u8_vg4x4(base, u8x4, s8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
}