diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e027e754477fc..a1c32abb4dcd8 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3170,6 +3170,9 @@ def warn_attribute_arm_sm_incompat_builtin : Warning< def warn_attribute_arm_za_builtin_no_za_state : Warning< "builtin call is not valid when calling from a function without active ZA state">, InGroup>; +def warn_attribute_arm_zt0_builtin_no_zt0_state : Warning< + "builtin call is not valid when calling from a function without active ZT0 state">, + InGroup>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 4fb50b8e4e4e5..695e1bddf9ffc 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -636,37 +636,37 @@ let TargetGuard = "sme2,sme-i16i64" in { // Spill and fill of ZT0 // let TargetGuard = "sme2" in { - def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<0, ImmCheck0_0>]>; - def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<0, ImmCheck0_0>]>; + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>; } // // Zero ZT0 // let TargetGuard = "sme2" in { - def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<0, ImmCheck0_0>]>; + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>; } // // lookup table expand four contiguous registers // let TargetGuard = "sme2" in { - def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; - def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; + def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; } // // lookup table expand one register // let TargetGuard = "sme2" in { - def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; - def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; + def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; } // // lookup table expand two contiguous registers // let TargetGuard = "sme2" in { - def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; - def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index e6ad84676053b..9a6ea9898ef70 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -229,6 +229,9 @@ def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrin def IsInZA : FlagType<0x80000000000>; def IsOutZA : FlagType<0x100000000000>; def IsInOutZA : FlagType<0x200000000000>; +def IsInZT0 : FlagType<0x400000000000>; +def IsOutZT0 : FlagType<0x800000000000>; +def IsInOutZT0 : FlagType<0x1000000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 1f83dcf07b6f9..7833d5a2ea20e 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3013,6 +3013,11 @@ enum ArmSMEState : unsigned { ArmOutZA = 0b10, ArmInOutZA = 0b11, ArmZAMask = 0b11, + + ArmInZT0 = 0b01 << 2, + ArmOutZT0 = 0b10 << 2, + ArmInOutZT0 = 0b11 << 2, + ArmZT0Mask = 0b11 << 2 }; bool Sema::ParseSVEImmChecks( @@ -3206,6 +3211,13 @@ static bool hasArmZAState(const FunctionDecl *FD) { (FD->hasAttr() && FD->getAttr()->isNewZA()); } +static bool hasArmZT0State(const FunctionDecl *FD) { + const auto *T = FD->getType()->getAs(); + return (T && FunctionType::getArmZT0State(T->getAArch64SMEAttributes()) != + FunctionType::ARM_None) || + (FD->hasAttr() && FD->getAttr()->isNewZT0()); +} + static ArmSMEState getSMEState(unsigned BuiltinID) { switch (BuiltinID) { default: @@ -3233,6 +3245,11 @@ bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_za_builtin_no_za_state) << TheCall->getSourceRange(); + + if ((getSMEState(BuiltinID) & ArmZT0Mask) && !hasArmZT0State(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_zt0_builtin_no_zt0_state) + << TheCall->getSourceRange(); } // Range check SME intrinsics that take immediate values. diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c index 1a495d3b117ec..3e4454d943358 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -20,7 +20,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("za") { +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("zt0") { svldr_zt(0, base); } @@ -36,6 +36,6 @@ void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("za") // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svstr_zt(void *base) __arm_streaming_compatible __arm_in("za") { +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_in("zt0") { svstr_zt(0, base); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c index d656178fdf7a1..7c210fbe6923e 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c @@ -19,7 +19,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8(0, zn, 15); } @@ -34,7 +34,7 @@ svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { +svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8(0, zn, 15); } @@ -48,7 +48,7 @@ svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16(0, zn, 15); } @@ -63,7 +63,7 @@ svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { +svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16(0, zn, 15); } @@ -77,7 +77,7 @@ svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8f16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16(0, zn, 15); } @@ -91,7 +91,7 @@ svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv8bf16(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { +svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16(0, zn, 15); } @@ -105,7 +105,7 @@ svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32(0, zn, 15); } @@ -119,7 +119,7 @@ svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { +svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32(0, zn, 15); } @@ -133,6 +133,6 @@ svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti2.lane.zt.nxv4f32(i32 0, [[ZN:%.*]], i32 15) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32(0, zn, 15); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c index 60cd24fdf4630..d7ef75ce01dd7 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c @@ -26,7 +26,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x2(0, zn, 7); } @@ -49,7 +49,7 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { +svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x2(0, zn, 7); } @@ -71,7 +71,7 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x2(0, zn, 7); } @@ -94,7 +94,7 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { +svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x2(0, zn, 7); } @@ -116,7 +116,7 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x2(0, zn, 7); } @@ -138,7 +138,7 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { +svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x2(0, zn, 7); } @@ -160,7 +160,7 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x2(0, zn, 7); } @@ -182,7 +182,7 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { +svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x2(0, zn, 7); } @@ -204,6 +204,6 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x2(0, zn, 7); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c index e05748c8a6424..f65c0ef61f818 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c @@ -34,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u8_x4(0, zn, 3); } @@ -65,7 +65,7 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { +svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x4(0, zn, 3); } @@ -95,7 +95,7 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x4(0, zn, 3); } @@ -125,7 +125,7 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { +svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x4(0, zn, 3); } @@ -155,7 +155,7 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x4(0, zn, 3); } @@ -185,7 +185,7 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { +svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x4(0, zn, 3); } @@ -215,7 +215,7 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x4(0, zn, 3); } @@ -245,7 +245,7 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { +svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s32_x4(0, zn, 3); } @@ -275,6 +275,6 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f32_x4(0, zn, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c index 1e303a9a661d7..cbab1cc8e81bd 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c @@ -19,7 +19,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u8(0, zn, 7); } @@ -34,7 +34,7 @@ svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { +svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s8(0, zn, 7); } @@ -48,7 +48,7 @@ svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16(0, zn, 7); } @@ -62,7 +62,7 @@ svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { +svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16(0, zn, 7); } @@ -76,7 +76,7 @@ svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16(0, zn, 7); } @@ -90,7 +90,7 @@ svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { +svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16(0, zn, 7); } @@ -104,7 +104,7 @@ svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32(0, zn, 7); } @@ -118,7 +118,7 @@ svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { +svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32(0, zn, 7); } @@ -132,6 +132,6 @@ svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, [[ZN:%.*]], i32 7) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32(0, zn, 7); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c index 3544f62527aca..f7f16281ff406 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c @@ -26,7 +26,7 @@ // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u8_x2(0, zn, 3); } @@ -49,7 +49,7 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") { +svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s8_x2(0, zn, 3); } @@ -71,7 +71,7 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x2(0, zn, 3); } @@ -94,7 +94,7 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { +svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x2(0, zn, 3); } @@ -116,7 +116,7 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x2(0, zn, 3); } @@ -138,7 +138,7 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) // CPP-CHECK-NEXT: ret [[TMP4]] // -svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { +svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x2(0, zn, 3); } @@ -160,7 +160,7 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32_x2(0, zn, 3); } @@ -182,7 +182,7 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { +svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x2(0, zn, 3); } @@ -204,6 +204,6 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) // CPP-CHECK-NEXT: ret [[TMP4]] // -svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32_x2(0, zn, 3); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c index e7151e8256039..3fedfdc330893 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c @@ -36,7 +36,7 @@ // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u16_x4(0, zn, 1); } @@ -68,7 +68,7 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f16_x4(0, zn, 1); } @@ -100,7 +100,7 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") { +svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_bf16_x4(0, zn, 1); } @@ -132,7 +132,7 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") { +svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s16_x4(0, zn, 1); } @@ -164,7 +164,7 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") { +svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_u32_x4(0, zn, 1); } @@ -196,7 +196,7 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") { +svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_s32_x4(0, zn, 1); } @@ -228,6 +228,6 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) // CPP-CHECK-NEXT: ret [[TMP8]] // -svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") { +svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti4_lane_zt_f32_x4(0, zn, 1); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c index 4a038ce61e3bf..4105cc3e78ec2 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -18,6 +18,6 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0) // CPP-CHECK-NEXT: ret void // -void test_svzero_zt(void) __arm_streaming_compatible __arm_out("za") { +void test_svzero_zt(void) __arm_streaming_compatible __arm_out("zt0") { svzero_zt(0); } diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c index 079cff5a5bbae..55c97c73e8b69 100644 --- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ -// RUN: -target-feature +sme -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s +// RUN: -target-feature +sme2 -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s // REQUIRES: aarch64-registered-target @@ -108,3 +108,11 @@ svint8_t new_za(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming { // expected-no-warning return svread_hor_za8_s8_m(zd, pg, 0, slice_base); } + +void missing_zt0(void) __arm_streaming { + // expected-warning@+1 {{builtin call is not valid when calling from a function without active ZT0 state}} + svzero_zt(0); +} + +__arm_new("zt0") +void new_zt0(void) __arm_streaming { svzero_zt(0); } // no warning diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp index 0d9f090637eec..a627ef9c01ae2 100644 --- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp @@ -71,15 +71,15 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } -void test_ldr_zt(const void *const_base) __arm_streaming_compatible __arm_inout("za") { +void test_ldr_zt(const void *const_base) __arm_streaming_compatible __arm_inout("zt0") { svldr_zt(1, const_base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} } -void test_str_zt(void *base) __arm_streaming_compatible __arm_in("za") { +void test_str_zt(void *base) __arm_streaming_compatible __arm_in("zt0") { svstr_zt(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}} } -void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") { +void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("zt0") { // Test Reg Offset svluti2_lane_zt_u8_x4(1, zn, 0); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -106,7 +106,7 @@ void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") { svluti2_lane_zt_f32_x4(0, zn, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } -void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") { +void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("zt0") { // Test Reg Offset svluti4_lane_zt_u16_x4(1, zn, 0); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -129,7 +129,7 @@ void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") { svluti4_lane_zt_f32_x4(0, zn, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} } -void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") { +void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") { // Test Reg Offset svluti2_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -156,7 +156,7 @@ void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") { svluti2_lane_zt_f32(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} } -void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") { +void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") { // Test Reg Offset svluti4_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -183,7 +183,7 @@ void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") { svluti4_lane_zt_f32(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} } -void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") { +void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") { // Test Reg Offset svluti2_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range @@ -210,7 +210,7 @@ void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") { svluti2_lane_zt_f32_x2(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} } -void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") { +void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") { // Test Reg Offset svluti4_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}} // Test index value range diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index fbedd27bf998d..174304f09007b 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1722,12 +1722,27 @@ void SVEEmitter::createBuiltinZAState(raw_ostream &OS) { std::map> IntrinsicsPerState; for (auto &Def : Defs) { + std::string Key; + auto AddToKey = [&Key](const std::string &S) -> void { + Key = Key.empty() ? S : (Key + " | " + S); + }; + if (Def->isFlagSet(getEnumValueForFlag("IsInZA"))) - IntrinsicsPerState["ArmInZA"].insert(Def->getMangledName()); + AddToKey("ArmInZA"); else if (Def->isFlagSet(getEnumValueForFlag("IsOutZA"))) - IntrinsicsPerState["ArmOutZA"].insert(Def->getMangledName()); + AddToKey("ArmOutZA"); else if (Def->isFlagSet(getEnumValueForFlag("IsInOutZA"))) - IntrinsicsPerState["ArmInOutZA"].insert(Def->getMangledName()); + AddToKey("ArmInOutZA"); + + if (Def->isFlagSet(getEnumValueForFlag("IsInZT0"))) + AddToKey("ArmInZT0"); + else if (Def->isFlagSet(getEnumValueForFlag("IsOutZT0"))) + AddToKey("ArmOutZT0"); + else if (Def->isFlagSet(getEnumValueForFlag("IsInOutZT0"))) + AddToKey("ArmInOutZT0"); + + if (!Key.empty()) + IntrinsicsPerState[Key].insert(Def->getMangledName()); } OS << "#ifdef GET_SME_BUILTIN_GET_STATE\n";