diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 02039524c41dc..51cf8e29129c6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1281,9 +1281,9 @@ def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", [I def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>; def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>; -def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>; +def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; let TargetGuard = "sve,bf16" in { def SVUNDEF_1_BF16 : SInst<"svundef_{d}", "dv", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>; @@ -1291,9 +1291,9 @@ def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2v", "b", MergeNone, "", [IsUndef, I def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>; def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>; -def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate]>; -def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate]>; +def SVCREATE_2_BF16 : 
SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; +def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>; } let TargetGuard = "sve2p1|sme2" in { @@ -1303,22 +1303,22 @@ let TargetGuard = "sve2p1|sme2" in { //////////////////////////////////////////////////////////////////////////////// // Vector insertion and extraction -def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; -def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>; -def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; +def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; -def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; -def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>; -def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; +def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def 
SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; let TargetGuard = "sve,bf16" in { -def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>; -def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>; -def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>; +def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; -def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>; -def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>; -def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>; +def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; +def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>; +def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; } let TargetGuard = "sve2p1|sme2" in { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c index 4058af0051359..1ed09cc5965fd 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c +++ 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate2_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) @@ -27,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) +svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c index 5051ab88d9226..1aead4e5572f3 100644 --- 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate2_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[X0:%.*]], i64 0) @@ -27,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) +svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s8,,)(x0, x1); } @@ -44,7 +51,7 @@ svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) 
+svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s16,,)(x0, x1); } @@ -61,7 +68,7 @@ svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) +svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s32,,)(x0, x1); } @@ -78,7 +85,7 @@ svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP1]] // -svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) +svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_s64,,)(x0, x1); } @@ -95,7 +102,7 @@ svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) +svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u8,,)(x0, x1); } @@ -112,7 +119,7 @@ svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) +svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u16,,)(x0, x1); } @@ -129,7 +136,7 @@ svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint32x2_t test_svcreate2_u32(svuint32_t x0, 
svuint32_t x1) +svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u32,,)(x0, x1); } @@ -146,7 +153,7 @@ svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP1]] // -svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) +svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_u64,,)(x0, x1); } @@ -163,7 +170,7 @@ svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP0]], [[X1:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP1]] // -svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) +svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_f16,,)(x0, x1); } @@ -180,7 +187,7 @@ svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP0]], [[X1:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP1]] // -svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) +svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_f32,,)(x0, x1); } @@ -197,7 +204,7 @@ svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP0]], [[X1:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP1]] // -svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) +svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_f64,,)(x0, x1); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c index 85eff36241163..90176ff0dbd48 100644 --- 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate3_bf16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) @@ -29,7 +36,7 @@ // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) +svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c index 3d6f34d5234cf..2fe1a88441b29 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate3_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[X0:%.*]], i64 0) @@ -29,7 +36,7 @@ // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) +svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s8,,)(x0, x1, x2); } @@ -48,7 +55,7 @@ svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) +svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s16,,)(x0, x1, x2); } @@ -67,7 +74,7 @@ svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) +svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s32,,)(x0, x1, x2); } @@ -86,7 +93,7 @@ svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP2]] // -svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) +svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_s64,,)(x0, x1, x2); } @@ -105,7 +112,7 @@ svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) +svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u8,,)(x0, x1, x2); } @@ -124,7 +131,7 @@ svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint16x3_t test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) +svuint16x3_t 
test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u16,,)(x0, x1, x2); } @@ -143,7 +150,7 @@ svuint16x3_t test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) +svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u32,,)(x0, x1, x2); } @@ -162,7 +169,7 @@ svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP2]] // -svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) +svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_u64,,)(x0, x1, x2); } @@ -181,7 +188,7 @@ svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP1]], [[X2:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP2]] // -svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) +svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_f16,,)(x0, x1, x2); } @@ -200,7 +207,7 @@ svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP1]], [[X2:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP2]] // -svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) +svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_f32,,)(x0, x1, x2); } @@ -219,7 
+226,7 @@ svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP1]], [[X2:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP2]] // -svfloat64x3_t test_svcreate3_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2) +svfloat64x3_t test_svcreate3_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2) ATTR { return SVE_ACLE_FUNC(svcreate3,_f64,,)(x0, x1, x2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c index 3d372be19c9f0..8ad8019123454 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate4_bf16( 
// CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) @@ -31,7 +38,7 @@ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) +svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_bf16,,)(x0, x1, x2, x4); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c index ab39e6406dc3d..5953cdd4e207f 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: 
@test_svcreate4_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[X0:%.*]], i64 0) @@ -31,7 +38,7 @@ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[X4:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) +svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s8,,)(x0, x1, x2, x4); } @@ -52,7 +59,7 @@ svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16_t x4) +svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s16,,)(x0, x1, x2, x4); } @@ -73,7 +80,7 @@ svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[X4:%.*]], i64 12) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x4) +svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s32,,)(x0, x1, x2, x4); } @@ -94,7 +101,7 @@ svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[X4:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP3]] // -svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64_t x4) +svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_s64,,)(x0, x1, x2, x4); } 
@@ -115,7 +122,7 @@ svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[X4:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_t x4) +svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u8,,)(x0, x1, x2, x4); } @@ -136,7 +143,7 @@ svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svuint16_t x4) +svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svuint16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u16,,)(x0, x1, x2, x4); } @@ -157,7 +164,7 @@ svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svu // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[X4:%.*]], i64 12) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svuint32_t x4) +svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svuint32_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u32,,)(x0, x1, x2, x4); } @@ -178,7 +185,7 @@ svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svu // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[X4:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP3]] // -svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svuint64_t x4) +svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svuint64_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_u64,,)(x0, x1, x2, x4); } @@ -199,7 +206,7 @@ 
svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svu // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[X4:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP3]] // -svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, svfloat16_t x4) +svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, svfloat16_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_f16,,)(x0, x1, x2, x4); } @@ -220,7 +227,7 @@ svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[X4:%.*]], i64 12) // CPP-CHECK-NEXT: ret [[TMP3]] // -svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, svfloat32_t x4) +svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, svfloat32_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_f32,,)(x0, x1, x2, x4); } @@ -241,7 +248,7 @@ svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[X4:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP3]] // -svfloat64x4_t test_svcreate4_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2, svfloat64_t x4) +svfloat64x4_t test_svcreate4_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2, svfloat64_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_f64,,)(x0, x1, x2, x4); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c index 25dc49a4c2bd3..b9c46b2261f5d 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget2_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TUPLE:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) +svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 0); } @@ -40,7 +47,7 @@ svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) +svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_bf16,,)(tuple, 1); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c index 32a84c91b74d4..8cd887aaff407 100644 --- 
a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS @@ -14,6 +15,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget2_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) @@ -24,7 +31,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svget2_s8(svint8x2_t tuple) +svint8_t test_svget2_s8(svint8x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s8,,)(tuple, 0); } @@ -39,7 +46,7 @@ svint8_t test_svget2_s8(svint8x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svget2_s16(svint16x2_t tuple) +svint16_t test_svget2_s16(svint16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s16,,)(tuple, 1); } @@ -54,7 +61,7 @@ svint16_t test_svget2_s16(svint16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( 
[[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svget2_s32(svint32x2_t tuple) +svint32_t test_svget2_s32(svint32x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s32,,)(tuple, 0); } @@ -69,7 +76,7 @@ svint32_t test_svget2_s32(svint32x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64_t test_svget2_s64(svint64x2_t tuple) +svint64_t test_svget2_s64(svint64x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_s64,,)(tuple, 1); } @@ -84,7 +91,7 @@ svint64_t test_svget2_s64(svint64x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svget2_u8(svuint8x2_t tuple) +svuint8_t test_svget2_u8(svuint8x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u8,,)(tuple, 0); } @@ -99,7 +106,7 @@ svuint8_t test_svget2_u8(svuint8x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svget2_u16(svuint16x2_t tuple) +svuint16_t test_svget2_u16(svuint16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u16,,)(tuple, 1); } @@ -114,7 +121,7 @@ svuint16_t test_svget2_u16(svuint16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svget2_u32(svuint32x2_t tuple) +svuint32_t test_svget2_u32(svuint32x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u32,,)(tuple, 0); } @@ -129,7 +136,7 @@ svuint32_t test_svget2_u32(svuint32x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64_t test_svget2_u64(svuint64x2_t tuple) +svuint64_t test_svget2_u64(svuint64x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_u64,,)(tuple, 1); } @@ -144,7 +151,7 @@ 
svuint64_t test_svget2_u64(svuint64x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svget2_f16(svfloat16x2_t tuple) +svfloat16_t test_svget2_f16(svfloat16x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_f16,,)(tuple, 0); } @@ -159,7 +166,7 @@ svfloat16_t test_svget2_f16(svfloat16x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svget2_f32(svfloat32x2_t tuple) +svfloat32_t test_svget2_f32(svfloat32x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_f32,,)(tuple, 1); } @@ -174,7 +181,7 @@ svfloat32_t test_svget2_f32(svfloat32x2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64_t test_svget2_f64(svfloat64x2_t tuple) +svfloat64_t test_svget2_f64(svfloat64x2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_f64,,)(tuple, 0); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c index 47ce6bd19244e..7a991bc7431d0 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget3_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) +svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 0); } @@ -40,7 +47,7 @@ svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) +svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 1); } @@ -55,7 +62,7 @@ svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) +svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_bf16,,)(tuple, 2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c index 54847152dee7c..de7c3c303ffcb 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c +++ 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c @@ -5,6 +5,8 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #include #ifdef SVE_OVERLOADED_FORMS @@ -14,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget3_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 0) @@ -24,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svget3_s8(svint8x3_t tuple) +svint8_t test_svget3_s8(svint8x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s8,,)(tuple, 0); } @@ -39,7 +47,7 @@ svint8_t test_svget3_s8(svint8x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svget3_s16(svint16x3_t tuple) +svint16_t test_svget3_s16(svint16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s16,,)(tuple, 2); } @@ -54,7 +62,7 @@ svint16_t test_svget3_s16(svint16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // 
-svint32_t test_svget3_s32(svint32x3_t tuple) +svint32_t test_svget3_s32(svint32x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s32,,)(tuple, 1); } @@ -69,7 +77,7 @@ svint32_t test_svget3_s32(svint32x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64_t test_svget3_s64(svint64x3_t tuple) +svint64_t test_svget3_s64(svint64x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_s64,,)(tuple, 0); } @@ -84,7 +92,7 @@ svint64_t test_svget3_s64(svint64x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svget3_u8(svuint8x3_t tuple) +svuint8_t test_svget3_u8(svuint8x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u8,,)(tuple, 2); } @@ -99,7 +107,7 @@ svuint8_t test_svget3_u8(svuint8x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svget3_u16(svuint16x3_t tuple) +svuint16_t test_svget3_u16(svuint16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u16,,)(tuple, 1); } @@ -114,7 +122,7 @@ svuint16_t test_svget3_u16(svuint16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svget3_u32(svuint32x3_t tuple) +svuint32_t test_svget3_u32(svuint32x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u32,,)(tuple, 0); } @@ -129,7 +137,7 @@ svuint32_t test_svget3_u32(svuint32x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64_t test_svget3_u64(svuint64x3_t tuple) +svuint64_t test_svget3_u64(svuint64x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_u64,,)(tuple, 2); } @@ -144,7 +152,7 @@ svuint64_t test_svget3_u64(svuint64x3_t tuple) // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svget3_f16(svfloat16x3_t tuple) +svfloat16_t test_svget3_f16(svfloat16x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_f16,,)(tuple, 1); } @@ -159,7 +167,7 @@ svfloat16_t test_svget3_f16(svfloat16x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svget3_f32(svfloat32x3_t tuple) +svfloat32_t test_svget3_f32(svfloat32x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_f32,,)(tuple, 0); } @@ -174,7 +182,7 @@ svfloat32_t test_svget3_f32(svfloat32x3_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64_t test_svget3_f64(svfloat64x3_t tuple) +svfloat64_t test_svget3_f64(svfloat64x3_t tuple) ATTR { return SVE_ACLE_FUNC(svget3,_f64,,)(tuple, 2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c index 454b3bf38bd31..3a5e282bfdfa3 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget4_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 0); } @@ -40,7 +47,7 @@ svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 1); } @@ -55,7 +62,7 @@ svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) +svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 2); } @@ -70,7 +77,7 @@ svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple) +svbfloat16_t 
test_svget4_bf16_3(svbfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_bf16,,)(tuple, 3); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c index 13f8c2a2906ef..9b4f9e5332a57 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c @@ -5,6 +5,7 @@ // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS @@ -14,6 +15,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // NOTE: For these tests clang converts the struct parameter into // several parameters, one for each member of the original struct. 
// CHECK-LABEL: @test_svget4_s8( @@ -26,7 +33,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8_t test_svget4_s8(svint8x4_t tuple) +svint8_t test_svget4_s8(svint8x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s8,,)(tuple, 0); } @@ -41,7 +48,7 @@ svint8_t test_svget4_s8(svint8x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16_t test_svget4_s16(svint16x4_t tuple) +svint16_t test_svget4_s16(svint16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s16,,)(tuple, 2); } @@ -56,7 +63,7 @@ svint16_t test_svget4_s16(svint16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[TUPLE:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32_t test_svget4_s32(svint32x4_t tuple) +svint32_t test_svget4_s32(svint32x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s32,,)(tuple, 2); } @@ -71,7 +78,7 @@ svint32_t test_svget4_s32(svint32x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[TUPLE:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64_t test_svget4_s64(svint64x4_t tuple) +svint64_t test_svget4_s64(svint64x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_s64,,)(tuple, 3); } @@ -86,7 +93,7 @@ svint64_t test_svget4_s64(svint64x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[TUPLE:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8_t test_svget4_u8(svuint8x4_t tuple) +svuint8_t test_svget4_u8(svuint8x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u8,,)(tuple, 2); } @@ -101,7 +108,7 @@ svuint8_t test_svget4_u8(svuint8x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[TUPLE:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16_t test_svget4_u16(svuint16x4_t tuple) +svuint16_t 
test_svget4_u16(svuint16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u16,,)(tuple, 3); } @@ -116,7 +123,7 @@ svuint16_t test_svget4_u16(svuint16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32_t test_svget4_u32(svuint32x4_t tuple) +svuint32_t test_svget4_u32(svuint32x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u32,,)(tuple, 0); } @@ -131,7 +138,7 @@ svuint32_t test_svget4_u32(svuint32x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[TUPLE:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64_t test_svget4_u64(svuint64x4_t tuple) +svuint64_t test_svget4_u64(svuint64x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_u64,,)(tuple, 3); } @@ -146,7 +153,7 @@ svuint64_t test_svget4_u64(svuint64x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16_t test_svget4_f16(svfloat16x4_t tuple) +svfloat16_t test_svget4_f16(svfloat16x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_f16,,)(tuple, 2); } @@ -161,7 +168,7 @@ svfloat16_t test_svget4_f16(svfloat16x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32_t test_svget4_f32(svfloat32x4_t tuple) +svfloat32_t test_svget4_f32(svfloat32x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_f32,,)(tuple, 0); } @@ -176,7 +183,7 @@ svfloat32_t test_svget4_f32(svfloat32x4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[TUPLE:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64_t test_svget4_f64(svfloat64x4_t tuple) +svfloat64_t test_svget4_f64(svfloat64x4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_f64,,)(tuple, 2); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c index e54cf82370c3b..8d683784ddf8c 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svset2_bf16_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) +svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 0, x); } @@ -40,7 +47,7 @@ svbfloat16x2_t 
test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x) +svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_bf16,,)(tuple, 1, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c index 9ae3011549207..b2bf4ad08aa9e 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svset2_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) @@ -25,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( 
[[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) +svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s8,,)(tuple, 1, x); } @@ -40,7 +47,7 @@ svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) +svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s16,,)(tuple, 0, x); } @@ -55,7 +62,7 @@ svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) +svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s32,,)(tuple, 1, x); } @@ -70,7 +77,7 @@ svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) +svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) ATTR { return SVE_ACLE_FUNC(svset2,_s64,,)(tuple, 0, x); } @@ -85,7 +92,7 @@ svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) +svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u8,,)(tuple, 1, x); } @@ -100,7 +107,7 @@ svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t x) +svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u16,,)(tuple, 0, x); } @@ -115,7 +122,7 @@ svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) +svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u32,,)(tuple, 1, x); } @@ -130,7 +137,7 @@ svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) +svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) ATTR { return SVE_ACLE_FUNC(svset2,_u64,,)(tuple, 0, x); } @@ -145,7 +152,7 @@ svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) +svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset2,_f16,,)(tuple, 1, x); } @@ -160,7 +167,7 @@ svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) +svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) ATTR { return SVE_ACLE_FUNC(svset2,_f32,,)(tuple, 0, x); } @@ -175,7 
+182,7 @@ svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64x2_t test_svset2_f64(svfloat64x2_t tuple, svfloat64_t x) +svfloat64x2_t test_svset2_f64(svfloat64x2_t tuple, svfloat64_t x) ATTR { return SVE_ACLE_FUNC(svset2,_f64,,)(tuple, 1, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c index fffdd0cc36e63..d488576c4be7a 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset3_bf16_0( // CHECK-NEXT: entry: @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) +svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 0, x); } @@ -41,7 +47,7 @@ svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) +svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 1, x); } @@ -56,7 +62,7 @@ svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) +svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_bf16,,)(tuple, 2, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c index 1b9191cc8a330..9d10e6afca935 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 
-DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // NOTE: For these tests clang converts the struct parameter into // several parameters, one for each member of the original struct. @@ -28,7 +34,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) +svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s8,,)(tuple, 1, x); } @@ -43,7 +49,7 @@ svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) +svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s16,,)(tuple, 2, x); } @@ -58,7 +64,7 @@ svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) +svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s32,,)(tuple, 0, x); } @@ -73,7 +79,7 @@ svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) +svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) ATTR { return SVE_ACLE_FUNC(svset3,_s64,,)(tuple, 1, x); } @@ -88,7 +94,7 @@ svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 32) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) +svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u8,,)(tuple, 2, x); } @@ -103,7 +109,7 @@ svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) +svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u16,,)(tuple, 0, x); } @@ -118,7 +124,7 @@ svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) +svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u32,,)(tuple, 1, x); } @@ -133,7 +139,7 @@ svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) +svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) ATTR { return SVE_ACLE_FUNC(svset3,_u64,,)(tuple, 2, x); } @@ -148,7 +154,7 @@ svuint64x3_t 
test_svset3_u64(svuint64x3_t tuple, svuint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) +svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset3,_f16,,)(tuple, 0, x); } @@ -163,7 +169,7 @@ svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) +svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) ATTR { return SVE_ACLE_FUNC(svset3,_f32,,)(tuple, 1, x); } @@ -178,7 +184,7 @@ svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64x3_t test_svset3_f64(svfloat64x3_t tuple, svfloat64_t x) +svfloat64x3_t test_svset3_f64(svfloat64x3_t tuple, svfloat64_t x) ATTR { return SVE_ACLE_FUNC(svset3,_f64,,)(tuple, 2, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c index d1d5fb02d5463..f7124ac2ac4b7 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset4_bf16_0( // CHECK-NEXT: entry: @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 0, x); } @@ -41,7 +47,7 @@ svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 1, x); } @@ -56,7 +62,7 @@ svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, 
svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 2, x); } @@ -71,7 +77,7 @@ svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) +svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_bf16,,)(tuple, 3, x); } diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c index e4ece8c2a65ff..ce35bfb83c88d 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c @@ -3,6 +3,7 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -15,6 +16,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset4_s8( // CHECK-NEXT: entry: @@ -26,7 +32,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( 
[[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) +svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s8,,)(tuple, 1, x); } @@ -41,7 +47,7 @@ svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 24) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) +svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s16,,)(tuple, 3, x); } @@ -56,7 +62,7 @@ svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) +svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s32,,)(tuple, 1, x); } @@ -71,7 +77,7 @@ svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) // CPP-CHECK-NEXT: ret [[TMP0]] // -svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) +svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) ATTR { return SVE_ACLE_FUNC(svset4,_s64,,)(tuple, 1, x); } @@ -86,7 +92,7 @@ svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) +svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u8,,)(tuple, 3, x); } @@ -101,7 +107,7 @@ svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) +svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u16,,)(tuple, 1, x); } @@ -116,7 +122,7 @@ svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) +svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u32,,)(tuple, 1, x); } @@ -131,7 +137,7 @@ svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) +svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) ATTR { return SVE_ACLE_FUNC(svset4,_u64,,)(tuple, 3, x); } @@ -146,7 +152,7 @@ svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) +svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) ATTR { return SVE_ACLE_FUNC(svset4,_f16,,)(tuple, 1, x); } @@ -161,7 +167,7 @@ svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) +svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) ATTR { return SVE_ACLE_FUNC(svset4,_f32,,)(tuple, 1, x); } @@ -176,7 
+182,7 @@ svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 6) // CPP-CHECK-NEXT: ret [[TMP0]] // -svfloat64x4_t test_svset4_f64(svfloat64x4_t tuple, svfloat64_t x) +svfloat64x4_t test_svset4_f64(svfloat64x4_t tuple, svfloat64_t x) ATTR { return SVE_ACLE_FUNC(svset4,_f64,,)(tuple, 3, x); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c index 0cf7902bdfc76..c4cbec11bf324 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c @@ -5,14 +5,14 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -25,6 +25,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate2_b( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[X0:%.*]], i64 0) @@ -37,7 +43,7 @@ // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP0]], [[X1:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP1]] // -svboolx2_t test_svcreate2_b(svbool_t x0, svbool_t x1) +svboolx2_t test_svcreate2_b(svbool_t x0, svbool_t x1) ATTR { return SVE_ACLE_FUNC(svcreate2,_b,,)(x0, x1); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c index 01b2a17809f68..bc889d9f60183 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c @@ -5,14 +5,14 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple 
aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -25,6 +25,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svcreate4_b( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( poison, [[X0:%.*]], i64 0) @@ -41,7 +47,7 @@ // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP2]], [[X4:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP3]] // -svboolx4_t test_svcreate4_b(svbool_t x0, svbool_t x1, svbool_t x2, svbool_t x4) +svboolx4_t test_svcreate4_b(svbool_t x0, svbool_t x1, svbool_t x2, svbool_t x4) ATTR { return SVE_ACLE_FUNC(svcreate4,_b,,)(x0, x1, x2, x4); } diff --git 
a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c index 0bb57bf6bc57d..35e6f1b84ab1b 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c @@ -5,14 +5,14 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s 
// REQUIRES: aarch64-registered-target #include @@ -23,6 +23,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svget2_b_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 0) @@ -33,7 +39,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbool_t test_svget2_b_0(svboolx2_t tuple) +svbool_t test_svget2_b_0(svboolx2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_b,,)(tuple, 0); } @@ -48,7 +54,7 @@ svbool_t test_svget2_b_0(svboolx2_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbool_t test_svget2_b_1(svboolx2_t tuple) +svbool_t test_svget2_b_1(svboolx2_t tuple) ATTR { return SVE_ACLE_FUNC(svget2,_b,,)(tuple, 1); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c index bfef3c56a3513..8a5f9568e3677 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c @@ -5,14 +5,14 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck 
%s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -25,6 +25,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // NOTE: For these tests clang converts the struct parameter into // several parameters, one for each member of the original struct.
// CHECK-LABEL: @test_svget4_b_0( @@ -37,7 +43,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbool_t test_svget4_b_0(svboolx4_t tuple) +svbool_t test_svget4_b_0(svboolx4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_b,,)(tuple, 0); } @@ -54,7 +60,7 @@ svbool_t test_svget4_b_0(svboolx4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbool_t test_svget4_b_1(svboolx4_t tuple) +svbool_t test_svget4_b_1(svboolx4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_b,,)(tuple, 1); } @@ -71,7 +77,7 @@ svbool_t test_svget4_b_1(svboolx4_t tuple) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP0]] // -svbool_t test_svget4_b_3(svboolx4_t tuple) +svbool_t test_svget4_b_3(svboolx4_t tuple) ATTR { return SVE_ACLE_FUNC(svget4,_b,,)(tuple, 3); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c index 21e7282e9ffb6..75c8d035aedb0 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c @@ -5,14 +5,14 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target @@ -25,6 +25,12 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif + // CHECK-LABEL: @test_svset2_b_0( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) @@ -35,7 +41,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svboolx2_t test_svset2_b_0(svboolx2_t tuple, svbool_t x) +svboolx2_t test_svset2_b_0(svboolx2_t tuple, svbool_t x) ATTR { return SVE_ACLE_FUNC(svset2,_b,,)(tuple, 0, x); } @@ -50,7 +56,7 @@ svboolx2_t test_svset2_b_0(svboolx2_t tuple, svbool_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svboolx2_t 
test_svset2_b_1(svboolx2_t tuple, svbool_t x) +svboolx2_t test_svset2_b_1(svboolx2_t tuple, svbool_t x) ATTR { return SVE_ACLE_FUNC(svset2,_b,,)(tuple, 1, x); } diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c index 9c0233646a7a4..d68810352693b 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c @@ -5,14 +5,14 @@ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s\ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s \ // RUN: | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -DTEST_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include @@ -24,6 +24,11 @@ #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 #endif +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming +#endif // CHECK-LABEL: @test_svset4_b_0( // CHECK-NEXT: entry: @@ -35,7 +40,7 @@ // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) // CPP-CHECK-NEXT: ret [[TMP0]] // -svboolx4_t test_svset4_b_0(svboolx4_t tuple, svbool_t x) +svboolx4_t test_svset4_b_0(svboolx4_t tuple, svbool_t x) ATTR { return SVE_ACLE_FUNC(svset4,_b,,)(tuple, 0, x); } @@ -50,7 +55,7 @@ svboolx4_t test_svset4_b_0(svboolx4_t tuple, svbool_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 16) // CPP-CHECK-NEXT: ret [[TMP0]] // -svboolx4_t test_svset4_b_1(svboolx4_t tuple, svbool_t x) +svboolx4_t test_svset4_b_1(svboolx4_t tuple, svbool_t x) ATTR { return SVE_ACLE_FUNC(svset4,_b,,)(tuple, 1, x); } @@ -65,7 +70,7 @@ svboolx4_t test_svset4_b_1(svboolx4_t tuple, svbool_t x) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 48) // CPP-CHECK-NEXT: ret [[TMP0]] // -svboolx4_t test_svset4_b_3(svboolx4_t tuple, svbool_t x) +svboolx4_t test_svset4_b_3(svboolx4_t tuple, svbool_t x) ATTR { return SVE_ACLE_FUNC(svset4,_b,,)(tuple, 3, x); }