diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a429a3c5fe378..cbc2af73d6052 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2077,11 +2077,11 @@ def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aar def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } -let TargetGuard = "sve2p1" in { +let TargetGuard = "sve2p1|sme" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>; -defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; +defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">; } let TargetGuard = "sve2p1|sme2" in { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c index 9d912c5d9e276..74a90583a173a 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c @@ -1,14 +1,15 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK - +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s #include #ifdef SVE_OVERLOADED_FORMS @@ -388,3 +389,196 @@ svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) { svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) { return SVE_ACLE_FUNC(svrevd, _u64, _x, )(pg, op); } + + +// CHECK-LABEL: @test_svrevd_bf16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svrevd_bf16_zu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _bf16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f16_zu10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _f16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f32_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f32_zu10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) { + return SVE_ACLE_FUNC(svrevd, _f32, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f64_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f64_zu10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svrevd_f64_z(svbool_t pg, svfloat64_t op) { + return SVE_ACLE_FUNC(svrevd, _f64, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svrevd_bf16_mu14__SVBfloat16_tu10__SVBool_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svrevd_bf16_m(svbfloat16_t inactive, svbool_t pg, svbfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _bf16, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f16_mu13__SVFloat16_tu10__SVBool_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svrevd_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _f16, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f32_mu13__SVFloat32_tu10__SVBool_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svrevd_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) { + return SVE_ACLE_FUNC(svrevd, _f32, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f64_mu13__SVFloat64_tu10__SVBool_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svrevd_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) { + return SVE_ACLE_FUNC(svrevd, _f64, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_bf16_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z18test_svrevd_bf16_xu10__SVBool_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8bf16( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svrevd_bf16_x(svbool_t pg, svbfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _bf16, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f16_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f16_xu10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8f16( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svrevd_f16_x(svbool_t pg, svfloat16_t op) { + return SVE_ACLE_FUNC(svrevd, _f16, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f32_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f32_xu10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4f32( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svrevd_f32_x(svbool_t pg, svfloat32_t op) { + return SVE_ACLE_FUNC(svrevd, _f32, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_f64_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_f64_xu10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2f64( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svrevd_f64_x(svbool_t pg, svfloat64_t op) { + return SVE_ACLE_FUNC(svrevd, _f64, _x, )(pg, op); +} diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 4f8917618ea40..659e2ddc5d543 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1259,6 +1259,12 @@ multiclass sve2_int_perm_revd { def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; def : SVE_1_Op_Passthru_Pat(NAME)>; + + def : SVE_1_Op_Passthru_Pat(NAME)>; + def : SVE_1_Op_Passthru_Pat(NAME)>; + def : SVE_1_Op_Passthru_Pat(NAME)>; + def : SVE_1_Op_Passthru_Pat(NAME)>; + } class sve2_clamp sz, bit U, ZPRRegOp zpr_ty> diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-revd.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-revd.ll index 87b15baffa0d2..dd02e1a89e9ac 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-revd.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-revd.ll @@ -37,7 +37,48 @@ define @test_revd_i64( %a, %res } +define @test_revd_bf16( %a, %pg, %b) { +; CHECK-LABEL: test_revd_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.revd.nxv8bf16( %a, %pg, %b) + ret %res +} + +define @test_revd_f16( %a, %pg, %b) { +; CHECK-LABEL: test_revd_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.revd.nxv8f16( %a, %pg, %b) + ret %res +} + +define @test_revd_f32( %a, %pg, %b) { +; CHECK-LABEL: test_revd_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.revd.nxv4f32( %a, %pg, %b) + ret %res +} + +define @test_revd_f64( %a, %pg, %b) { +; CHECK-LABEL: test_revd_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.revd.nxv2f64( %a, %pg, %b) + ret %res +} + declare @llvm.aarch64.sve.revd.nxv16i8(, , ) declare @llvm.aarch64.sve.revd.nxv8i16(, , ) declare @llvm.aarch64.sve.revd.nxv4i32(, , ) declare @llvm.aarch64.sve.revd.nxv2i64(, , ) + +declare @llvm.aarch64.sve.revd.nxv8bf16(, , ) +declare @llvm.aarch64.sve.revd.nxv8f16(, , ) +declare @llvm.aarch64.sve.revd.nxv4f32(, , ) +declare @llvm.aarch64.sve.revd.nxv2f64(, , )