diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index d2b7b78b9970f..f9402659b4254 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2082,6 +2082,13 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,sve-b16b16"in { defm SVBFMAXNM : BfSingleMultiVector<"maxnm">; } +let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,sve-bfscale" in { + // BFMUL + defm SVBFMUL : BfSingleMultiVector<"mul">; + // BFSCALE + defm SVBFSCALE : BfSingleMultiVector<"scale">; +} + let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; @@ -2389,3 +2396,11 @@ let SVETargetGuard = "sve2,fp8fma", SMETargetGuard = "ssve-fp8fma" in { def SVFMLALLTB_LANE : SInst<"svmlalltb_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVFMLALLTT_LANE : SInst<"svmlalltt_lane[_f32_mf8]", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; } + +let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2p2" in { + def FMUL_SINGLE_X2 : SInst<"svmul[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_fmul_single_x2", [IsStreaming], []>; + def FMUL_SINGLE_X4 : SInst<"svmul[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_fmul_single_x4", [IsStreaming], []>; + + def FMUL_X2 : SInst<"svmul[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_fmul_x2", [IsStreaming], []>; + def FMUL_X4 : SInst<"svmul[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_fmul_x4", [IsStreaming], []>; +} diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bfmul.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bfmul.c new file mode 100644 index 0000000000000..187e9390f742c --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bfmul.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: define dso_local { , } @test_svmul_single_bf16_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z25test_svmul_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svbfloat16x2_t test_svmul_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_bf16_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_single_bf16_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svmul_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svbfloat16x4_t test_svmul_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_bf16_x4)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , } @test_svmul_bf16_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z18test_svmul_bf16_x214svbfloat16x2_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svbfloat16x2_t test_svmul_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_bf16_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_bf16_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z18test_svmul_bf16_x414svbfloat16x4_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svbfloat16x4_t test_svmul_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_bf16_x4)(zdn, zm); +} diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bfscale.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bfscale.c new file mode 100644 index 0000000000000..6f8606c22954f --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_bfscale.c @@ -0,0 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve-bfscale -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: define dso_local { , } @test_svscale_single_bf16_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fscale.single.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z27test_svscale_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fscale.single.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svbfloat16x2_t test_svscale_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svscale,_single_bf16_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svscale_single_bf16_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fscale.single.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z27test_svscale_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fscale.single.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svbfloat16x4_t test_svscale_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svscale,_single_bf16_x4)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , } @test_svscale_bf16_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fscale.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z20test_svscale_bf16_x214svbfloat16x2_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fscale.x2.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svbfloat16x2_t test_svscale_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svscale,_bf16_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svscale_bf16_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fscale.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z20test_svscale_bf16_x414svbfloat16x4_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fscale.x4.nxv8bf16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svbfloat16x4_t test_svscale_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svscale,_bf16_x4)(zdn, zm); +} diff --git a/clang/test/CodeGen/AArch64/sme2p2-intrinsics/acle_sme2p2_fmul.c b/clang/test/CodeGen/AArch64/sme2p2-intrinsics/acle_sme2p2_fmul.c new file mode 100644 index 0000000000000..52e2a5e4b96d2 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2p2-intrinsics/acle_sme2p2_fmul.c @@ -0,0 +1,198 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// CHECK-LABEL: define dso_local { , } @test_svmul_single_f16_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svmul_single_f16_x213svfloat16x2_tu13__SVFloat16_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svfloat16x2_t test_svmul_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_f16_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_single_f16_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svmul_single_f16_x413svfloat16x4_tu13__SVFloat16_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svfloat16x4_t test_svmul_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_f16_x4)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , } @test_svmul_f16_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z17test_svmul_f16_x213svfloat16x2_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svfloat16x2_t test_svmul_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_f16_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_f16_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z17test_svmul_f16_x413svfloat16x4_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv8f16( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svfloat16x4_t test_svmul_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_f16_x4)(zdn, zm); +} + +// CHECK-LABEL: define dso_local { , } @test_svmul_single_f32_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svmul_single_f32_x213svfloat32x2_tu13__SVFloat32_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svfloat32x2_t test_svmul_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_f32_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_single_f32_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svmul_single_f32_x413svfloat32x4_tu13__SVFloat32_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svfloat32x4_t test_svmul_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_f32_x4)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , } @test_svmul_f32_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z17test_svmul_f32_x213svfloat32x2_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svfloat32x2_t test_svmul_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_f32_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_f32_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z17test_svmul_f32_x413svfloat32x4_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv4f32( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svfloat32x4_t test_svmul_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_f32_x4)(zdn, zm); +} + +// CHECK-LABEL: define dso_local { , } @test_svmul_single_f64_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svmul_single_f64_x213svfloat64x2_tu13__SVFloat64_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.single.x2.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svfloat64x2_t test_svmul_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_f64_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_single_f64_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svmul_single_f64_x413svfloat64x4_tu13__SVFloat64_t( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svfloat64x4_t test_svmul_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_single_f64_x4)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , } @test_svmul_f64_x2( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CHECK-NEXT: ret { , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , } @_Z17test_svmul_f64_x213svfloat64x2_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmul.x2.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret { , } [[TMP0]] +// +svfloat64x2_t test_svmul_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_f64_x2)(zdn, zm); +} +// CHECK-LABEL: define dso_local { , , , } @test_svmul_f64_x4( +// CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CHECK-NEXT: ret { , , , } [[TMP0]] +// +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z17test_svmul_f64_x413svfloat64x4_tS_( +// CPP-CHECK-SAME: [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmul.x4.nxv2f64( [[ZDN_COERCE0]], [[ZDN_COERCE1]], [[ZDN_COERCE2]], [[ZDN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] +// +svfloat64x4_t test_svmul_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming{ + return SVE_ACLE_FUNC(svmul,_f64_x4)(zdn, zm); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b0269eec3347a..09cc158dc9767 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3627,10 +3627,10 @@ let TargetPrefix = "aarch64" in { } // - // Multi-vector floating point min/max number + // Multi-vector floating point min/max number, scale, and multiply // - foreach instr = ["fmaxnm", "fminnm"] in { + foreach instr = ["fmaxnm", "fminnm", "fscale", "fmul"] in { def int_aarch64_sve_ # instr # _single_x2 : SVE2_VG2_Multi_Single_Intrinsic; def int_aarch64_sve_ # instr # _single_x4 : SVE2_VG4_Multi_Single_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index e7b2d20e2a6cb..bd2785da59ad1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -6220,6 +6220,26 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D})) SelectDestructiveMultiIntrinsic(Node, 4, false, Op); return; + case Intrinsic::aarch64_sve_fscale_single_x4: + SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ); + return; + case Intrinsic::aarch64_sve_fscale_single_x2: + SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ); + return; + case Intrinsic::aarch64_sve_fmul_single_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S, + AArch64::FMUL_4ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 4, false, Op); + return; + case Intrinsic::aarch64_sve_fmul_single_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S, + AArch64::FMUL_2ZZ_D})) + SelectDestructiveMultiIntrinsic(Node, 2, false, Op); + return; case Intrinsic::aarch64_sve_fmaxnm_x2: if (auto Op = SelectOpcodeFromVT( Node->getValueType(0), @@ -6248,6 +6268,26 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D})) SelectDestructiveMultiIntrinsic(Node, 4, true, Op); return; + case Intrinsic::aarch64_sve_fscale_x4: + SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z); + return; + case Intrinsic::aarch64_sve_fscale_x2: + SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z); + return; + case Intrinsic::aarch64_sve_fmul_x4: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S, + AArch64::FMUL_4Z4Z_D})) + SelectDestructiveMultiIntrinsic(Node, 4, true, Op); + return; + case Intrinsic::aarch64_sve_fmul_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S, + AArch64::FMUL_2Z2Z_D})) + SelectDestructiveMultiIntrinsic(Node, 2, true, Op); + return; case Intrinsic::aarch64_sve_fcvtzs_x2: SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); return; diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll new file mode 100644 index 0000000000000..0ab82db690c28 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-bfscale -force-streaming -verify-machineinstrs < %s | FileCheck %s + +define { , } @multi_vec_scale_single_x2_bf16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmul { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , , , } @multi_vec_scale_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfmul { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , } @multi_vec_scale_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_scale_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: bfmul { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , , , } @multi_vec_scale_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: bfmul { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll new file mode 100644 index 0000000000000..97f0570fe8ca0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-bfscale -force-streaming -verify-machineinstrs < %s | FileCheck %s + +define { , } @multi_vec_scale_single_x2_bf16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fscale.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , , , } @multi_vec_scale_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_scale_single_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fscale.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , } @multi_vec_scale_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_scale_x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: bfscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fscale.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , , , } @multi_vec_scale_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_scale_x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: bfscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fscale.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} diff --git a/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll b/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll new file mode 100644 index 0000000000000..cdd032cd2a52c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p2 -force-streaming -verify-machineinstrs < %s | FileCheck %s + +define { , } @multi_vec_mul_single_x2_f16( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_mul_single_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmul { z0.h, z1.h }, { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.single.x2.nxv8f16( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , , , } @multi_vec_mul_single_x4_f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_mul_single_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmul { z0.h - z3.h }, { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , } @multi_vec_mul_x2_f16( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_mul_x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fmul { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , , , } @multi_vec_mul_x4_f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_mul_x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fmul { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , } @multi_vec_mul_single_x2_f32( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_mul_single_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmul { z0.s, z1.s }, { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.single.x2.nxv4f32( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , , , } @multi_vec_mul_single_x4_f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_mul_single_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmul { z0.s - z3.s }, { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , } @multi_vec_mul_x2_f32( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_mul_x2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fmul { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , , , } @multi_vec_mul_x4_f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_mul_x4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fmul { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +} + +define { , } @multi_vec_mul_single_x2_f64( %zdn1, %zdn2, %zm) { +; CHECK-LABEL: multi_vec_mul_single_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmul { z0.d, z1.d }, { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.single.x2.nxv2f64( %zdn1, %zdn2, %zm) + ret { , } %res +} + +define { , , , } @multi_vec_mul_single_x4_f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { +; CHECK-LABEL: multi_vec_mul_single_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmul { z0.d - z3.d }, { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) + ret { , , , } %res +} + +define { , } @multi_vec_mul_x2_f64( %zdn1, %zdn2, %zm1, %zm2) { +; CHECK-LABEL: multi_vec_mul_x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: fmul { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.fmul.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2) + ret { , } %res +} + +define { , , , } @multi_vec_mul_x4_f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { +; CHECK-LABEL: multi_vec_mul_x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: fmul { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.fmul.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) + ret { , , , } %res +}