[SveEmitter] Add immediate checks for lanes and complex imms

Adds another bunch of of intrinsics that take immediates with varying ranges based, some being a complex rotation immediate which are a set of allowed immediates rather than a range. svmla_lane: lane immediate ranging 0..(128/(1*sizeinbits(elt)) - 1) svcmla_lane: lane immediate ranging 0..(128/(2*sizeinbits(elt)) - 1) svdot_lane: lane immediate ranging 0..(128/(4*sizeinbits(elt)) - 1) svcadd: complex rotate immediate [90, 270] svcmla: svcmla_lane: complex rotate immediate [0, 90, 180, 270] Reviewers: efriedma, SjoerdMeijer, rovka Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D76680
llvm · Apr 20, 2020 · fc64539 · fc64539
1 parent a8a31fd
commit fc64539
Show file tree

Hide file tree

Showing 11 changed files with 483 additions and 0 deletions.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9244,6 +9244,10 @@ def err_argument_not_shifted_byte : Error<
   "argument should be an 8-bit value shifted by a multiple of 8 bits">;
 def err_argument_not_shifted_byte_or_xxff : Error<
   "argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF">;
+def err_rotation_argument_to_cadd
+    : Error<"argument should be the value 90 or 270">;
+def err_rotation_argument_to_cmla
+    : Error<"argument should be the value 0, 90, 180 or 270">;
 def warn_neon_vector_initializer_non_portable : Warning<
   "vector initializers are not compatible with NEON intrinsics in big endian "
   "mode">, InGroup<DiagGroup<"nonportable-vector-initialization">>;

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
@@ -62,7 +62,10 @@
 // d: default
 // c: const pointer type
 // P: predicate type
+// e: 1/2 width unsigned elements, 2x element count
 // h: 1/2 width elements, 2x element count
+// q: 1/4 width elements, 4x element count
+// o: 4x width elements, 1/4 element count
 //
 // i: constant uint64_t
 //
@@ -164,6 +167,11 @@ def ImmCheckShiftRight          : ImmCheckType<3>;  // 1..sizeinbits(elt)
 def ImmCheckShiftRightNarrow    : ImmCheckType<4>;  // 1..sizeinbits(elt)/2
 def ImmCheckShiftLeft           : ImmCheckType<5>;  // 0..(sizeinbits(elt) - 1)
 def ImmCheck0_7                 : ImmCheckType<6>;  // 0..7
+def ImmCheckLaneIndex           : ImmCheckType<7>;  // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexCompRotate : ImmCheckType<8>;  // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot        : ImmCheckType<9>;  // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270    : ImmCheckType<10>; // [90,270]
+def ImmCheckComplexRotAll90     : ImmCheckType<11>; // [0, 90, 180,270]
 
 class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
   int Arg = arg;
@@ -312,7 +320,19 @@ def SVQSHLU_M  : SInst<"svqshlu[_n_{d}]", "uPdi", "csil",         MergeOp1,  "aa
 // Floating-point arithmetic
 def SVTMAD  : SInst<"svtmad[_{d}]",  "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>;
 
+def SVMLA_LANE  : SInst<"svmla_lane[_{d}]",  "ddddi",  "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf",  MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>,
+                                                                                                        ImmCheck<4, ImmCheckComplexRotAll90>]>;
+
+def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi",  "hfd", MergeOp1,  "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>;
+def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1,  "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Saturating scalar arithmetic
 def SVQDECH_S : SInst<"svqdech_pat[_{d}]",   "ddIi", "s", MergeNone, "aarch64_sve_sqdech", [], [ImmCheck<2, ImmCheck1_16>]>;
 def SVQDECH_U : SInst<"svqdech_pat[_{d}]",   "ddIi", "Us", MergeNone, "aarch64_sve_uqdech", [], [ImmCheck<2, ImmCheck1_16>]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer arithmetic
+def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]",  "ddqqi",  "il",   MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
+def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]",  "ddqqi",  "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -2016,6 +2016,27 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     int ArgNum, CheckTy, ElementSizeInBits;
     std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
 
+    typedef bool(*OptionSetCheckFnTy)(int64_t Value);
+
+    // Function that checks whether the operand (ArgNum) is an immediate
+    // that is one of the predefined values.
+    auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+                                   int ErrDiag) -> bool {
+      // We can't check the value of a dependent argument.
+      Expr *Arg = TheCall->getArg(ArgNum);
+      if (Arg->isTypeDependent() || Arg->isValueDependent())
+        return false;
+
+      // Check constant-ness first.
+      llvm::APSInt Imm;
+      if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm))
+        return true;
+
+      if (!CheckImm(Imm.getSExtValue()))
+        return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+      return false;
+    };
+
     switch ((SVETypeFlags::ImmCheckType)CheckTy) {
     case SVETypeFlags::ImmCheck0_31:
       if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
@@ -2048,6 +2069,34 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                       ElementSizeInBits - 1))
         HasError = true;
       break;
+    case SVETypeFlags::ImmCheckLaneIndex:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+                                      (128 / (1 * ElementSizeInBits)) - 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+                                      (128 / (2 * ElementSizeInBits)) - 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckLaneIndexDot:
+      if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+                                      (128 / (4 * ElementSizeInBits)) - 1))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckComplexRot90_270:
+      if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+                              diag::err_rotation_argument_to_cadd))
+        HasError = true;
+      break;
+    case SVETypeFlags::ImmCheckComplexRotAll90:
+      if (CheckImmediateInSet(
+              [](int64_t V) {
+                return V == 0 || V == 90 || V == 180 || V == 270;
+              },
+              diag::err_rotation_argument_to_cmla))
+        HasError = true;
+      break;
     }
   }
 

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svcmla_f16_m
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 180)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla,_f16,_m,)(pg, op1, op2, op3, 180);
+}
+
+svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svcmla_f32_m
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 270)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla,_f32,_m,)(pg, op1, op2, op3, 270);
+}
+
+svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // CHECK-LABEL: test_svcmla_f64_m
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 0)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 0);
+}
+
+svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0, i32 0)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 0, 0);
+}
+
+svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f16_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 3, i32 90)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 3, 90);
+}
+
+svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0, i32 180)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 0, 180);
+}
+
+svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svcmla_lane_f32_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 1, i32 270)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 1, 270);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3, i32 0)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 0);
+}
+
+svint32_t test_svdot_lane_s32_1(svint32_t op1, svint8_t op2, svint8_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s32_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 3);
+}
+
+svint64_t test_svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3, i32 0)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 0);
+}
+
+svint64_t test_svdot_lane_s64_1(svint64_t op1, svint16_t op2, svint16_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_s64_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3, i32 1)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 1);
+}
+
+svuint32_t test_svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_u32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3, i32 3)
+  // CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_u32,,)(op1, op2, op3, 3);
+}
+
+svuint64_t test_svdot_lane_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
+{
+  // CHECK-LABEL: test_svdot_lane_u64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3, i32 1)
+  // CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdot_lane,_u64,,)(op1, op2, op3, 1);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 0)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 0);
+}
+
+svfloat16_t test_svmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f16_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> %op1, <vscale x 8 x half> %op2, <vscale x 8 x half> %op3, i32 7)
+  // CHECK: ret <vscale x 8 x half> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 7);
+}
+
+svfloat32_t test_svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f32
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 0)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, 0);
+}
+
+svfloat32_t test_svmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f32_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> %op1, <vscale x 4 x float> %op2, <vscale x 4 x float> %op3, i32 3)
+  // CHECK: ret <vscale x 4 x float> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, 3);
+}
+
+svfloat64_t test_svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f64
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 0)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 0);
+}
+
+svfloat64_t test_svmla_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+{
+  // CHECK-LABEL: test_svmla_lane_f64_1
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> %op1, <vscale x 2 x double> %op2, <vscale x 2 x double> %op3, i32 1)
+  // CHECK: ret <vscale x 2 x double> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 1);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/negative/acle_sve_cadd.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify %s
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#include <arm_sve.h>
+
+svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f16,_m,)(pg, op1, op2, 0);
+}
+
+svfloat32_t test_svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f32,_m,)(pg, op1, op2, 0);
+}
+
+svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+{
+  // expected-error@+1 {{argument should be the value 90 or 270}}
+  return SVE_ACLE_FUNC(svcadd,_f64,_m,)(pg, op1, op2, 0);
+}