-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[ARM] Introduce intrinsics for MVE fp-converts under strict-fp. #170686
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others due to it taking a Type as one of the arguments. This creates a new function to create the intrinsic we need.
|
@llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-llvm-ir Author: David Green (davemgreen) Changes: This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others due to it taking a Type as one of the arguments. This creates a new function to create the intrinsic we need. Patch is 87.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170686.diff — 8 Files Affected:
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 77531c31538c1..eae3a9f9624ab 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -598,7 +598,7 @@ foreach half = [ "b", "t" ] in {
} // params = [f16], pnt = PNT_None
} // loop over half = "b", "t"
-multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
+multiclass float_int_conversions<Type FScalar, Type IScalar, Builder ftoi, Builder itof> {
defvar FVector = VecOf<FScalar>;
defvar IVector = VecOf<IScalar>;
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3210549d0cb56..d228b298a9aa6 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -123,10 +123,10 @@ def fcmp_ule: IRBuilder<"CreateFCmpULE">;
def splat: CGHelperFn<"ARMMVEVectorSplat">;
def select: IRBuilder<"CreateSelect">;
def fneg: IRBuilder<"CreateFNeg">;
-def sitofp: IRBuilder<"CreateSIToFP">;
-def uitofp: IRBuilder<"CreateUIToFP">;
-def fptosi: IRBuilder<"CreateFPToSI">;
-def fptoui: IRBuilder<"CreateFPToUI">;
+def sitofp_node: IRBuilder<"CreateSIToFP">;
+def uitofp_node: IRBuilder<"CreateUIToFP">;
+def fptosi_node: IRBuilder<"CreateFPToSI">;
+def fptoui_node: IRBuilder<"CreateFPToUI">;
def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
let special_params = [IRBuilderIntParam<1, "unsigned">];
}
@@ -215,9 +215,9 @@ def bitsize;
// strictFPAlt allows a node to have different code generation under strict-fp.
// TODO: The standard node can be IRBuilderBase or IRIntBase.
-class strictFPAlt<Builder standard_, IRIntBase strictfp_> : Builder {
+class strictFPAlt<Builder standard_, Builder strictfp_> : Builder {
Builder standard = standard_;
- IRIntBase strictfp = strictfp_;
+ Builder strictfp = strictfp_;
}
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
@@ -593,6 +593,14 @@ def fminnm : strictFPAlt<IRIntBase<"minnum", [Vector]>,
IRInt<"vminnm", [Vector]>>;
def fmaxnm : strictFPAlt<IRIntBase<"maxnum", [Vector]>,
IRInt<"vmaxnm", [Vector]>>;
+def sitofp: strictFPAlt<sitofp_node,
+ CGFHelperFn<"ARMMVECreateSIToFP">>;
+def uitofp: strictFPAlt<uitofp_node,
+ CGFHelperFn<"ARMMVECreateUIToFP">>;
+def fptosi: strictFPAlt<fptosi_node,
+ CGFHelperFn<"ARMMVECreateFPToSI">>;
+def fptoui: strictFPAlt<fptoui_node,
+ CGFHelperFn<"ARMMVECreateFPToUI">>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index d4b0b81d3d87f..744cd1b0a324a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3512,6 +3512,38 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
return Builder.CreateShuffleVector(V, Indices);
}
+static llvm::Value *ARMMVECreateSIToFP(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
+}
+
+static llvm::Value *ARMMVECreateUIToFP(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_fp_int, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
+}
+
+static llvm::Value *ARMMVECreateFPToSI(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 0)});
+}
+
+static llvm::Value *ARMMVECreateFPToUI(CGBuilderTy &Builder,
+ CodeGenFunction *CGF, llvm::Value *V,
+ llvm::Type *Ty) {
+ return Builder.CreateCall(
+ CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vcvt_int_fp, {Ty, V->getType()}),
+ {V, llvm::ConstantInt::get(Builder.getInt32Ty(), 1)});
+}
+
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue,
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index b2a6d0c1ea668..14a9116208b87 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -1,15 +1,22 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
-// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_mve.h>
-// CHECK-LABEL: @test_vcvtq_f16_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f16_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f16_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> [[A:%.*]], i32 0) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcvtq_f16_s16(int16x8_t a)
{
@@ -20,10 +27,15 @@ float16x8_t test_vcvtq_f16_s16(int16x8_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_f16_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
-// CHECK-NEXT: ret <8 x half> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f16_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f16_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.v8f16.v8i16(<8 x i16> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
{
@@ -34,10 +46,15 @@ float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_f32_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f32_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f32_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> [[A:%.*]], i32 0) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcvtq_f32_s32(int32x4_t a)
{
@@ -48,10 +65,15 @@ float32x4_t test_vcvtq_f32_s32(int32x4_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_f32_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_f32_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_f32_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.v4f32.v4i32(<4 x i32> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
{
@@ -62,52 +84,79 @@ float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_s16_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_s16_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_s16_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> [[A:%.*]], i32 0) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]]
//
int16x8_t test_vcvtq_s16_f16(float16x8_t a)
{
return vcvtq_s16_f16(a);
}
-// CHECK-LABEL: @test_vcvtq_s32_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_s32_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_s32_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> [[A:%.*]], i32 0) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]]
//
int32x4_t test_vcvtq_s32_f32(float32x4_t a)
{
return vcvtq_s32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_u16_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_u16_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NOSTRICT-NEXT: ret <8 x i16> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_u16_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.int.fp.v8i16.v8f16(<8 x half> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x i16> [[TMP0]]
//
uint16x8_t test_vcvtq_u16_f16(float16x8_t a)
{
return vcvtq_u16_f16(a);
}
-// CHECK-LABEL: @test_vcvtq_u32_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_u32_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NOSTRICT-NEXT: ret <4 x i32> [[TMP0]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_u32_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.int.fp.v4i32.v4f32(<4 x float> [[A:%.*]], i32 1) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vcvtq_u32_f32(float32x4_t a)
{
return vcvtq_u32_f32(a);
}
-// CHECK-LABEL: @test_vcvtq_m_f16_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f16_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p)
{
@@ -118,12 +167,19 @@ float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_f16_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x half> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f16_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p)
{
@@ -134,12 +190,19 @@ float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_f32_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f32_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p)
{
@@ -150,12 +213,19 @@ float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_f32_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <4 x float> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
+// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcvtq_m_f32_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p)
{
@@ -166,12 +236,19 @@ float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcvtq_m_s16_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
-// CHECK-NEXT: ret <8 x i16> [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcvtq_m_s16_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:...
[truncated]
|
This is the last of the generic instructions created from MVE intrinsics. It was a little more awkward than the others due to it taking a Type as one of the arguments. This creates a new function to create the intrinsic we need.