-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[ARM] Introduce intrinsics for MVE vcmp under strict-fp. #169798
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
4942e51 to
5fb18ab
Compare
|
@llvm/pr-subscribers-llvm-ir
Author: David Green (davemgreen)
Changes:
Similar to #169156 again, this adds intrinsics for strict-fp compare nodes to [... remainder of the description was lost in extraction ...]
Patch is 370.71 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/169798.diff
5 Files Affected:
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..be79002bcbe64 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -114,12 +114,12 @@ def icmp_sgt: IRBuilder<"CreateICmpSGT">;
def icmp_sge: IRBuilder<"CreateICmpSGE">;
def icmp_slt: IRBuilder<"CreateICmpSLT">;
def icmp_sle: IRBuilder<"CreateICmpSLE">;
-def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
-def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
-def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
-def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
-def fcmp_ult: IRBuilder<"CreateFCmpULT">;
-def fcmp_ule: IRBuilder<"CreateFCmpULE">;
+def fcmp_eq_node: IRBuilder<"CreateFCmpOEQ">;
+def fcmp_ne_node: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
+def fcmp_gt_node: IRBuilder<"CreateFCmpOGT">;
+def fcmp_ge_node: IRBuilder<"CreateFCmpOGE">;
+def fcmp_ult_node: IRBuilder<"CreateFCmpULT">;
+def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
def splat: CGHelperFn<"ARMMVEVectorSplat">;
def select: IRBuilder<"CreateSelect">;
def fneg: IRBuilder<"CreateFNeg">;
@@ -589,6 +589,18 @@ def fsub: strictFPAlt<fsub_node,
IRInt<"vsub", [Vector]>>;
def fmul: strictFPAlt<fmul_node,
IRInt<"vmul", [Vector]>>;
+def fcmp_eq : strictFPAlt<fcmp_eq_node,
+ IRInt<"cmp_eq", [Predicate, Vector]>>;
+def fcmp_ne : strictFPAlt<fcmp_ne_node,
+ IRInt<"cmp_ne", [Predicate, Vector]>>;
+def fcmp_gt : strictFPAlt<fcmp_gt_node,
+ IRInt<"cmp_gt", [Predicate, Vector]>>;
+def fcmp_ge : strictFPAlt<fcmp_ge_node,
+ IRInt<"cmp_ge", [Predicate, Vector]>>;
+def fcmp_ult : strictFPAlt<fcmp_ult_node,
+ IRInt<"cmp_lt", [Predicate, Vector]>>;
+def fcmp_ule : strictFPAlt<fcmp_ule_node,
+ IRInt<"cmp_le", [Predicate, Vector]>>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 8886cf5c10058..dd756a401e5cd 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -1,17 +1,26 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_mve.h>
-// CHECK-LABEL: @test_vcmpeqq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
{
@@ -22,12 +31,19 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
{
@@ -38,12 +54,19 @@ mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
{
@@ -54,12 +77,19 @@ mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
{
@@ -70,12 +100,19 @@ mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
{
@@ -86,12 +123,19 @@ mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
{
@@ -102,12 +146,19 @@ mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
{
@@ -118,12 +169,19 @@ mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
{
@@ -134,14 +192,23 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
{
@@ -152,14 +219,23 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
{
@@ -170,14 +246,23 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
{
@@ -188,14 +273,23 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x ...
[truncated]
|
|
@llvm/pr-subscribers-backend-arm
Author: David Green (davemgreen)
Changes:
Similar to #169156 again, this adds intrinsics for strict-fp compare nodes to [... remainder of the description was lost in extraction ...]
Patch is 370.71 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/169798.diff
5 Files Affected:
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..be79002bcbe64 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -114,12 +114,12 @@ def icmp_sgt: IRBuilder<"CreateICmpSGT">;
def icmp_sge: IRBuilder<"CreateICmpSGE">;
def icmp_slt: IRBuilder<"CreateICmpSLT">;
def icmp_sle: IRBuilder<"CreateICmpSLE">;
-def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
-def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
-def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
-def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
-def fcmp_ult: IRBuilder<"CreateFCmpULT">;
-def fcmp_ule: IRBuilder<"CreateFCmpULE">;
+def fcmp_eq_node: IRBuilder<"CreateFCmpOEQ">;
+def fcmp_ne_node: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
+def fcmp_gt_node: IRBuilder<"CreateFCmpOGT">;
+def fcmp_ge_node: IRBuilder<"CreateFCmpOGE">;
+def fcmp_ult_node: IRBuilder<"CreateFCmpULT">;
+def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
def splat: CGHelperFn<"ARMMVEVectorSplat">;
def select: IRBuilder<"CreateSelect">;
def fneg: IRBuilder<"CreateFNeg">;
@@ -589,6 +589,18 @@ def fsub: strictFPAlt<fsub_node,
IRInt<"vsub", [Vector]>>;
def fmul: strictFPAlt<fmul_node,
IRInt<"vmul", [Vector]>>;
+def fcmp_eq : strictFPAlt<fcmp_eq_node,
+ IRInt<"cmp_eq", [Predicate, Vector]>>;
+def fcmp_ne : strictFPAlt<fcmp_ne_node,
+ IRInt<"cmp_ne", [Predicate, Vector]>>;
+def fcmp_gt : strictFPAlt<fcmp_gt_node,
+ IRInt<"cmp_gt", [Predicate, Vector]>>;
+def fcmp_ge : strictFPAlt<fcmp_ge_node,
+ IRInt<"cmp_ge", [Predicate, Vector]>>;
+def fcmp_ult : strictFPAlt<fcmp_ult_node,
+ IRInt<"cmp_lt", [Predicate, Vector]>>;
+def fcmp_ule : strictFPAlt<fcmp_ule_node,
+ IRInt<"cmp_le", [Predicate, Vector]>>;
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 8886cf5c10058..dd756a401e5cd 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -1,17 +1,26 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_mve.h>
-// CHECK-LABEL: @test_vcmpeqq_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
{
@@ -22,12 +31,19 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
{
@@ -38,12 +54,19 @@ mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
{
@@ -54,12 +77,19 @@ mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
{
@@ -70,12 +100,19 @@ mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_s32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
{
@@ -86,12 +123,19 @@ mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
{
@@ -102,12 +146,19 @@ mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
{
@@ -118,12 +169,19 @@ mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_u32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
{
@@ -134,14 +192,23 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_f16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
{
@@ -152,14 +219,23 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_f32(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
{
@@ -170,14 +246,23 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_s8(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT: ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-NOSTRICT-NEXT: entry:
+// CHECK-NOSTRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT: ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-STRICT-NEXT: entry:
+// CHECK-STRICT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT: [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT: ret i16 [[TMP2]]
//
mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
{
@@ -188,14 +273,23 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
#endif /* POLYMORPHIC */
}
-// CHECK-LABEL: @test_vcmpeqq_n_s16(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x ...
[truncated]
|
Similar to llvm#169156 again, this adds intrinsics for strict-fp compare nodes to make sure they end up as the original instruction.
5fb18ab to
0d11589
Compare
As with #169156, this adds intrinsics for the MVE vcmp strict-fp compare nodes
to make sure they still end up as the original instruction.