Skip to content

Conversation

@davemgreen
Copy link
Collaborator

Similar to #169156 again, this adds intrinsics for strict-fp compare nodes to
make sure they end up as the original instruction.

@davemgreen davemgreen marked this pull request as ready for review November 30, 2025 11:24
@llvmbot llvmbot added backend:ARM clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir labels Nov 30, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 30, 2025

@llvm/pr-subscribers-llvm-ir

Author: David Green (davemgreen)

Changes

Similar to #169156 again, this adds intrinsics for strict-fp compare nodes to
make sure they end up as the original instruction.


Patch is 370.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169798.diff

5 Files Affected:

  • (modified) clang/include/clang/Basic/arm_mve_defs.td (+18-6)
  • (modified) clang/test/CodeGen/arm-mve-intrinsics/compare.c (+3030-1430)
  • (modified) llvm/include/llvm/IR/IntrinsicsARM.td (+9)
  • (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+34-15)
  • (added) llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-compare.ll (+820)
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..be79002bcbe64 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -114,12 +114,12 @@ def icmp_sgt: IRBuilder<"CreateICmpSGT">;
 def icmp_sge: IRBuilder<"CreateICmpSGE">;
 def icmp_slt: IRBuilder<"CreateICmpSLT">;
 def icmp_sle: IRBuilder<"CreateICmpSLE">;
-def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
-def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
-def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
-def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
-def fcmp_ult: IRBuilder<"CreateFCmpULT">;
-def fcmp_ule: IRBuilder<"CreateFCmpULE">;
+def fcmp_eq_node: IRBuilder<"CreateFCmpOEQ">;
+def fcmp_ne_node: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
+def fcmp_gt_node: IRBuilder<"CreateFCmpOGT">;
+def fcmp_ge_node: IRBuilder<"CreateFCmpOGE">;
+def fcmp_ult_node: IRBuilder<"CreateFCmpULT">;
+def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
 def fneg: IRBuilder<"CreateFNeg">;
@@ -589,6 +589,18 @@ def fsub: strictFPAlt<fsub_node,
                       IRInt<"vsub", [Vector]>>;
 def fmul: strictFPAlt<fmul_node,
                       IRInt<"vmul", [Vector]>>;
+def fcmp_eq  : strictFPAlt<fcmp_eq_node,
+                           IRInt<"cmp_eq", [Predicate, Vector]>>;
+def fcmp_ne  : strictFPAlt<fcmp_ne_node,
+                           IRInt<"cmp_ne", [Predicate, Vector]>>;
+def fcmp_gt  : strictFPAlt<fcmp_gt_node,
+                           IRInt<"cmp_gt", [Predicate, Vector]>>;
+def fcmp_ge  : strictFPAlt<fcmp_ge_node,
+                           IRInt<"cmp_ge", [Predicate, Vector]>>;
+def fcmp_ult : strictFPAlt<fcmp_ult_node,
+                           IRInt<"cmp_lt", [Predicate, Vector]>>;
+def fcmp_ule : strictFPAlt<fcmp_ule_node,
+                           IRInt<"cmp_le", [Predicate, Vector]>>;
 
 // -----------------------------------------------------------------------------
 // Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 8886cf5c10058..dd756a401e5cd 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -1,17 +1,26 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vcmpeqq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 {
@@ -22,12 +31,19 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
 {
@@ -38,12 +54,19 @@ mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
 {
@@ -54,12 +77,19 @@ mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
 {
@@ -70,12 +100,19 @@ mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
 {
@@ -86,12 +123,19 @@ mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
 {
@@ -102,12 +146,19 @@ mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
 {
@@ -118,12 +169,19 @@ mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 {
@@ -134,14 +192,23 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 {
@@ -152,14 +219,23 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
 {
@@ -170,14 +246,23 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 {
@@ -188,14 +273,23 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x ...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Nov 30, 2025

@llvm/pr-subscribers-backend-arm

Author: David Green (davemgreen)

Changes

Similar to #169156 again, this adds intrinsics for strict-fp compare nodes to
make sure they end up as the original instruction.


Patch is 370.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169798.diff

5 Files Affected:

  • (modified) clang/include/clang/Basic/arm_mve_defs.td (+18-6)
  • (modified) clang/test/CodeGen/arm-mve-intrinsics/compare.c (+3030-1430)
  • (modified) llvm/include/llvm/IR/IntrinsicsARM.td (+9)
  • (modified) llvm/lib/Target/ARM/ARMInstrMVE.td (+34-15)
  • (added) llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-compare.ll (+820)
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 3714262898476..be79002bcbe64 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -114,12 +114,12 @@ def icmp_sgt: IRBuilder<"CreateICmpSGT">;
 def icmp_sge: IRBuilder<"CreateICmpSGE">;
 def icmp_slt: IRBuilder<"CreateICmpSLT">;
 def icmp_sle: IRBuilder<"CreateICmpSLE">;
-def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
-def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
-def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
-def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
-def fcmp_ult: IRBuilder<"CreateFCmpULT">;
-def fcmp_ule: IRBuilder<"CreateFCmpULE">;
+def fcmp_eq_node: IRBuilder<"CreateFCmpOEQ">;
+def fcmp_ne_node: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
+def fcmp_gt_node: IRBuilder<"CreateFCmpOGT">;
+def fcmp_ge_node: IRBuilder<"CreateFCmpOGE">;
+def fcmp_ult_node: IRBuilder<"CreateFCmpULT">;
+def fcmp_ule_node: IRBuilder<"CreateFCmpULE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
 def fneg: IRBuilder<"CreateFNeg">;
@@ -589,6 +589,18 @@ def fsub: strictFPAlt<fsub_node,
                       IRInt<"vsub", [Vector]>>;
 def fmul: strictFPAlt<fmul_node,
                       IRInt<"vmul", [Vector]>>;
+def fcmp_eq  : strictFPAlt<fcmp_eq_node,
+                           IRInt<"cmp_eq", [Predicate, Vector]>>;
+def fcmp_ne  : strictFPAlt<fcmp_ne_node,
+                           IRInt<"cmp_ne", [Predicate, Vector]>>;
+def fcmp_gt  : strictFPAlt<fcmp_gt_node,
+                           IRInt<"cmp_gt", [Predicate, Vector]>>;
+def fcmp_ge  : strictFPAlt<fcmp_ge_node,
+                           IRInt<"cmp_ge", [Predicate, Vector]>>;
+def fcmp_ult : strictFPAlt<fcmp_ult_node,
+                           IRInt<"cmp_lt", [Predicate, Vector]>>;
+def fcmp_ule : strictFPAlt<fcmp_ule_node,
+                           IRInt<"cmp_le", [Predicate, Vector]>>;
 
 // -----------------------------------------------------------------------------
 // Convenience lists of parameter types. 'T' is just a container record, so you
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/compare.c b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
index 8886cf5c10058..dd756a401e5cd 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/compare.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/compare.c
@@ -1,17 +1,26 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes='mem2reg,sroa,early-cse<>' | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vcmpeqq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 {
@@ -22,12 +31,19 @@ mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
 {
@@ -38,12 +54,19 @@ mve_pred16_t test_vcmpeqq_f32(float32x4_t a, float32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
 {
@@ -54,12 +77,19 @@ mve_pred16_t test_vcmpeqq_s8(int8x16_t a, int8x16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
 {
@@ -70,12 +100,19 @@ mve_pred16_t test_vcmpeqq_s16(int16x8_t a, int16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_s32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
 {
@@ -86,12 +123,19 @@ mve_pred16_t test_vcmpeqq_s32(int32x4_t a, int32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
 {
@@ -102,12 +146,19 @@ mve_pred16_t test_vcmpeqq_u8(uint8x16_t a, uint8x16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
 {
@@ -118,12 +169,19 @@ mve_pred16_t test_vcmpeqq_u16(uint16x8_t a, uint16x8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_u32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 {
@@ -134,14 +192,23 @@ mve_pred16_t test_vcmpeqq_u32(uint32x4_t a, uint32x4_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f16(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <8 x i1> @llvm.arm.mve.cmp.eq.v8i1.v8f16(<8 x half> [[A:%.*]], <8 x half> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 {
@@ -152,14 +219,23 @@ mve_pred16_t test_vcmpeqq_n_f16(float16x8_t a, float16_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = fcmp oeq <4 x float> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_f32(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = call <4 x i1> @llvm.arm.mve.cmp.eq.v4i1.v4f32(<4 x float> [[A:%.*]], <4 x float> [[DOTSPLAT]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
 {
@@ -170,14 +246,23 @@ mve_pred16_t test_vcmpeqq_n_f32(float32x4_t a, float32_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
-// CHECK-NEXT:    ret i16 [[TMP2]]
+// CHECK-NOSTRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-NOSTRICT-NEXT:  entry:
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NOSTRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-NOSTRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-NOSTRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]])
+// CHECK-NOSTRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NOSTRICT-NEXT:    ret i16 [[TMP2]]
+//
+// CHECK-STRICT-LABEL: @test_vcmpeqq_n_s8(
+// CHECK-STRICT-NEXT:  entry:
+// CHECK-STRICT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-STRICT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+// CHECK-STRICT-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], [[DOTSPLAT]]
+// CHECK-STRICT-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[TMP0]]) #[[ATTR2]]
+// CHECK-STRICT-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-STRICT-NEXT:    ret i16 [[TMP2]]
 //
 mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 {
@@ -188,14 +273,23 @@ mve_pred16_t test_vcmpeqq_n_s8(int8x16_t a, int8_t b)
 #endif /* POLYMORPHIC */
 }
 
-// CHECK-LABEL: @test_vcmpeqq_n_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i64 0
-// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x ...
[truncated]

Similar to llvm#169156 again, this adds intrinsics for strict-fp compare nodes to
make sure they end up as the original instruction.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:ARM clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants