Skip to content

Commit c037128

Browse files
authored
[ARM] Introduce intrinsics for MVE minnm/maxnm under strict-fp. (#169795)
As with #169156, this is mostly about denormal handling, since minnum/maxnum involve no rounding step.
1 parent 2ad7174 commit c037128

File tree

9 files changed

+387
-133
lines changed

9 files changed

+387
-133
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -783,15 +783,15 @@ let params = T.Unsigned in {
783783
}
784784
let params = T.Float in {
785785
def vminnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
786-
(IRIntBase<"minnum", [Vector]> $a, $b)>;
786+
(fminnm $a, $b)>;
787787
def vmaxnmq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
788-
(IRIntBase<"maxnum", [Vector]> $a, $b)>;
788+
(fmaxnm $a, $b)>;
789789
def vminnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
790-
(IRIntBase<"minnum", [Vector]>
790+
(fminnm
791791
(IRIntBase<"fabs", [Vector]> $a),
792792
(IRIntBase<"fabs", [Vector]> $b))>;
793793
def vmaxnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
794-
(IRIntBase<"maxnum", [Vector]>
794+
(fmaxnm
795795
(IRIntBase<"fabs", [Vector]> $a),
796796
(IRIntBase<"fabs", [Vector]> $b))>;
797797
}

clang/include/clang/Basic/arm_mve_defs.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,10 @@ def fsub: strictFPAlt<fsub_node,
589589
IRInt<"vsub", [Vector]>>;
590590
def fmul: strictFPAlt<fmul_node,
591591
IRInt<"vmul", [Vector]>>;
592+
def fminnm : strictFPAlt<IRIntBase<"minnum", [Vector]>,
593+
IRInt<"vminnm", [Vector]>>;
594+
def fmaxnm : strictFPAlt<IRIntBase<"maxnum", [Vector]>,
595+
IRInt<"vmaxnm", [Vector]>>;
592596

593597
// -----------------------------------------------------------------------------
594598
// Convenience lists of parameter types. 'T' is just a container record, so you
Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,26 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2-
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3-
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
2+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
3+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
4+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
5+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
46

57
// REQUIRES: aarch64-registered-target || arm-registered-target
68

79
#include <arm_mve.h>
810

9-
// CHECK-LABEL: @test_vmaxnmaq_f16(
10-
// CHECK-NEXT: entry:
11-
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]])
12-
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]])
13-
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
14-
// CHECK-NEXT: ret <8 x half> [[TMP2]]
11+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f16(
12+
// CHECK-NOSTRICT-NEXT: entry:
13+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]])
14+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]])
15+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
16+
// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
17+
//
18+
// CHECK-STRICT-LABEL: @test_vmaxnmaq_f16(
19+
// CHECK-STRICT-NEXT: entry:
20+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]]) #[[ATTR3:[0-9]+]]
21+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]]) #[[ATTR3]]
22+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]]) #[[ATTR3]]
23+
// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
1524
//
1625
float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b)
1726
{
@@ -22,12 +31,19 @@ float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b)
2231
#endif /* POLYMORPHIC */
2332
}
2433

25-
// CHECK-LABEL: @test_vmaxnmaq_f32(
26-
// CHECK-NEXT: entry:
27-
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]])
28-
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]])
29-
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
30-
// CHECK-NEXT: ret <4 x float> [[TMP2]]
34+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_f32(
35+
// CHECK-NOSTRICT-NEXT: entry:
36+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]])
37+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]])
38+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
39+
// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
40+
//
41+
// CHECK-STRICT-LABEL: @test_vmaxnmaq_f32(
42+
// CHECK-STRICT-NEXT: entry:
43+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]]) #[[ATTR3]]
44+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]]) #[[ATTR3]]
45+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR3]]
46+
// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
3147
//
3248
float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b)
3349
{
@@ -38,12 +54,19 @@ float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b)
3854
#endif /* POLYMORPHIC */
3955
}
4056

41-
// CHECK-LABEL: @test_vmaxnmaq_m_f16(
42-
// CHECK-NEXT: entry:
43-
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
44-
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
45-
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
46-
// CHECK-NEXT: ret <8 x half> [[TMP2]]
57+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f16(
58+
// CHECK-NOSTRICT-NEXT: entry:
59+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
60+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
61+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
62+
// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
63+
//
64+
// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f16(
65+
// CHECK-STRICT-NEXT: entry:
66+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
67+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR3]]
68+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]]) #[[ATTR3]]
69+
// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
4770
//
4871
float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
4972
{
@@ -54,12 +77,19 @@ float16x8_t test_vmaxnmaq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
5477
#endif /* POLYMORPHIC */
5578
}
5679

57-
// CHECK-LABEL: @test_vmaxnmaq_m_f32(
58-
// CHECK-NEXT: entry:
59-
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
60-
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
61-
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
62-
// CHECK-NEXT: ret <4 x float> [[TMP2]]
80+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmaq_m_f32(
81+
// CHECK-NOSTRICT-NEXT: entry:
82+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
83+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
84+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
85+
// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
86+
//
87+
// CHECK-STRICT-LABEL: @test_vmaxnmaq_m_f32(
88+
// CHECK-STRICT-NEXT: entry:
89+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
90+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR3]]
91+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]]) #[[ATTR3]]
92+
// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
6393
//
6494
float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
6595
{
@@ -69,3 +99,5 @@ float32x4_t test_vmaxnmaq_m_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
6999
return vmaxnmaq_m_f32(a, b, p);
70100
#endif /* POLYMORPHIC */
71101
}
102+
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
103+
// CHECK: {{.*}}
Lines changed: 76 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2-
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3-
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
2+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
3+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-NOSTRICT
4+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
5+
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -disable-O0-optnone -frounding-math -fexperimental-strict-floating-point -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT
46

57
// REQUIRES: aarch64-registered-target || arm-registered-target
68

79
#include <arm_mve.h>
810

9-
// CHECK-LABEL: @test_vmaxnmq_f16(
10-
// CHECK-NEXT: entry:
11-
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
12-
// CHECK-NEXT: ret <8 x half> [[TMP0]]
11+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f16(
12+
// CHECK-NOSTRICT-NEXT: entry:
13+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
14+
// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP0]]
15+
//
16+
// CHECK-STRICT-LABEL: @test_vmaxnmq_f16(
17+
// CHECK-STRICT-NEXT: entry:
18+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vmaxnm.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) #[[ATTR2:[0-9]+]]
19+
// CHECK-STRICT-NEXT: ret <8 x half> [[TMP0]]
1320
//
1421
float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
1522
{
@@ -20,10 +27,15 @@ float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b)
2027
#endif /* POLYMORPHIC */
2128
}
2229

23-
// CHECK-LABEL: @test_vmaxnmq_f32(
24-
// CHECK-NEXT: entry:
25-
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
26-
// CHECK-NEXT: ret <4 x float> [[TMP0]]
30+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_f32(
31+
// CHECK-NOSTRICT-NEXT: entry:
32+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
33+
// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP0]]
34+
//
35+
// CHECK-STRICT-LABEL: @test_vmaxnmq_f32(
36+
// CHECK-STRICT-NEXT: entry:
37+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vmaxnm.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR2]]
38+
// CHECK-STRICT-NEXT: ret <4 x float> [[TMP0]]
2739
//
2840
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
2941
{
@@ -34,12 +46,19 @@ float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b)
3446
#endif /* POLYMORPHIC */
3547
}
3648

37-
// CHECK-LABEL: @test_vmaxnmq_m_f16(
38-
// CHECK-NEXT: entry:
39-
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
40-
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
41-
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
42-
// CHECK-NEXT: ret <8 x half> [[TMP2]]
49+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f16(
50+
// CHECK-NOSTRICT-NEXT: entry:
51+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
52+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
53+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]])
54+
// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
55+
//
56+
// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f16(
57+
// CHECK-STRICT-NEXT: entry:
58+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
59+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
60+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) #[[ATTR2]]
61+
// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
4362
//
4463
float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
4564
{
@@ -50,12 +69,19 @@ float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t
5069
#endif /* POLYMORPHIC */
5170
}
5271

53-
// CHECK-LABEL: @test_vmaxnmq_m_f32(
54-
// CHECK-NEXT: entry:
55-
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
56-
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
57-
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
58-
// CHECK-NEXT: ret <4 x float> [[TMP2]]
72+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_m_f32(
73+
// CHECK-NOSTRICT-NEXT: entry:
74+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
75+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
76+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
77+
// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
78+
//
79+
// CHECK-STRICT-LABEL: @test_vmaxnmq_m_f32(
80+
// CHECK-STRICT-NEXT: entry:
81+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
82+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
83+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) #[[ATTR2]]
84+
// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
5985
//
6086
float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
6187
{
@@ -66,12 +92,19 @@ float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t
6692
#endif /* POLYMORPHIC */
6793
}
6894

69-
// CHECK-LABEL: @test_vmaxnmq_x_f16(
70-
// CHECK-NEXT: entry:
71-
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
72-
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
73-
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
74-
// CHECK-NEXT: ret <8 x half> [[TMP2]]
95+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f16(
96+
// CHECK-NOSTRICT-NEXT: entry:
97+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
98+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
99+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef)
100+
// CHECK-NOSTRICT-NEXT: ret <8 x half> [[TMP2]]
101+
//
102+
// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f16(
103+
// CHECK-STRICT-NEXT: entry:
104+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
105+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) #[[ATTR2]]
106+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) #[[ATTR2]]
107+
// CHECK-STRICT-NEXT: ret <8 x half> [[TMP2]]
75108
//
76109
float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
77110
{
@@ -82,12 +115,19 @@ float16x8_t test_vmaxnmq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
82115
#endif /* POLYMORPHIC */
83116
}
84117

85-
// CHECK-LABEL: @test_vmaxnmq_x_f32(
86-
// CHECK-NEXT: entry:
87-
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
88-
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
89-
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
90-
// CHECK-NEXT: ret <4 x float> [[TMP2]]
118+
// CHECK-NOSTRICT-LABEL: @test_vmaxnmq_x_f32(
119+
// CHECK-NOSTRICT-NEXT: entry:
120+
// CHECK-NOSTRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
121+
// CHECK-NOSTRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
122+
// CHECK-NOSTRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef)
123+
// CHECK-NOSTRICT-NEXT: ret <4 x float> [[TMP2]]
124+
//
125+
// CHECK-STRICT-LABEL: @test_vmaxnmq_x_f32(
126+
// CHECK-STRICT-NEXT: entry:
127+
// CHECK-STRICT-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
128+
// CHECK-STRICT-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) #[[ATTR2]]
129+
// CHECK-STRICT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) #[[ATTR2]]
130+
// CHECK-STRICT-NEXT: ret <4 x float> [[TMP2]]
91131
//
92132
float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
93133
{
@@ -97,3 +137,5 @@ float32x4_t test_vmaxnmq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
97137
return vmaxnmq_x_f32(a, b, p);
98138
#endif /* POLYMORPHIC */
99139
}
140+
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
141+
// CHECK: {{.*}}

0 commit comments

Comments
 (0)