Skip to content

Commit 9b30251

Browse files
committed
[AArch64] Add missing intrinsics for vrnd
1 parent 6955524 commit 9b30251

File tree

8 files changed

+204
-5
lines changed

8 files changed

+204
-5
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,6 +1210,13 @@ def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
12101210
def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
12111211
}
12121212

1213+
let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)" in {
1214+
def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
1215+
def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
1216+
def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
1217+
def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">;
1218+
}
1219+
12131220
////////////////////////////////////////////////////////////////////////////////
12141221
// MaxNum/MinNum Floating Point
12151222

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
182182

183183
void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
184184
MacroBuilder &Builder) const {
185+
Builder.defineMacro("__ARM_FEATURE_FRINT", "1");
185186
// Also include the Armv8.4 defines
186187
getTargetDefinesARMV84A(Opts, Builder);
187188
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5823,6 +5823,14 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
58235823
NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
58245824
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
58255825
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
5826+
NEONMAP1(vrnd32x_v, aarch64_neon_frint32x, Add1ArgType),
5827+
NEONMAP1(vrnd32xq_v, aarch64_neon_frint32x, Add1ArgType),
5828+
NEONMAP1(vrnd32z_v, aarch64_neon_frint32z, Add1ArgType),
5829+
NEONMAP1(vrnd32zq_v, aarch64_neon_frint32z, Add1ArgType),
5830+
NEONMAP1(vrnd64x_v, aarch64_neon_frint64x, Add1ArgType),
5831+
NEONMAP1(vrnd64xq_v, aarch64_neon_frint64x, Add1ArgType),
5832+
NEONMAP1(vrnd64z_v, aarch64_neon_frint64z, Add1ArgType),
5833+
NEONMAP1(vrnd64zq_v, aarch64_neon_frint64z, Add1ArgType),
58265834
NEONMAP0(vrndi_v),
58275835
NEONMAP0(vrndiq_v),
58285836
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -10539,6 +10547,30 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1053910547
: Intrinsic::trunc;
1054010548
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
1054110549
}
10550+
case NEON::BI__builtin_neon_vrnd32x_v:
10551+
case NEON::BI__builtin_neon_vrnd32xq_v: {
10552+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
10553+
Int = Intrinsic::aarch64_neon_frint32x;
10554+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
10555+
}
10556+
case NEON::BI__builtin_neon_vrnd32z_v:
10557+
case NEON::BI__builtin_neon_vrnd32zq_v: {
10558+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
10559+
Int = Intrinsic::aarch64_neon_frint32z;
10560+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
10561+
}
10562+
case NEON::BI__builtin_neon_vrnd64x_v:
10563+
case NEON::BI__builtin_neon_vrnd64xq_v: {
10564+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
10565+
Int = Intrinsic::aarch64_neon_frint64x;
10566+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
10567+
}
10568+
case NEON::BI__builtin_neon_vrnd64z_v:
10569+
case NEON::BI__builtin_neon_vrnd64zq_v: {
10570+
Ops.push_back(EmitScalarExpr(E->getArg(0)));
10571+
Int = Intrinsic::aarch64_neon_frint64z;
10572+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
10573+
}
1054210574
case NEON::BI__builtin_neon_vrnd_v:
1054310575
case NEON::BI__builtin_neon_vrndq_v: {
1054410576
Int = Builder.getIsFPConstrained()
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +v8.5a\
2+
// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \
3+
// RUN: | opt -S -mem2reg \
4+
// RUN: | FileCheck %s
5+
6+
// REQUIRES: aarch64-registered-target
7+
8+
#include <arm_neon.h>
9+
10+
// CHECK-LABEL: test_vrnd32x_f32
11+
// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
12+
// CHECK: ret <2 x float> [[RND]]
13+
float32x2_t test_vrnd32x_f32(float32x2_t a) {
14+
return vrnd32x_f32(a);
15+
}
16+
17+
// CHECK-LABEL: test_vrnd32xq_f32
18+
// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
19+
// CHECK: ret <4 x float> [[RND]]
20+
float32x4_t test_vrnd32xq_f32(float32x4_t a) {
21+
return vrnd32xq_f32(a);
22+
}
23+
24+
// CHECK-LABEL: test_vrnd32z_f32
25+
// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
26+
// CHECK: ret <2 x float> [[RND]]
27+
float32x2_t test_vrnd32z_f32(float32x2_t a) {
28+
return vrnd32z_f32(a);
29+
}
30+
31+
// CHECK-LABEL: test_vrnd32zq_f32
32+
// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
33+
// CHECK: ret <4 x float> [[RND]]
34+
float32x4_t test_vrnd32zq_f32(float32x4_t a) {
35+
return vrnd32zq_f32(a);
36+
}
37+
38+
// CHECK-LABEL: test_vrnd64x_f32
39+
// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
40+
// CHECK: ret <2 x float> [[RND]]
41+
float32x2_t test_vrnd64x_f32(float32x2_t a) {
42+
return vrnd64x_f32(a);
43+
}
44+
45+
// CHECK-LABEL: test_vrnd64xq_f32
46+
// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
47+
// CHECK: ret <4 x float> [[RND]]
48+
float32x4_t test_vrnd64xq_f32(float32x4_t a) {
49+
return vrnd64xq_f32(a);
50+
}
51+
52+
// CHECK-LABEL: test_vrnd64z_f32
53+
// CHECK: [[RND:%.*]] = call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
54+
// CHECK: ret <2 x float> [[RND]]
55+
float32x2_t test_vrnd64z_f32(float32x2_t a) {
56+
return vrnd64z_f32(a);
57+
}
58+
59+
// CHECK-LABEL: test_vrnd64zq_f32
60+
// CHECK: [[RND:%.*]] = call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
61+
// CHECK: ret <4 x float> [[RND]]
62+
float32x4_t test_vrnd64zq_f32(float32x4_t a) {
63+
return vrnd64zq_f32(a);
64+
}

clang/test/Preprocessor/aarch64-target-features.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,12 @@
5858
// RUN: %clang -target arm64-none-linux-gnu -march=armv8-a+crypto -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRYPTO %s
5959
// CHECK-CRYPTO: __ARM_FEATURE_CRYPTO 1
6060

61+
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.5-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-8_5 %s
62+
// CHECK-8_5: __ARM_FEATURE_FRINT 1
63+
64+
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.4-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-8_4 %s
65+
// CHECK-8_4-NOT: __ARM_FEATURE_FRINT 1
66+
6167
// RUN: %clang -target aarch64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
6268
// RUN: %clang -target arm64-none-linux-gnu -mcrc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s
6369
// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+crc -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-CRC32 %s

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,12 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
462462
// intrinsic.
463463
def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
464464

465+
// v8.5-A Vector FP Rounding
466+
def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
467+
def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;
468+
def int_aarch64_neon_frint64x : AdvSIMD_1FloatArg_Intrinsic;
469+
def int_aarch64_neon_frint64z : AdvSIMD_1FloatArg_Intrinsic;
470+
465471
// Scalar FP->Int conversions
466472

467473
// Vector FP Inexact Narrowing
@@ -475,7 +481,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
475481
def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
476482
def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;
477483

478-
// v8.6-A Matrix Multiply Intrinsics
484+
// v8.6-A Matrix Multiply Intrinsics
479485
def int_aarch64_neon_ummla : AdvSIMD_MatMul_Intrinsic;
480486
def int_aarch64_neon_smmla : AdvSIMD_MatMul_Intrinsic;
481487
def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4091,10 +4091,10 @@ defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
40914091
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
40924092

40934093
let Predicates = [HasFRInt3264] in {
4094-
defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z">;
4095-
defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z">;
4096-
defm FRINT32X : FRIntNNTVector<1, 0, "frint32x">;
4097-
defm FRINT64X : FRIntNNTVector<1, 1, "frint64x">;
4094+
defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
4095+
defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
4096+
defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
4097+
defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
40984098
} // HasFRInt3264
40994099

41004100
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.5a | FileCheck %s
2+
3+
declare <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float>)
4+
declare <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float>)
5+
declare <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float>)
6+
declare <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float>)
7+
8+
define dso_local <2 x float> @t_vrnd32x_f32(<2 x float> %a) {
9+
; CHECK-LABEL: t_vrnd32x_f32:
10+
; CHECK: frint32x v0.2s, v0.2s
11+
; CHECK-NEXT: ret
12+
entry:
13+
%val = tail call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> %a)
14+
ret <2 x float> %val
15+
}
16+
17+
define dso_local <4 x float> @t_vrnd32xq_f32(<4 x float> %a) {
18+
; CHECK-LABEL: t_vrnd32xq_f32:
19+
; CHECK: frint32x v0.4s, v0.4s
20+
; CHECK-NEXT: ret
21+
entry:
22+
%val = tail call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> %a)
23+
ret <4 x float> %val
24+
}
25+
26+
define dso_local <2 x float> @t_vrnd32z_f32(<2 x float> %a) {
27+
; CHECK-LABEL: t_vrnd32z_f32:
28+
; CHECK: frint32z v0.2s, v0.2s
29+
; CHECK-NEXT: ret
30+
entry:
31+
%val = tail call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> %a)
32+
ret <2 x float> %val
33+
}
34+
35+
define dso_local <4 x float> @t_vrnd32zq_f32(<4 x float> %a) {
36+
; CHECK-LABEL: t_vrnd32zq_f32:
37+
; CHECK: frint32z v0.4s, v0.4s
38+
; CHECK-NEXT: ret
39+
entry:
40+
%val = tail call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> %a)
41+
ret <4 x float> %val
42+
}
43+
44+
declare <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float>)
45+
declare <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float>)
46+
declare <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float>)
47+
declare <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float>)
48+
49+
define dso_local <2 x float> @t_vrnd64x_f32(<2 x float> %a) {
50+
; CHECK-LABEL: t_vrnd64x_f32:
51+
; CHECK: frint64x v0.2s, v0.2s
52+
; CHECK-NEXT: ret
53+
entry:
54+
%val = tail call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
55+
ret <2 x float> %val
56+
}
57+
58+
define dso_local <4 x float> @t_vrnd64xq_f32(<4 x float> %a) {
59+
; CHECK-LABEL: t_vrnd64xq_f32:
60+
; CHECK: frint64x v0.4s, v0.4s
61+
; CHECK-NEXT: ret
62+
entry:
63+
%val = tail call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
64+
ret <4 x float> %val
65+
}
66+
67+
define dso_local <2 x float> @t_vrnd64z_f32(<2 x float> %a) {
68+
; CHECK-LABEL: t_vrnd64z_f32:
69+
; CHECK: frint64z v0.2s, v0.2s
70+
; CHECK-NEXT: ret
71+
entry:
72+
%val = tail call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
73+
ret <2 x float> %val
74+
}
75+
76+
define dso_local <4 x float> @t_vrnd64zq_f32(<4 x float> %a) {
77+
; CHECK-LABEL: t_vrnd64zq_f32:
78+
; CHECK: frint64z v0.4s, v0.4s
79+
; CHECK-NEXT: ret
80+
entry:
81+
%val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
82+
ret <4 x float> %val
83+
}

0 commit comments

Comments
 (0)