Skip to content

Commit

Permalink
[AArch64] Make ACLE intrinsics always available part1
Browse files Browse the repository at this point in the history
A given architecture feature might be enabled by a pragma or a function attribute, so in these cases it would be nice to be able to use the intrinsics.
Today GCC offers the intrinsics without the -march flag [1].
There is a PR [2] against ACLE to clarify the intention and remove the need for the -march flag for a given intrinsic.

This is going to be more useful when D127812 lands.

[1] https://godbolt.org/z/bxcMhav3z
[2] ARM-software/acle#214

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D133359
  • Loading branch information
DanielKristofKiss committed Oct 14, 2022
1 parent 22e4203 commit 30b67c6
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 52 deletions.
30 changes: 15 additions & 15 deletions clang/include/clang/Basic/BuiltinsAArch64.def
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc")
BUILTIN(__builtin_arm_dsb, "vUi", "nc")
BUILTIN(__builtin_arm_isb, "vUi", "nc")

BUILTIN(__builtin_arm_jcvt, "Zid", "nc")
TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")

// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
Expand Down Expand Up @@ -107,24 +107,24 @@ BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
BUILTIN(__builtin_arm_ttest, "WUi", "nc")

// Armv8.5-A FP rounding intrinsics
BUILTIN(__builtin_arm_rint32zf, "ff", "")
BUILTIN(__builtin_arm_rint32z, "dd", "")
BUILTIN(__builtin_arm_rint64zf, "ff", "")
BUILTIN(__builtin_arm_rint64z, "dd", "")
BUILTIN(__builtin_arm_rint32xf, "ff", "")
BUILTIN(__builtin_arm_rint32x, "dd", "")
BUILTIN(__builtin_arm_rint64xf, "ff", "")
BUILTIN(__builtin_arm_rint64x, "dd", "")
TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint64zf, "ff", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint64z, "dd", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint32xf, "ff", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint32x, "dd", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint64xf, "ff", "", "v8.5a")
TARGET_BUILTIN(__builtin_arm_rint64x, "dd", "", "v8.5a")

// Armv8.5-A Random number generation intrinsics
BUILTIN(__builtin_arm_rndr, "iWUi*", "n")
BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n")
TARGET_BUILTIN(__builtin_arm_rndr, "iWUi*", "n", "rand")
TARGET_BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n", "rand")

// Armv8.7-A load/store 64-byte intrinsics
BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n")
BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n")
BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n")
BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n")
TARGET_BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")

TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
Expand Down
69 changes: 35 additions & 34 deletions clang/lib/Headers/arm_acle.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,122 +589,123 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
#endif

/* 9.7 CRC32 intrinsics */
#if defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
(defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32b(uint32_t __a, uint8_t __b) {
return __builtin_arm_crc32b(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32h(uint32_t __a, uint16_t __b) {
return __builtin_arm_crc32h(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32w(uint32_t __a, uint32_t __b) {
return __builtin_arm_crc32w(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32d(uint32_t __a, uint64_t __b) {
return __builtin_arm_crc32d(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cb(uint32_t __a, uint8_t __b) {
return __builtin_arm_crc32cb(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32ch(uint32_t __a, uint16_t __b) {
return __builtin_arm_crc32ch(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cw(uint32_t __a, uint32_t __b) {
return __builtin_arm_crc32cw(__a, __b);
}

static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
__crc32cd(uint32_t __a, uint64_t __b) {
return __builtin_arm_crc32cd(__a, __b);
}
#endif

/* Armv8.3-A Javascript conversion intrinsic */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
__jcvt(double __a) {
return __builtin_arm_jcvt(__a);
}
#endif

/* Armv8.5-A FP rounding intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_FRINT)
static __inline__ float __attribute__((__always_inline__, __nodebug__))
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32zf(float __a) {
return __builtin_arm_rint32zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__))
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32z(double __a) {
return __builtin_arm_rint32z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__))
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64zf(float __a) {
return __builtin_arm_rint64zf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__))
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64z(double __a) {
return __builtin_arm_rint64z(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__))
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32xf(float __a) {
return __builtin_arm_rint32xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__))
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint32x(double __a) {
return __builtin_arm_rint32x(__a);
}

static __inline__ float __attribute__((__always_inline__, __nodebug__))
static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64xf(float __a) {
return __builtin_arm_rint64xf(__a);
}

static __inline__ double __attribute__((__always_inline__, __nodebug__))
static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
__rint64x(double __a) {
return __builtin_arm_rint64x(__a);
}
#endif

/* Armv8.7-A load/store 64-byte intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_LS64)
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
typedef struct {
uint64_t val[8];
} data512_t;

static __inline__ data512_t __attribute__((__always_inline__, __nodebug__))
static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_ld64b(const void *__addr) {
data512_t __value;
__builtin_arm_ld64b(__addr, __value.val);
return __value;
data512_t __value;
__builtin_arm_ld64b(__addr, __value.val);
return __value;
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64b(void *__addr, data512_t __value) {
__builtin_arm_st64b(__addr, __value.val);
__builtin_arm_st64b(__addr, __value.val);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv(void *__addr, data512_t __value) {
return __builtin_arm_st64bv(__addr, __value.val);
return __builtin_arm_st64bv(__addr, __value.val);
}
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
__arm_st64bv0(void *__addr, data512_t __value) {
return __builtin_arm_st64bv0(__addr, __value.val);
return __builtin_arm_st64bv0(__addr, __value.val);
}
#endif

Expand Down Expand Up @@ -759,12 +760,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
#endif /* __ARM_FEATURE_TME */

/* Armv8.5-A Random number generation intrinsics */
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
static __inline__ int __attribute__((__always_inline__, __nodebug__))
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndr(uint64_t *__p) {
return __builtin_arm_rndr(__p);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))
static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
__rndrrs(uint64_t *__p) {
return __builtin_arm_rndrrs(__p);
}
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGen/arm_acle.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ void test_sevl(void) {
__sevl();
}

#if __ARM_32BIT_STATE
#ifdef __ARM_32BIT_STATE
// AArch32-LABEL: @test_dbg(
// AArch32-NEXT: entry:
// AArch32-NEXT: call void @llvm.arm.dbg(i32 0)
Expand Down Expand Up @@ -1646,7 +1646,7 @@ void test_wsrf64(double v) {
#endif
}

#ifdef __ARM_64BIT_STATE
#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_JCVT)
// AArch6483-LABEL: @test_jcvt(
// AArch6483-NEXT: entry:
// AArch6483-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.fjcvtzs(double [[V:%.*]])
Expand All @@ -1658,7 +1658,7 @@ int32_t test_jcvt(double v) {
#endif


#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)

// AArch6485-LABEL: @test_rndr(
// AArch6485-NEXT: entry:
Expand Down
3 changes: 3 additions & 0 deletions clang/test/CodeGen/builtins-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void prefetch(void) {
// CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
}

__attribute__((target("v8.5a")))
int32_t jcvt(double v) {
//CHECK-LABEL: @jcvt(
//CHECK: call i32 @llvm.aarch64.fjcvtzs
Expand Down Expand Up @@ -133,6 +134,7 @@ unsigned int clsll(uint64_t v) {
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
// CHECK-NEXT: ret i32 [[TMP3]]
//
// Test wrapper: forwards __addr to __builtin_arm_rndr and returns the builtin's
// int result unchanged. The function-level target("rand") attribute is what
// makes the builtin usable here without relying on a command-line feature flag.
__attribute__((target("rand")))
int rndr(uint64_t *__addr) {
return __builtin_arm_rndr(__addr);
}
Expand All @@ -146,6 +148,7 @@ int rndr(uint64_t *__addr) {
// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
// CHECK-NEXT: ret i32 [[TMP3]]
//
// Test wrapper: forwards __addr to __builtin_arm_rndrrs and returns the
// builtin's int result unchanged. The function-level target("rand") attribute
// is what makes the builtin usable here without relying on a command-line
// feature flag.
__attribute__((target("rand")))
int rndrrs(uint64_t *__addr) {
return __builtin_arm_rndrrs(__addr);
}
Expand Down

0 comments on commit 30b67c6

Please sign in to comment.