From 623393f31e20768b97287381af93355cc01bee2e Mon Sep 17 00:00:00 2001
From: Yi Kong
Date: Thu, 28 Aug 2014 09:44:07 +0000
Subject: [PATCH] arm_acle: Implement data processing intrinsics

Summary:
ACLE 2.0 section 9.2 defines the following "miscellaneous data processing intrinsics": `__clz`, `__cls`, `__ror`, `__rev`, `__rev16`, `__revsh` and `__rbit`.

`__clz` has already been implemented in the arm_acle.h header file. The rest are not supported yet. This patch completes ACLE data processing intrinsics.

Reviewers: t.p.northover, rengolin

Reviewed By: rengolin

Subscribers: aemerson, mroth, llvm-commits

Differential Revision: http://reviews.llvm.org/D4983

llvm-svn: 216658
---
 clang/lib/Headers/arm_acle.h  |  79 ++++++++++++++++++++++++
 clang/test/CodeGen/arm_acle.c | 108 ++++++++++++++++++++++++++++++----
 2 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index f7c71a6465d71..8238323f5e11a 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -108,6 +108,32 @@ static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) {
 
 /* 9 DATA-PROCESSING INTRINSICS */
 /* 9.2 Miscellaneous data-processing intrinsics */
+/* ROR: rotate x right by y bits; y is reduced modulo the operand width. */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __ror(uint32_t x, uint32_t y) {
+  y %= 32;
+  if (y == 0) return x; /* also avoids the undefined shift by 32 below */
+  return (x >> y) | (x << (32 - y));
+}
+
+static __inline__ uint64_t __attribute__((always_inline, nodebug))
+  __rorll(uint64_t x, uint32_t y) {
+  y %= 64;
+  if (y == 0) return x; /* also avoids the undefined shift by 64 below */
+  return (x >> y) | (x << (64 - y));
+}
+
+static __inline__ unsigned long __attribute__((always_inline, nodebug))
+  __rorl(unsigned long x, uint32_t y) {
+#if __SIZEOF_LONG__ == 4
+  return __ror(x, y);
+#else
+  return __rorll(x, y);
+#endif
+}
+
+
+/* CLZ */
 static __inline__ uint32_t __attribute__((always_inline, nodebug))
   __clz(uint32_t t) {
   return __builtin_clz(t);
@@ -123,6 +149,7 @@ static __inline__ uint64_t __attribute__((always_inline, nodebug))
   return __builtin_clzll(t);
 }
 
+/* REV */
 static __inline__ uint32_t __attribute__((always_inline, nodebug))
   __rev(uint32_t t) {
   return __builtin_bswap32(t);
@@ -142,6 +169,58 @@ static __inline__ uint64_t __attribute__((always_inline, nodebug))
   return __builtin_bswap64(t);
 }
 
+/* REV16: reverse the byte order within each 16-bit halfword. */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __rev16(uint32_t t) {
+  return __ror(__rev(t), 16);
+}
+
+static __inline__ uint64_t __attribute__((always_inline, nodebug))
+  __rev16ll(uint64_t t) {
+  /* Halfwords never straddle the two 32-bit words: handle each separately. */
+  return (((uint64_t) __rev16(t >> 32)) << 32) | __rev16((uint32_t) t);
+}
+
+static __inline__ unsigned long __attribute__((always_inline, nodebug))
+  __rev16l(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+  return __rev16(t);
+#else
+  return __rev16ll(t);
+#endif
+}
+
+/* REVSH */
+static __inline__ int16_t __attribute__((always_inline, nodebug))
+  __revsh(int16_t t) {
+  return __builtin_bswap16(t);
+}
+
+/* RBIT */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __rbit(uint32_t t) {
+  return __builtin_arm_rbit(t);
+}
+
+static __inline__ uint64_t __attribute__((always_inline, nodebug))
+  __rbitll(uint64_t t) {
+#if __ARM_32BIT_STATE
+  return (((uint64_t) __builtin_arm_rbit(t)) << 32) |
+         __builtin_arm_rbit(t >> 32);
+#else
+  return __builtin_arm_rbit64(t);
+#endif
+}
+
+static __inline__ unsigned long __attribute__((always_inline, nodebug))
+  __rbitl(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+  return __rbit(t);
+#else
+  return __rbitll(t);
+#endif
+}
+
 /*
  * 9.4 Saturating intrinsics
  *
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 0f226b516dd72..5b024506a50e4 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -119,23 +119,31 @@ void test_nop(void) {
 
 /* 9 DATA-PROCESSING INTRINSICS */
 /* 9.2 Miscellaneous data-processing intrinsics */
-// ARM-LABEL: test_rev
-// ARM: call i32 @llvm.bswap.i32(i32 %t)
-uint32_t test_rev(uint32_t t) {
-  return __rev(t);
+// ARM-LABEL: test_ror
+// ARM: lshr
+// ARM: sub
+// ARM: shl
+// ARM: or
+uint32_t test_ror(uint32_t x, uint32_t y) {
+  return __ror(x, y);
 }
 
-// ARM-LABEL: test_revl
-// AArch32: call i32 @llvm.bswap.i32(i32 %t)
-// AArch64: call i64 @llvm.bswap.i64(i64 %t)
-long test_revl(long t) {
-  return __revl(t);
+// ARM-LABEL: test_rorl
+// ARM: lshr
+// ARM: sub
+// ARM: shl
+// ARM: or
+unsigned long test_rorl(unsigned long x, uint32_t y) {
+  return __rorl(x, y);
 }
 
-// ARM-LABEL: test_revll
-// ARM: call i64 @llvm.bswap.i64(i64 %t)
-uint64_t test_revll(uint64_t t) {
-  return __revll(t);
+// ARM-LABEL: test_rorll
+// ARM: lshr
+// ARM: sub
+// ARM: shl
+// ARM: or
+uint64_t test_rorll(uint64_t x, uint32_t y) {
+  return __rorll(x, y);
 }
 
 // ARM-LABEL: test_clz
@@ -157,6 +165,80 @@ uint64_t test_clzll(uint64_t t) {
   return __clzll(t);
 }
 
+// ARM-LABEL: test_rev
+// ARM: call i32 @llvm.bswap.i32(i32 %t)
+uint32_t test_rev(uint32_t t) {
+  return __rev(t);
+}
+
+// ARM-LABEL: test_revl
+// AArch32: call i32 @llvm.bswap.i32(i32 %t)
+// AArch64: call i64 @llvm.bswap.i64(i64 %t)
+long test_revl(long t) {
+  return __revl(t);
+}
+
+// ARM-LABEL: test_revll
+// ARM: call i64 @llvm.bswap.i64(i64 %t)
+uint64_t test_revll(uint64_t t) {
+  return __revll(t);
+}
+
+// ARM-LABEL: test_rev16
+// ARM: llvm.bswap
+// ARM: lshr
+// ARM: shl
+// ARM: or
+uint32_t test_rev16(uint32_t t) {
+  return __rev16(t);
+}
+
+// ARM-LABEL: test_rev16l
+// ARM: llvm.bswap
+// ARM: lshr
+// ARM: shl
+// ARM: or
+long test_rev16l(long t) {
+  return __rev16l(t);
+}
+
+// ARM-LABEL: test_rev16ll
+// ARM: llvm.bswap
+// ARM: lshr
+// ARM: shl
+// ARM: or
+uint64_t test_rev16ll(uint64_t t) {
+  return __rev16ll(t);
+}
+
+// ARM-LABEL: test_revsh
+// ARM: call i16 @llvm.bswap.i16(i16 %t)
+int16_t test_revsh(int16_t t) {
+  return __revsh(t);
+}
+
+// ARM-LABEL: test_rbit
+// AArch32: call i32 @llvm.arm.rbit
+// AArch64: call i32 @llvm.aarch64.rbit.i32
+uint32_t test_rbit(uint32_t t) {
+  return __rbit(t);
+}
+
+// ARM-LABEL: test_rbitl
+// AArch32: call i32 @llvm.arm.rbit
+// AArch64: call i64 @llvm.aarch64.rbit.i64
+long test_rbitl(long t) {
+  return __rbitl(t);
+}
+
+// ARM-LABEL: test_rbitll
+// AArch32: call i32 @llvm.arm.rbit
+// AArch32: call i32 @llvm.arm.rbit
+// AArch64: call i64 @llvm.aarch64.rbit.i64
+uint64_t test_rbitll(uint64_t t) {
+  return __rbitll(t);
+}
+
 /* 9.4 Saturating intrinsics */
 #ifdef __ARM_32BIT_STATE
 