Skip to content

Commit

Permalink
Windows arm64 zend and standard extension support
Browse files Browse the repository at this point in the history
* Port zend_cpuid for windows arm64
* Fix zend_atomic windows arm64 build
* Fix windows arm64 multiply
* Enable arm64 neon for windows in standard extension
* Enable arm64 neon for windows in zend_hash.c
* Workaround for msvc arm64 optimization bug

Closes GH-9115.
  • Loading branch information
dixyes authored and cmb69 committed Aug 9, 2022
1 parent aeabb51 commit 3a843f9
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 28 deletions.
7 changes: 7 additions & 0 deletions Zend/zend_atomic.h
Expand Up @@ -57,6 +57,13 @@ BEGIN_EXTERN_C()

#if ZEND_WIN32

#ifndef InterlockedExchange8
#define InterlockedExchange8 _InterlockedExchange8
#endif
#ifndef InterlockedOr8
#define InterlockedOr8 _InterlockedOr8
#endif

#define ZEND_ATOMIC_BOOL_INIT(obj, desired) ((obj)->value = (desired))

static zend_always_inline bool zend_atomic_bool_exchange_ex(zend_atomic_bool *obj, bool desired) {
Expand Down
18 changes: 9 additions & 9 deletions Zend/zend_cpuinfo.c
Expand Up @@ -28,15 +28,15 @@ typedef struct _zend_cpu_info {

static zend_cpu_info cpuinfo = {0};

#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
# if defined(HAVE_CPUID_H) && defined(HAVE_CPUID_COUNT)
# include <cpuid.h>
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
# if defined(HAVE_CPUID_H) && defined(HAVE_CPUID_COUNT) /* use cpuid.h functions */
# include <cpuid.h>
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
__cpuid_count(func, subfunc, cpuinfo->eax, cpuinfo->ebx, cpuinfo->ecx, cpuinfo->edx);
}
# else
# else /* use inline asm */
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
#if defined(__i386__) && (defined(__pic__) || defined(__PIC__))
# if defined(__i386__) && (defined(__pic__) || defined(__PIC__))
/* PIC on i386 uses %ebx, so preserve it. */
__asm__ __volatile__ (
"pushl %%ebx\n"
Expand All @@ -46,16 +46,16 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo
: "=a"(cpuinfo->eax), "=r"(cpuinfo->ebx), "=c"(cpuinfo->ecx), "=d"(cpuinfo->edx)
: "a"(func), "c"(subfunc)
);
#else
# else
__asm__ __volatile__ (
"cpuid"
: "=a"(cpuinfo->eax), "=b"(cpuinfo->ebx), "=c"(cpuinfo->ecx), "=d"(cpuinfo->edx)
: "a"(func), "c"(subfunc)
);
#endif
# endif
}
# endif
#elif defined(ZEND_WIN32) && !defined(__clang__)
#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_X64) || defined(_M_IX86)) /* use MSVC __cpuidex intrin */
# include <intrin.h>
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
int regs[4];
Expand All @@ -67,7 +67,7 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo
cpuinfo->ecx = regs[2];
cpuinfo->edx = regs[3];
}
#else
#else /* fall back to zero */
static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) {
cpuinfo->eax = 0;
}
Expand Down
4 changes: 2 additions & 2 deletions Zend/zend_hash.c
Expand Up @@ -22,7 +22,7 @@
#include "zend_globals.h"
#include "zend_variables.h"

#if defined(__aarch64__)
#if defined(__aarch64__) || defined(_M_ARM64)
# include <arm_neon.h>
#endif

Expand Down Expand Up @@ -183,7 +183,7 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht)
_mm_storeu_si128((__m128i*)&HT_HASH_EX(data, 8), xmm0);
_mm_storeu_si128((__m128i*)&HT_HASH_EX(data, 12), xmm0);
} while (0);
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
do {
int32x4_t t = vdupq_n_s32(-1);
vst1q_s32((int32_t*)&HT_HASH_EX(data, 0), t);
Expand Down
13 changes: 13 additions & 0 deletions Zend/zend_multiply.h
Expand Up @@ -94,6 +94,19 @@
(dval) = (double)(a) * (double)(b); \
} \
} while (0)
# elif defined(_M_ARM64)
# pragma intrinsic(__mulh)
# define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \
__int64 __high = __mulh((a), (b)); \
__int64 __low = (a) * (b); \
if ((__low >> 63I64) == __high) { \
(usedval) = 0; \
(lval) = __low; \
} else { \
(usedval) = 1; \
(dval) = (double)(a) * (double)(b); \
} \
} while (0)
# else
# define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \
zend_long __lres = (a) * (b); \
Expand Down
9 changes: 8 additions & 1 deletion ext/json/json_encoder.c
Expand Up @@ -71,7 +71,14 @@ static inline void php_json_pretty_print_indent(smart_str *buf, int options, php

/* }}} */

static inline bool php_json_is_valid_double(double d) /* {{{ */
static
#if defined(_MSC_VER) && defined(_M_ARM64)
// MSVC bug: https://developercommunity.visualstudio.com/t/corrupt-optimization-on-arm64-with-Ox-/10102551
zend_never_inline
#else
inline
#endif
bool php_json_is_valid_double(double d) /* {{{ */
{
return !zend_isinf(d) && !zend_isnan(d);
}
Expand Down
2 changes: 1 addition & 1 deletion ext/opcache/ZendAccelerator.c
Expand Up @@ -2190,7 +2190,7 @@ zend_op_array *persistent_compile_file(zend_file_handle *file_handle, int type)
ZCSG(hits)++; /* TBFixed: may lose one hit */
persistent_script->dynamic_members.hits++; /* see above */
#else
#ifdef _M_X64
#if ZEND_ENABLE_ZVAL_LONG64
InterlockedIncrement64(&ZCSG(hits));
#else
InterlockedIncrement(&ZCSG(hits));
Expand Down
12 changes: 6 additions & 6 deletions ext/standard/base64.c
Expand Up @@ -51,7 +51,7 @@ static const short base64_reverse_table[256] = {
};
/* }}} */

#ifdef __aarch64__
#if defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>

static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
Expand Down Expand Up @@ -118,11 +118,11 @@ static zend_always_inline unsigned char *neon_base64_encode(const unsigned char
*left = inl;
return out;
}
#endif /* __aarch64__ */
#endif /* defined(__aarch64__) || defined(_M_ARM64) */

static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
{
#ifdef __aarch64__
#if defined(__aarch64__) || defined(_M_ARM64)
if (inl >= 16 * 3) {
size_t left = 0;
out = neon_base64_encode(in, inl, out, &left);
Expand Down Expand Up @@ -161,7 +161,7 @@ static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned c
}
/* }}} */

#ifdef __aarch64__
#if defined(__aarch64__) || defined(_M_ARM64)
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
const uint8x16_t higher_nibble = vshrq_n_u8(input, 4);
const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f));
Expand Down Expand Up @@ -241,14 +241,14 @@ static zend_always_inline size_t neon_base64_decode(const unsigned char *in, siz
*left = inl;
return out - out_orig;
}
#endif /* __aarch64__ */
#endif /* defined(__aarch64__) || defined(_M_ARM64) */

static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */
{
int ch;
size_t i = 0, padding = 0, j = *outl;

#ifdef __aarch64__
#if defined(__aarch64__) || defined(_M_ARM64)
if (inl >= 16 * 4) {
size_t left = 0;
j += neon_base64_decode(in, inl, out, &left);
Expand Down
31 changes: 22 additions & 9 deletions ext/standard/string.c
Expand Up @@ -3338,7 +3338,7 @@ PHP_FUNCTION(strtr)
/* {{{ Reverse a string */
#ifdef ZEND_INTRIN_SSSE3_NATIVE
#include <tmmintrin.h>
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
#include <arm_neon.h>
#endif
PHP_FUNCTION(strrev)
Expand Down Expand Up @@ -3385,6 +3385,19 @@ PHP_FUNCTION(strrev)
e -= 16;
} while (e - s > 15);
}
#elif defined(_M_ARM64)
if (e - s > 15) {
do {
const __n128 str = vld1q_u8((uint8_t *)(e - 15));
/* Synthesize rev128 with a rev64 + ext. */
/* strange force cast limit on windows: you cannot convert anything */
const __n128 rev = vrev64q_u8(str);
const __n128 ext = vextq_u64(rev, rev, 1);
vst1q_u8((uint8_t *)p, ext);
p += 16;
e -= 16;
} while (e - s > 15);
}
#endif
while (e >= s) {
*p++ = *e--;
Expand Down Expand Up @@ -3864,7 +3877,7 @@ zend_string *php_addslashes_sse42(zend_string *str)
/* }}} */
#endif

#ifdef __aarch64__
#if defined(__aarch64__) || defined(_M_ARM64)
typedef union {
uint8_t mem[16];
uint64_t dw[2];
Expand Down Expand Up @@ -3899,7 +3912,7 @@ static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *s
}
return target;
}
#endif /* __aarch64__ */
#endif /* defined(__aarch64__) || defined(_M_ARM64) */

#ifndef ZEND_INTRIN_SSE4_2_NATIVE
# ifdef ZEND_INTRIN_SSE4_2_RESOLVER
Expand All @@ -3921,7 +3934,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
source = ZSTR_VAL(str);
end = source + ZSTR_LEN(str);

# ifdef __aarch64__
# if defined(__aarch64__) || defined(_M_ARM64)
quad_word res = {0};
if (ZSTR_LEN(str) > 15) {
do {
Expand All @@ -3932,7 +3945,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
} while ((end - source) > 15);
}
/* Finish the last 15 bytes or less with the scalar loop. */
# endif /* __aarch64__ */
# endif /* defined(__aarch64__) || defined(_M_ARM64) */

while (source < end) {
switch (*source) {
Expand All @@ -3955,7 +3968,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
target = ZSTR_VAL(new_str) + offset;

# ifdef __aarch64__
# if defined(__aarch64__) || defined(_M_ARM64)
if (res.dw[0] | res.dw[1]) {
target = aarch64_add_slashes(res, source, target);
source += 16;
Expand All @@ -3971,7 +3984,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
}
}
/* Finish the last 15 bytes or less with the scalar loop. */
# endif /* __aarch64__ */
# endif /* defined(__aarch64__) || defined(_M_ARM64) */

while (source < end) {
switch (*source) {
Expand Down Expand Up @@ -4010,7 +4023,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str)
* be careful, this edits the string in-place */
static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len)
{
#ifdef __aarch64__
#if defined(__aarch64__) || defined(_M_ARM64)
while (len > 15) {
uint8x16_t x = vld1q_u8((uint8_t *)str);
quad_word q;
Expand Down Expand Up @@ -4040,7 +4053,7 @@ static zend_always_inline char *php_stripslashes_impl(const char *str, char *out
}
}
/* Finish the last 15 bytes or less with the scalar loop. */
#endif /* __aarch64__ */
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
while (len > 0) {
if (*str == '\\') {
str++; /* skip the slash */
Expand Down

0 comments on commit 3a843f9

Please sign in to comment.