Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion Zend/zend_operators.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
# include <langinfo.h>
#endif

#ifdef ZEND_INTRIN_AVX2_NATIVE
#include <immintrin.h>
#endif
#ifdef __SSE2__
#include <emmintrin.h>
#endif
Expand All @@ -54,7 +57,30 @@ static _locale_t current_locale = NULL;

#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))

#if __SSE2__
#ifdef ZEND_INTRIN_AVX2_NATIVE
#define HAVE_BLOCKCONV

/*
 * Declare the two vector constants that BLOCKCONV_LOAD uses to test whether a
 * byte lies in the inclusive range [start, end] with one signed comparison:
 * adding blconv_offset (8-bit wrap-around) maps `start` onto SCHAR_MIN, so the
 * bytes inside the range are the ones that end up below blconv_threshold.
 *
 * `start` and `end` are parenthesized so that expression arguments
 * (e.g. `base + 1`) expand with the intended precedence.
 */
#define BLOCKCONV_INIT_RANGE(start, end) \
	const __m256i blconv_offset = _mm256_set1_epi8((signed char)(SCHAR_MIN - (start))); \
	const __m256i blconv_threshold = _mm256_set1_epi8(SCHAR_MIN + ((end) - (start)) + 1);

/* Size in bytes of one __m256i, the unit moved by BLOCKCONV_LOAD and BLOCKCONV_STORE. */
#define BLOCKCONV_STRIDE (sizeof(__m256i))

/*
 * Declare blconv_delta: a vector with `delta` replicated into every byte.
 * BLOCKCONV_STORE adds it to the bytes selected by blconv_mask.
 */
#define BLOCKCONV_INIT_DELTA(delta) \
	const __m256i blconv_delta = \
		_mm256_set1_epi8(delta);

/*
 * Read one vector from `input` with an unaligned load into blconv_operand and
 * build blconv_mask, which is set for every byte where
 * blconv_threshold > blconv_operand + blconv_offset (signed 8-bit compare).
 * Requires blconv_offset and blconv_threshold to be in scope.
 *
 * The comparison is written as cmpgt with the threshold first because AVX2
 * provides no _mm256_cmplt_epi8.
 */
#define BLOCKCONV_LOAD(input) \
	__m256i blconv_operand = _mm256_loadu_si256((__m256i*)(input)); \
	__m256i blconv_mask = _mm256_cmpgt_epi8( \
		blconv_threshold, \
		_mm256_add_epi8(blconv_operand, blconv_offset));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Maybe this should be changed to lt instead of gt, with the arguments swapped, in order to be symmetric with the SSE2 version.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😄 There is no such thing as _mm256_cmplt_epi8. That's why I had to change to cmpgt and swap the arguments to get the code to compile.

Ref: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8

If we want to make it symmetric, then we would need to change the SSE2 version.


/*
 * Collapse blconv_mask (set by BLOCKCONV_LOAD) into an int bitmask;
 * the result is non-zero when at least one byte of the vector matched.
 */
#define BLOCKCONV_FOUND() (_mm256_movemask_epi8(blconv_mask))

/*
 * Add blconv_delta to the bytes selected by blconv_mask (the AND zeroes the
 * delta for every other byte, leaving those unchanged) and write the
 * resulting vector to `dest` with an unaligned store.
 * Requires blconv_operand, blconv_mask and blconv_delta to be in scope.
 */
#define BLOCKCONV_STORE(dest) \
	__m256i blconv_add = \
		_mm256_and_si256(blconv_mask, blconv_delta); \
	__m256i blconv_result = \
		_mm256_add_epi8(blconv_operand, blconv_add); \
	_mm256_storeu_si256((__m256i*)(dest), blconv_result);

#elif __SSE2__
#define HAVE_BLOCKCONV

/* Common code for SSE2 accelerated character case conversion */
Expand Down
17 changes: 17 additions & 0 deletions ext/standard/tests/strings/strtoupper1.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ $strings = array (
"zzzzzzzzzzzzzzzzzzzz",
"````````````````````",
"{{{{{{{{{{{{{{{{{{{{",
/* And the AVX2 implementation also */
"{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{",
"abcdefghijklmnopqrstuvwxyz01234",
"abcdefghijklmnopqrstuvwxyz012345",
"abcdefghijklmnopqrstuvwxyz0123456",
"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
);

Expand Down Expand Up @@ -348,6 +353,18 @@ string(20) "````````````````````"
string(20) "{{{{{{{{{{{{{{{{{{{{"

-- Iteration 12 --
string(40) "{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{"

-- Iteration 13 --
string(31) "ABCDEFGHIJKLMNOPQRSTUVWXYZ01234"

-- Iteration 14 --
string(32) "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"

-- Iteration 15 --
string(33) "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456"

-- Iteration 16 --
string(62) "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

*** Testing strtoupper() with two different case strings ***
Expand Down