diff --git a/Zend/zend_execute_API.c b/Zend/zend_execute_API.c index 660975f9bc1b5..a3333f4e89690 100644 --- a/Zend/zend_execute_API.c +++ b/Zend/zend_execute_API.c @@ -196,6 +196,8 @@ void init_executor(void) /* {{{ */ EG(num_errors) = 0; EG(errors) = NULL; + EG(transitive_compare_mode) = false; + EG(filename_override) = NULL; EG(lineno_override) = -1; diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index ef81ae5faaf25..11646fa77d1c2 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -301,6 +301,10 @@ struct _zend_executor_globals { uint32_t num_errors; zend_error_info **errors; + /* If transitive_compare_mode is enabled, zendi_smart_strcmp, compare_long_to_string and + * compare_double_to_string enforce a transitive order: empty string < numeric < non-numeric string. */ + bool transitive_compare_mode; + /* Override filename or line number of thrown errors and exceptions */ zend_string *filename_override; zend_long lineno_override; diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 2550fcbeb1cde..7def07d19096c 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -2271,6 +2271,18 @@ static int compare_long_to_string(zend_long lval, zend_string *str) /* {{{ */ return ZEND_THREEWAY_COMPARE((double) lval, str_dval); } + /* String is non-numeric. In transitive mode, enforce consistent ordering. + * Empty string < numeric < non-numeric string. + * Since str is non-numeric, check if it's empty. 
*/ + if (UNEXPECTED(EG(transitive_compare_mode))) { + /* Empty string comes before everything */ + if (ZSTR_LEN(str) == 0) { + return 1; /* lval > empty string */ + } + /* Non-empty, non-numeric string comes after numbers */ + return -1; /* lval < non-numeric string */ + } + zend_string *lval_as_str = zend_long_to_str(lval); int cmp_result = zend_binary_strcmp( ZSTR_VAL(lval_as_str), ZSTR_LEN(lval_as_str), ZSTR_VAL(str), ZSTR_LEN(str)); @@ -2295,6 +2307,18 @@ static int compare_double_to_string(double dval, zend_string *str) /* {{{ */ return ZEND_THREEWAY_COMPARE(dval, str_dval); } + /* String is non-numeric. In transitive mode, enforce consistent ordering. + * Empty string < numeric < non-numeric string. + * Since str is non-numeric, check if it's empty. */ + if (UNEXPECTED(EG(transitive_compare_mode))) { + /* Empty string comes before everything */ + if (ZSTR_LEN(str) == 0) { + return 1; /* dval > empty string */ + } + /* Non-empty, non-numeric string comes after numbers */ + return -1; /* dval < non-numeric string */ + } + zend_string *dval_as_str = zend_double_to_str(dval); int cmp_result = zend_binary_strcmp( ZSTR_VAL(dval_as_str), ZSTR_LEN(dval_as_str), ZSTR_VAL(str), ZSTR_LEN(str)); @@ -3425,8 +3449,30 @@ ZEND_API int ZEND_FASTCALL zendi_smart_strcmp(zend_string *s1, zend_string *s2) zend_long lval1 = 0, lval2 = 0; double dval1 = 0.0, dval2 = 0.0; - if ((ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, false, &oflow1, NULL)) && - (ret2 = is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, false, &oflow2, NULL))) { + ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, false, &oflow1, NULL); + ret2 = is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, false, &oflow2, NULL); + + /* When in transitive comparison mode (used by SORT_REGULAR), enforce transitivity + * by consistently ordering numeric vs non-numeric strings. 
*/ + bool num1 = ret1 != 0; + bool num2 = ret2 != 0; + if (UNEXPECTED(EG(transitive_compare_mode)) && (num1 ^ num2)) { + /* One is numeric, one is not. + * Special case: empty strings are non-numeric but sort BEFORE numeric strings. + * Order: empty < numeric < non-numeric (matches PHP 8+ comparison semantics) */ + bool is_empty1 = (s1->len == 0); + bool is_empty2 = (s2->len == 0); + + if (is_empty1 || is_empty2) { + /* If one is empty, empty comes first */ + return is_empty1 ? -1 : 1; + } + + /* Neither is empty: numeric < non-numeric */ + return ret1 ? -1 : 1; + } + + if (ret1 && ret2) { #if ZEND_ULONG_MAX == 0xFFFFFFFF if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. && ((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/) diff --git a/ext/standard/array.c b/ext/standard/array.c index 4097d71899011..881d21d649b50 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -285,7 +285,16 @@ static zend_always_inline int php_array_key_compare_string_locale_unstable_i(Buc static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucket *s) /* {{{ */ { + /* Enable transitive comparison mode for this comparison tree. + * Save the previous state to handle reentrancy (e.g., usort with callback that calls sort). */ + bool old_transitive_mode = EG(transitive_compare_mode); + EG(transitive_compare_mode) = true; + int result = zend_compare(&f->val, &s->val); + + /* Restore previous state */ + EG(transitive_compare_mode) = old_transitive_mode; + /* Special enums handling for array_unique. We don't want to add this logic to zend_compare as * that would be observable via comparison operators. 
*/ zval *rhs = &s->val; diff --git a/ext/standard/tests/array/gh20262.phpt b/ext/standard/tests/array/gh20262.phpt new file mode 100644 index 0000000000000..4a64c8d8ce357 --- /dev/null +++ b/ext/standard/tests/array/gh20262.phpt @@ -0,0 +1,93 @@ +--TEST-- +GH-20262 (array_unique() with SORT_REGULAR returns duplicate values) +--FILE-- +streetNumber; +} +echo "\n"; + +echo "\nTest 4: Nested arrays\n"; +$addresses = [ + ['streetNumber' => '5', 'streetName' => 'Main St'], + ['streetNumber' => '10', 'streetName' => 'Main St'], + ['streetNumber' => '10', 'streetName' => 'Main St'], + ['streetNumber' => '3A', 'streetName' => 'Main St'], + ['streetNumber' => '5', 'streetName' => 'Main St'], +]; + +$unique = array_unique($addresses, SORT_REGULAR); +echo "Unique count: " . count($unique) . " (expected 3)\n"; +echo "Street numbers:"; +foreach ($unique as $addr) { + echo " " . $addr['streetNumber']; +} +echo "\n"; + +echo "\nTest 5: sort() consistency with SORT_REGULAR\n"; +$arr1 = ["5", "10", "3A"]; +$arr2 = ["3A", "10", "5"]; +sort($arr1, SORT_REGULAR); +sort($arr2, SORT_REGULAR); +echo "arr1 sorted: ['" . implode("', '", $arr1) . "']\n"; +echo "arr2 sorted: ['" . implode("', '", $arr2) . "']\n"; +echo "Results match: " . ($arr1 === $arr2 ? "yes" : "no") . 
"\n"; + +?> +--EXPECT-- +Test 1: Scalar array (original bug report) +Array +( + [0] => 5 + [1] => 10 + [3] => 3A +) + +Test 2: Same array with SORT_STRING +Array +( + [0] => 5 + [1] => 10 + [3] => 3A +) + +Test 3: Objects +Unique count: 3 (expected 3) +Street numbers: 5 10 3A + +Test 4: Nested arrays +Unique count: 3 (expected 3) +Street numbers: 5 10 3A + +Test 5: sort() consistency with SORT_REGULAR +arr1 sorted: ['5', '10', '3A'] +arr2 sorted: ['5', '10', '3A'] +Results match: yes diff --git a/ext/standard/tests/array/sort/sort_variation_numeric_strings.phpt b/ext/standard/tests/array/sort/sort_variation_numeric_strings.phpt new file mode 100644 index 0000000000000..5cf9d555f37f3 --- /dev/null +++ b/ext/standard/tests/array/sort/sort_variation_numeric_strings.phpt @@ -0,0 +1,241 @@ +--TEST-- +Test sort() function: SORT_REGULAR with numeric string edge cases +--FILE-- + +--EXPECTF-- +*** Testing sort() : SORT_REGULAR with numeric edge cases *** + +-- Test 1: Empty string and zero variations -- +array(4) { + [0]=> + string(0) "" + [1]=> + string(1) "0" + [2]=> + string(2) "00" + [3]=> + string(1) "A" +} + +-- Test 2: Numeric strings with whitespace and signs -- +array(5) { + [0]=> + string(2) "-0" + [1]=> + string(1) "0" + [2]=> + string(2) " 5" + [3]=> + string(2) "+5" + [4]=> + string(1) "A" +} + +-- Test 3: Scientific notation and special floats -- +array(5) { + [0]=> + string(3) "5e2" + [1]=> + string(3) "500" + [2]=> + string(4) "-INF" + [3]=> + string(3) "INF" + [4]=> + string(3) "NAN" +} + +-- Test 4: Hexadecimal, binary and decimal strings -- +array(3) { + [0]=> + string(2) "16" + [1]=> + string(7) "0b10000" + [2]=> + string(4) "0x10" +} + +-- Test 5: Mixed integers and numeric strings -- +array(5) { + [0]=> + string(0) "" + [1]=> + int(5) + [2]=> + int(10) + [3]=> + string(2) "10" + [4]=> + string(2) "3A" +} + +-- Test 6: LONG_MAX boundary -- +array(3) { + [0]=> + string(19) "9223372036854775807" + [1]=> + string(19) "9223372036854775808" + [2]=> + 
%r(int\(9223372036854775807\)|float\(9\.22337203685477[0-9]E\+18\))%r +} + +-- Test 7: Leading/trailing whitespace -- +array(5) { + [0]=> + string(1) "5" + [1]=> + string(2) " 5" + [2]=> + string(2) "5 " + [3]=> + string(3) " 5 " + [4]=> + string(1) "A" +} + +-- Test 8: Zero variations with signs -- +array(5) { + [0]=> + string(1) "0" + [1]=> + string(2) "-0" + [2]=> + string(2) "+0" + [3]=> + string(3) "0.0" + [4]=> + string(4) "-0.0" +} + +-- Test 9: Multiple plus/minus signs -- +array(5) { + [0]=> + string(1) "5" + [1]=> + string(3) "++5" + [2]=> + string(3) "+-5" + [3]=> + string(3) "-+5" + [4]=> + string(3) "--5" +} + +-- Test 10: Decimal point variations -- +array(5) { + [0]=> + string(2) "0." + [1]=> + string(2) ".0" + [2]=> + string(3) "0.0" + [3]=> + string(1) "." + [4]=> + string(1) "A" +} + +-- Test 11: Leading zeros with different values -- +array(5) { + [0]=> + string(2) "00" + [1]=> + string(1) "0" + [2]=> + string(2) "01" + [3]=> + string(3) "001" + [4]=> + string(1) "1" +} + +-- Test 12: Scientific notation variations -- +array(5) { + [0]=> + string(4) "1e-2" + [1]=> + string(3) "1e2" + [2]=> + string(3) "1E2" + [3]=> + string(4) "1e+2" + [4]=> + string(3) "100" +} + +-- Test 13: Consistency check -- +All runs produce same result: yes +Done