From aa738ff95f7f2321e9ea606af0838525430cd7e4 Mon Sep 17 00:00:00 2001 From: Jason Marble Date: Sun, 26 Oct 2025 18:16:40 -0700 Subject: [PATCH] Fix #20262: array_unique() with SORT_REGULAR misses duplicates Fixed transitivity violation in SORT_REGULAR comparison that caused sort() and array_unique() to produce incorrect results with mixed numeric and non-numeric strings. The fix adds special handling in php_array_data_compare_unstable_i() to consistently order non-numeric strings before numeric strings when one is numeric and one is not. This ensures transitivity for sorting operations. Importantly, this fix does NOT change the behavior of comparison operators like <=>, maintaining backward compatibility. The fix only affects sorting and array_unique() operations with SORT_REGULAR, similar to how enum comparison is handled specially for sorting. --- ext/standard/array.c | 14 ++++ ext/standard/tests/array/gh20262.phpt | 99 +++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 ext/standard/tests/array/gh20262.phpt diff --git a/ext/standard/array.c b/ext/standard/array.c index 4097d71899011..a3c0ae4d81633 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -305,6 +305,20 @@ static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucke return -1; } } + + zval *lhs = &f->val; + ZVAL_DEREF(lhs); + if (Z_TYPE_P(lhs) == IS_STRING && Z_TYPE_P(rhs) == IS_STRING) { + bool lhs_is_numeric = is_numeric_string(Z_STRVAL_P(lhs), Z_STRLEN_P(lhs), NULL, NULL, false); + bool rhs_is_numeric = is_numeric_string(Z_STRVAL_P(rhs), Z_STRLEN_P(rhs), NULL, NULL, false); + + if (lhs_is_numeric != rhs_is_numeric) { + /* One is numeric, one is not. For transitivity, we order: + * non-numeric < numeric (to maintain common expectations). */ + return lhs_is_numeric ? 1 : -1; + } + } + return result; } /* }}} */ diff --git a/ext/standard/tests/array/gh20262.phpt b/ext/standard/tests/array/gh20262.phpt new file mode 100644 index 0000000000000..9874795a43556 --- /dev/null +++ b/ext/standard/tests/array/gh20262.phpt @@ -0,0 +1,99 @@ +--TEST-- +GH-20262: array_unique() with SORT_REGULAR fails to identify duplicates with mixed numeric/alphanumeric strings +--FILE-- + $val) { + if ($val === '5') { + $positions[] = $idx; + } +} + +$consecutive = true; +for ($i = 0; $i < count($positions) - 1; $i++) { + if ($positions[$i] + 1 !== $positions[$i + 1]) { + $consecutive = false; + break; + } +} + +echo "\nAll '5' values grouped together: " . ($consecutive ? "yes" : "no") . "\n"; + +// Verify <=> operator behavior is NOT changed +echo "\nComparison operator behavior (unchanged):\n"; +echo '"5" <=> "3A" = ' . ('5' <=> '3A') . " (lexicographic)\n"; +echo '"10" <=> "3A" = ' . ('10' <=> '3A') . " (lexicographic)\n"; +?> +--EXPECT-- +Input array: +array(6) { + [0]=> + string(1) "5" + [1]=> + string(2) "10" + [2]=> + string(1) "5" + [3]=> + string(2) "3A" + [4]=> + string(1) "5" + [5]=> + string(1) "5" +} + +Result of array_unique(): +array(3) { + [0]=> + string(1) "5" + [1]=> + string(2) "10" + [3]=> + string(2) "3A" +} + +Expected: 3 unique values ('5', '10', '3A') +Actual count: 3 + +Sorted array (equal values should be grouped): +array(6) { + [0]=> + string(2) "3A" + [1]=> + string(1) "5" + [2]=> + string(1) "5" + [3]=> + string(1) "5" + [4]=> + string(1) "5" + [5]=> + string(2) "10" +} + +All '5' values grouped together: yes + +Comparison operator behavior (unchanged): +"5" <=> "3A" = 1 (lexicographic) +"10" <=> "3A" = -1 (lexicographic)