Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions ext/standard/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,20 @@ static zend_always_inline int php_array_data_compare_unstable_i(Bucket *f, Bucke
return -1;
}
}

zval *lhs = &f->val;
ZVAL_DEREF(lhs);
if (Z_TYPE_P(lhs) == IS_STRING && Z_TYPE_P(rhs) == IS_STRING) {
bool lhs_is_numeric = is_numeric_string(Z_STRVAL_P(lhs), Z_STRLEN_P(lhs), NULL, NULL, false);
bool rhs_is_numeric = is_numeric_string(Z_STRVAL_P(rhs), Z_STRLEN_P(rhs), NULL, NULL, false);

if (lhs_is_numeric != rhs_is_numeric) {
/* One is numeric, one is not. For transitivity, we order:
* non-numeric < numeric (to maintain common expectations). */
return lhs_is_numeric ? 1 : -1;
}
}

return result;
}
/* }}} */
Expand Down
99 changes: 99 additions & 0 deletions ext/standard/tests/array/gh20262.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
--TEST--
GH-20262: array_unique() with SORT_REGULAR fails to identify duplicates with mixed numeric/alphanumeric strings
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be a good idea to verify this PR against a naive implementation (two nested loops using the == operator) on random data.

Copy link
Author

@jmarble jmarble Oct 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's a simple test with a small amount of random data, looped 1k times:
https://gist.github.com/jmarble/c7debbadbca5f2dc21a160f0a48a65af

--FILE--
<?php
// Regression test for GH-20262: with SORT_REGULAR, array_unique() must
// recognise duplicates even when the input mixes numeric strings ('5', '10')
// with alphanumeric strings ('3A').
$units = ['5', '10', '5', '3A', '5', '5'];
$unique = array_unique($units, SORT_REGULAR);

echo "Input array:\n";
var_dump($units);

echo "\nResult of array_unique():\n";
var_dump($unique);

echo "\nExpected: 3 unique values ('5', '10', '3A')\n";
echo "Actual count: " . count($unique) . "\n";

// Second scenario: after sort(), identical values must end up next to
// each other.
$sorted = ['5', '10', '5', '3A', '5', '5'];
sort($sorted);

echo "\nSorted array (equal values should be grouped):\n";
var_dump($sorted);

// Indices holding exactly the string '5' (strict comparison), in array order.
$fivePositions = array_keys($sorted, '5', true);

// Walk the indices and make sure each one directly follows the previous one.
$consecutive = true;
$previous = null;
foreach ($fivePositions as $position) {
    if ($previous !== null && $previous + 1 !== $position) {
        $consecutive = false;
        break;
    }
    $previous = $position;
}

echo "\nAll '5' values grouped together: " . ($consecutive ? "yes" : "no") . "\n";

// The spaceship operator itself must behave exactly as before.
echo "\nComparison operator behavior (unchanged):\n";
echo '"5" <=> "3A" = ' . ('5' <=> '3A') . " (lexicographic)\n";
echo '"10" <=> "3A" = ' . ('10' <=> '3A') . " (lexicographic)\n";
?>
--EXPECT--
Input array:
array(6) {
[0]=>
string(1) "5"
[1]=>
string(2) "10"
[2]=>
string(1) "5"
[3]=>
string(2) "3A"
[4]=>
string(1) "5"
[5]=>
string(1) "5"
}

Result of array_unique():
array(3) {
[0]=>
string(1) "5"
[1]=>
string(2) "10"
[3]=>
string(2) "3A"
}

Expected: 3 unique values ('5', '10', '3A')
Actual count: 3

Sorted array (equal values should be grouped):
array(6) {
[0]=>
string(2) "3A"
[1]=>
string(1) "5"
[2]=>
string(1) "5"
[3]=>
string(1) "5"
[4]=>
string(1) "5"
[5]=>
string(2) "10"
}

All '5' values grouped together: yes

Comparison operator behavior (unchanged):
"5" <=> "3A" = 1 (lexicographic)
"10" <=> "3A" = -1 (lexicographic)
Loading