Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ext/mbstring/mbstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -3070,6 +3070,12 @@ static size_t count_demerits(struct candidate *array, size_t length, bool strict
uint32_t wchar_buf[128];
unsigned int finished = 0; /* For how many candidate encodings have we processed all the input? */

for (size_t i = 0; i < length; i++) {
if (array[i].in_len == 0) {
finished++;
}
}

while ((strict || length > 1) && finished < length) {
/* Iterate in reverse order to avoid moving candidates that can be eliminated. */
for (size_t i = length - 1; i != (size_t)-1; i--) {
Expand Down
13 changes: 13 additions & 0 deletions ext/mbstring/tests/mb_detect_encoding.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ print("Bad ASCII (strict): " . mb_detect_encoding("\xDD\x92", ['ASCII', 'UTF-8']
print("Bad ASCII/UTF-8, with more errors for ASCII (non-strict): " . mb_detect_encoding("\xD6\x8A\x8A", ['ASCII', 'UTF-8'], false) . "\n");
print("Bad ASCII/UTF-8, with more errors for ASCII (strict): " . var_export(mb_detect_encoding("\xD6\x8A\x8A", ['ASCII', 'UTF-8'], true), true) . "\n");

print("UTF-8 BOM (non-strict): " . mb_detect_encoding("\xEF\xBB\xBF", ["UTF-8", "ASCII"], false) . "\n");
print("UTF-8 BOM (strict): " . mb_detect_encoding("\xEF\xBB\xBF", ["UTF-8", "ASCII"], true) . "\n");
print("UTF-16BE BOM (non-strict): " . mb_detect_encoding("\xFE\xFF", ["UTF-8", "UTF-16BE", "UTF-16LE"], false) . "\n");
print("UTF-16BE BOM (strict): " . mb_detect_encoding("\xFE\xFF", ["UTF-8", "UTF-16BE", "UTF-16LE"], true) . "\n");
print("UTF-16LE BOM (non-strict): " . mb_detect_encoding("\xFF\xFE", ["UTF-8", "UTF-16BE", "UTF-16LE"], false) . "\n");
print("UTF-16LE BOM (strict): " . mb_detect_encoding("\xFF\xFE", ["UTF-8", "UTF-16BE", "UTF-16LE"], true) . "\n");

print("SJIS: " . mb_detect_encoding($sjis, 'SJIS', true) . "\n");
print("JIS: " . mb_detect_encoding($jis, 'JIS', true) . "\n");
print("EUC-JP (strict): " . mb_detect_encoding($euc_jp, 'UTF-8,EUC-JP,JIS', true) . "\n");
Expand Down Expand Up @@ -399,6 +406,12 @@ Bad ASCII (non-strict): UTF-8
Bad ASCII (strict): UTF-8
Bad ASCII/UTF-8, with more errors for ASCII (non-strict): UTF-8
Bad ASCII/UTF-8, with more errors for ASCII (strict): false
UTF-8 BOM (non-strict): UTF-8
UTF-8 BOM (strict): UTF-8
UTF-16BE BOM (non-strict): UTF-16BE
UTF-16BE BOM (strict): UTF-16BE
UTF-16LE BOM (non-strict): UTF-16LE
UTF-16LE BOM (strict): UTF-16LE
SJIS: SJIS
JIS: JIS
EUC-JP (strict): EUC-JP
Expand Down