Skip to content

Commit c250bb9

Browse files
committed
fix: handle mb_encoding_aliases() behavior differences between PHP 7.4 and 8.0+
1 parent 5712cf3 commit c250bb9

File tree

1 file changed

+40
-15
lines changed

1 file changed

+40
-15
lines changed

src/Document.php

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,31 +31,56 @@ public function __construct(Extractor $extractor)
3131
preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $contentType, $match);
3232
if (!empty($match[1])) {
3333
$encoding = trim($match[1], ',');
34-
try {
35-
$ret = mb_encoding_aliases($encoding ?? '');
36-
if ($ret === false) {
37-
$encoding = null;
38-
}
39-
} catch (\ValueError $exception) {
40-
$encoding = null;
41-
}
34+
$encoding = $this->getValidEncoding($encoding);
4235
}
4336
if (is_null($encoding) && !empty($html)) {
4437
preg_match('/charset=(?:"|\')?(.*?)(?=$|\s|;|"|\'|>)/i', $html, $match);
4538
if (!empty($match[1])) {
4639
$encoding = trim($match[1], ',');
40+
$encoding = $this->getValidEncoding($encoding);
4741
}
42+
}
43+
$this->document = !empty($html) ? Parser::parse($html, $encoding) : new DOMDocument();
44+
$this->initXPath();
45+
}
46+
47+
/**
 * Return the given encoding name if mbstring recognises it, otherwise null.
 *
 * Validity is probed with mb_encoding_aliases(), whose failure mode depends on the PHP version:
 * - PHP 7.4: returns false for an invalid encoding and emits a Warning for an empty string
 * - PHP 8.0+: throws ValueError for an invalid/empty encoding
 *
 * Both failure modes are handled by a single code path, so no PHP_VERSION_ID check is needed
 * (catching \ValueError is harmless on PHP 7.4, where that class is never thrown).
 *
 * A successful call may legitimately return an EMPTY array: an encoding such as UTF-16LE is
 * supported but has no aliases. The result is therefore only tested for being an array, never
 * for being non-empty, otherwise valid encodings would be rejected.
 *
 * TODO: When dropping PHP 7.4 support, the handling of the `false` return value can be removed.
 *
 * @see https://www.php.net/manual/en/function.mb-encoding-aliases.php
 *
 * @param string|null $encoding Charset name extracted from a Content-Type header or the HTML markup.
 * @return string|null The unchanged name when mbstring knows it, null otherwise.
 */
private function getValidEncoding(?string $encoding): ?string
{
    // A missing or empty name is never valid; rejecting it here also avoids the
    // PHP 7.4 Warning and the PHP 8.0+ ValueError for an empty string.
    if ($encoding === null || $encoding === '') {
        return null;
    }

    try {
        $aliases = mb_encoding_aliases($encoding);
    } catch (\ValueError $exception) {
        // PHP 8.0+: the encoding name is unknown to mbstring.
        return null;
    }

    // PHP 7.4 signals an unknown encoding with false; any array (even an empty one) means valid.
    return is_array($aliases) ? $encoding : null;
}
6085

6186
private function initXPath()

0 commit comments

Comments
 (0)