@@ -31,31 +31,56 @@ public function __construct(Extractor $extractor)
3131        preg_match ('/charset=(?:"| \')?(.*?)(?=$|\s|;|"| \'|>)/i ' , $ contentType , $ match );
3232        if  (!empty ($ match [1 ])) {
3333            $ encoding  = trim ($ match [1 ], ', ' );
34-             try  {
35-                 $ ret  = mb_encoding_aliases ($ encoding  ?? '' );
36-                 if  ($ ret  === false ) {
37-                     $ encoding  = null ;
38-                 }
39-             } catch  (\ValueError   $ exception ) {
40-                 $ encoding  = null ;
41-             }
34+             $ encoding  = $ this  ->getValidEncoding ($ encoding );
4235        }
4336        if  (is_null ($ encoding ) && !empty ($ html )) {
4437            preg_match ('/charset=(?:"| \')?(.*?)(?=$|\s|;|"| \'|>)/i ' , $ html , $ match );
4538            if  (!empty ($ match [1 ])) {
4639                $ encoding  = trim ($ match [1 ], ', ' );
40+                 $ encoding  = $ this  ->getValidEncoding ($ encoding );
4741            }
42+         }
43+         $ this  ->document  = !empty ($ html ) ? Parser::parse ($ html , $ encoding ) : new  DOMDocument ();
44+         $ this  ->initXPath ();
45+     }
46+ 
47+     /** 
48+      * Get valid encoding name if it exists, otherwise return null 
49+      * 
50+      * Uses mb_encoding_aliases() to verify the encoding is valid. 
51+      * 
52+      * TODO: When dropping PHP 7.4 support, remove the PHP_VERSION_ID < 80000 branch. 
53+      * PHP version differences: 
54+      * - PHP 7.4: mb_encoding_aliases() returns false for invalid encoding and throws Warning for empty string 
55+      * - PHP 8.0+: mb_encoding_aliases() throws ValueError for invalid/empty encoding 
56+      * 
57+      * @see https://www.php.net/manual/en/function.mb-encoding-aliases.php 
58+      */ 
59+     private  function  getValidEncoding (?string  $ encoding ): ?string 
60+     {
61+         if  (PHP_VERSION_ID  < 80000 ) {
62+             // PHP 7.4: Check return value (false = invalid encoding) 
63+             // Need to check empty() first to avoid Warning 
64+             // TODO: Remove this entire branch when PHP 7.4 support is dropped 
65+             if  (empty ($ encoding )) {
66+                 return  null ;
67+             }
68+             $ ret  = mb_encoding_aliases ($ encoding );
69+             if  ($ ret  === false ) {
70+                 return  null ;
71+             } else  {
72+                 return  $ encoding ;
73+             }
74+         } else  {
75+             // PHP 8.0+: ValueError exception is thrown for invalid/empty encoding 
4876            try  {
49-                 $ ret  = mb_encoding_aliases ($ encoding  ?? '' );
50-                 if  ($ ret  === false ) {
51-                     $ encoding  = null ;
52-                 }
77+                 $ aliases  = mb_encoding_aliases ($ encoding  ?? '' );
78+                 // Check if aliases array is not empty (valid encoding should have at least one alias) 
79+                 return  !empty ($ aliases ) ? $ encoding  : null ;
5380            } catch  (\ValueError   $ exception ) {
54-                 $ encoding  =  null ;
81+                 return  null ;
5582            }
5683        }
57-         $ this  ->document  = !empty ($ html ) ? Parser::parse ($ html , $ encoding ) : new  DOMDocument ();
58-         $ this  ->initXPath ();
5984    }
6085
6186    private  function  initXPath ()
0 commit comments