# --- lib/nokogiri/html/document.rb (method of Nokogiri::HTML::Document) ---

###
# Get the encoding declared by this document's meta tags.
#
# Two declaration styles are consulted, in this order:
#
# 1. the element returned by +meta_content_type+, whose +content+ attribute
#    is scanned for a <tt>charset=...</tt> parameter;
# 2. the first +meta+ element carrying a +charset+ attribute.
#
# Returns the declared encoding name as a String, or +nil+ when neither
# form yields one.
def meta_encoding
  content_type_meta = meta_content_type
  if content_type_meta
    # +to_s+ guards against a meta element without a content attribute;
    # the capture index returns nil when no charset parameter is present.
    declared = content_type_meta['content'].to_s[/charset\s*=\s*([\w-]+)/i, 1]
    return declared if declared
  end

  # Fall back to the bare charset attribute. Standard CSS attribute syntax
  # is used here instead of the XPath-flavoured "[@charset]" form.
  charset_meta = at('meta[charset]')
  charset_meta && charset_meta[:charset]
end

# --- test/html/test_document.rb ---

# Regression test: a document that only declares its encoding through a
# meta element with a charset attribute must report that encoding.
# NOTE(review): the heredoc markup was stripped from the reviewed patch; the
# document below is a minimal reconstruction -- confirm against the commit.
def test_meta_encoding_checks_charset
  doc = Nokogiri::HTML(<<-eohtml)
<html>
  <head>
    <meta charset="UTF-8">
  </head>
  <body>
    foo
  </body>
</html>
  eohtml
  assert_equal 'UTF-8', doc.meta_encoding
end