Add encoding alias for libxml2 #836

Merged
merged 2 commits into from Oct 22, 2013
View
@@ -109,6 +109,10 @@ def Slop(*args, &block)
Nokogiri(*args, &block).slop!
end
end
+
+ ###
+ # Register 'Windows-31J' with libxml2 as an alias of the 'CP932' encoding.
+ EncodingHandler.alias('CP932', 'Windows-31J')
end
###
@@ -0,0 +1,9 @@
+<html>
+ <head>
+ <title>こんにちは!</title>
+ </head>
+ <body>
+ <h1>This is a Shift_JIS File</h1>
+ <h2>こんにちは!</h2>
+ </body>
+</html>
View
@@ -26,6 +26,7 @@ class TestCase < MiniTest::Spec
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
+ SHIFT_JIS_NO_CHARSET= File.join(ASSETS_DIR, 'shift_jis_no_charset.html')
SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
@@ -19,6 +19,16 @@ def test_encoding
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
end
+ def test_encoding_without_charset
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
+
+ hello = "こんにちは"
+
+ assert_match hello, doc.content
+ assert_match hello, doc.to_html(:encoding => 'UTF-8')
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
+ end
+
def test_default_to_encoding_from_string
bad_charset = <<-eohtml
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">