Permalink
Browse files

Add spec for Shift-JIS encoded html that has no charset

  • Loading branch information...
1 parent 38c8c70 commit 1b0d33e087dd752d2563a1259c2e1c617f6fcb13 @tricknotes committed Jan 16, 2013
Showing with 20 additions and 0 deletions.
  1. +9 −0 test/files/shift_jis_no_charset.html
  2. +1 −0 test/helper.rb
  3. +10 −0 test/html/test_document_encoding.rb
@@ -0,0 +1,9 @@
+<html>
+ <head>
+ <title>こんにちは!</title>
+ </head>
+ <body>
+ <h1>This is a Shift_JIS File</h1>
+ <h2>こんにちは!</h2>
+ </body>
+</html>
View
@@ -26,6 +26,7 @@ class TestCase < MiniTest::Spec
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
+ SHIFT_JIS_NO_CHARSET = File.join(ASSETS_DIR, 'shift_jis_no_charset.html') # Shift_JIS fixture without a charset declaration
SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
@@ -19,6 +19,16 @@ def test_encoding
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
end
+ def test_encoding_without_charset
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
+
+ hello = "こんにちは"
+
+ assert_match hello, doc.content
+ assert_match hello, doc.to_html(:encoding => 'UTF-8')
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
+ end
+
def test_default_to_encoding_from_string
bad_charset = <<-eohtml
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

0 comments on commit 1b0d33e

Please sign in to comment.