Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #19 from otherinbox/rm/html_special_chars

improving HTML special character handling
  • Loading branch information...
commit 75eeb127ddb09b52260bfec01472ac75c34290fe 2 parents 249d999 + a1be5c2
Ben Hamill benhamill authored
5 lib/muddle/filter/schema_validation.rb
View
@@ -2,12 +2,13 @@
module Muddle::Filter::SchemaValidation
def self.filter(body_string)
- doc = Nokogiri::XML(body_string)
+ doc = Nokogiri::HTML(body_string)
+ doc.encoding = 'UTF-8'
if doc.internal_subset.nil?
doc.create_internal_subset("html", "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
end
- doc.to_xhtml
+ doc.to_xhtml(:encoding => 'US-ASCII')
end
end
3  spec/filter/boilerplate_attributes_spec.rb
View
@@ -1,6 +1,9 @@
require 'spec_helper'
+require 'filter/unmodified_content'
describe Muddle::Filter::BoilerplateAttributes do
+ include_examples "unmodified content in minimal email", described_class
+
context "with a minimal email" do
subject { Muddle::Filter::BoilerplateAttributes.filter(minimal_email_body) }
3  spec/filter/boilerplate_css_spec.rb
View
@@ -1,6 +1,9 @@
require 'spec_helper'
+require 'filter/unmodified_content'
describe Muddle::Filter::BoilerplateCSS do
+ include_examples "unmodified content in minimal email", described_class
+
context "with a minimal email" do
subject { Muddle::Filter::BoilerplateCSS.filter(minimal_email_body) }
3  spec/filter/boilerplate_style_element_spec.rb
View
@@ -1,6 +1,9 @@
require 'spec_helper'
+require 'filter/unmodified_content'
describe Muddle::Filter::BoilerplateStyleElement do
+ include_examples "unmodified content in minimal email", described_class
+
context "with a minimal email" do
subject { Muddle::Filter::BoilerplateStyleElement.filter(minimal_email_body) }
3  spec/filter/premailer_spec.rb
View
@@ -1,6 +1,9 @@
require 'spec_helper'
+require 'filter/unmodified_content'
describe Muddle::Filter::Premailer do
+ include_examples "unmodified content in minimal email", described_class
+
context "with a minimal email" do
subject { Muddle::Filter::Premailer.filter(minimal_email_body) }
25 spec/filter/schema_validation_spec.rb
View
@@ -0,0 +1,25 @@
+# encoding: utf-8
+require 'spec_helper'
+require 'filter/unmodified_content'
+
+describe Muddle::Filter::SchemaValidation do
+ context "with a minimal email" do
+ subject { Muddle::Filter::SchemaValidation.filter(minimal_email_body) }
+
+ it "converts HTML special characters to US-ASCII" do
+ subject.should include("&#169;")
+ subject.should include("&#8482;")
+ subject.should include("&#182;")
+ end
+
+ it "converts named entities to numbered entities" do
+ subject.should include("&#174;")
+ end
+
+ it "doesn't leave any special characters" do
+ subject.should_not include "©"
+ subject.should_not include "™"
+ subject.should_not include "¶"
+ end
+ end
+end
22 spec/filter/unmodified_content.rb
View
@@ -0,0 +1,22 @@
+# encoding: utf-8
+require 'spec_helper'
+
+shared_examples_for "unmodified content in minimal email" do |described_class|
+ describe "with content that shouldn't be modified" do
+ subject { described_class.filter(minimal_email_body) }
+
+ it "doesn't modify UTF8 special characters" do
+ subject.should include "©"
+ subject.should include "™"
+ subject.should include "¶"
+ end
+
+ it "doesn't modify named HTML special characters" do
+ subject.should include "&copy;"
+ end
+
+ it "doesn't modify numbered HTML special characters" do
+ subject.should include "&#8482;"
+ end
+ end
+end
2  spec/muddle_spec.rb
View
@@ -1,4 +1,4 @@
-require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+require 'spec_helper'
describe Muddle do
before(:each) do
11 spec/resources/minimal_email.html
View
@@ -32,6 +32,17 @@
<h3><a>h3 a</a></h3>
<h4><a>h4 a</a></h4>
<h5><a>h5 a</a></h5>
+
+ <p id="utf8-special-chars">
+ ©
+ ™
+ ¶
+ </p>
+ <p id="html-special-chars">
+ &copy;
+ &#8482;
+ &reg;
+ </p>
</td>
</tr>
</table>
Please sign in to comment.
Something went wrong with that request. Please try again.