Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge pull request #19 from otherinbox/rm/html_special_chars

improving HTML special character handling
  • Loading branch information...
commit 75eeb127ddb09b52260bfec01472ac75c34290fe 2 parents 249d999 + a1be5c2
Ben Hamill benhamill authored
5 lib/muddle/filter/schema_validation.rb
@@ -2,12 +2,13 @@
2 2
3 3 module Muddle::Filter::SchemaValidation
4 4 def self.filter(body_string)
5   - doc = Nokogiri::XML(body_string)
  5 + doc = Nokogiri::HTML(body_string)
  6 + doc.encoding = 'UTF-8'
6 7
7 8 if doc.internal_subset.nil?
8 9 doc.create_internal_subset("html", "-//W3C//DTD XHTML 1.0 Strict//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
9 10 end
10 11
11   - doc.to_xhtml
  12 + doc.to_xhtml(:encoding => 'US-ASCII')
12 13 end
13 14 end
3  spec/filter/boilerplate_attributes_spec.rb
... ... @@ -1,6 +1,9 @@
1 1 require 'spec_helper'
  2 +require 'filter/unmodified_content'
2 3
3 4 describe Muddle::Filter::BoilerplateAttributes do
  5 + include_examples "unmodified content in minimal email", described_class
  6 +
4 7 context "with a minimal email" do
5 8 subject { Muddle::Filter::BoilerplateAttributes.filter(minimal_email_body) }
6 9
3  spec/filter/boilerplate_css_spec.rb
... ... @@ -1,6 +1,9 @@
1 1 require 'spec_helper'
  2 +require 'filter/unmodified_content'
2 3
3 4 describe Muddle::Filter::BoilerplateCSS do
  5 + include_examples "unmodified content in minimal email", described_class
  6 +
4 7 context "with a minimal email" do
5 8 subject { Muddle::Filter::BoilerplateCSS.filter(minimal_email_body) }
6 9
3  spec/filter/boilerplate_style_element_spec.rb
... ... @@ -1,6 +1,9 @@
1 1 require 'spec_helper'
  2 +require 'filter/unmodified_content'
2 3
3 4 describe Muddle::Filter::BoilerplateStyleElement do
  5 + include_examples "unmodified content in minimal email", described_class
  6 +
4 7 context "with a minimal email" do
5 8 subject { Muddle::Filter::BoilerplateStyleElement.filter(minimal_email_body) }
6 9
3  spec/filter/premailer_spec.rb
... ... @@ -1,6 +1,9 @@
1 1 require 'spec_helper'
  2 +require 'filter/unmodified_content'
2 3
3 4 describe Muddle::Filter::Premailer do
  5 + include_examples "unmodified content in minimal email", described_class
  6 +
4 7 context "with a minimal email" do
5 8 subject { Muddle::Filter::Premailer.filter(minimal_email_body) }
6 9
25 spec/filter/schema_validation_spec.rb
... ... @@ -0,0 +1,25 @@
  1 +# encoding: utf-8
  2 +require 'spec_helper'
  3 +require 'filter/unmodified_content'
  4 +
  5 +describe Muddle::Filter::SchemaValidation do
  6 + context "with a minimal email" do
  7 + subject { Muddle::Filter::SchemaValidation.filter(minimal_email_body) }
  8 +
  9 + it "converts HTML special characters to US-ASCII" do
  10 + subject.should include("&#169;")
  11 + subject.should include("&#8482;")
  12 + subject.should include("&#182;")
  13 + end
  14 +
  15 + it "converts named entities to numbered entities" do
  16 + subject.should include("®")
  17 + end
  18 +
  19 + it "doesn't leave any special characters" do
  20 + subject.should_not include "©"
  21 + subject.should_not include "™"
  22 + subject.should_not include "¶"
  23 + end
  24 + end
  25 +end
22 spec/filter/unmodified_content.rb
... ... @@ -0,0 +1,22 @@
  1 +# encoding: utf-8
  2 +require 'spec_helper'
  3 +
  4 +shared_examples_for "unmodified content in minimal email" do |described_class|
  5 + describe "with content that shouldn't be modified" do
  6 + subject { described_class.filter(minimal_email_body) }
  7 +
  8 + it "doesn't modify UTF8 special characters" do
  9 + subject.should include "©"
  10 + subject.should include "™"
  11 + subject.should include "¶"
  12 + end
  13 +
  14 + it "doesn't modify named HTML special characters" do
  15 + subject.should include "&copy;"
  16 + end
  17 +
  18 + it "doesn't modify numbered HTML special characters" do
  19 + subject.should include "&#8482;"
  20 + end
  21 + end
  22 +end
2  spec/muddle_spec.rb
... ... @@ -1,4 +1,4 @@
1   -require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
  1 +require 'spec_helper'
2 2
3 3 describe Muddle do
4 4 before(:each) do
11 spec/resources/minimal_email.html
@@ -32,6 +32,17 @@
32 32 <h3><a>h3 a</a></h3>
33 33 <h4><a>h4 a</a></h4>
34 34 <h5><a>h5 a</a></h5>
  35 +
  36 + <p id="utf8-special-chars">
  37 + ©
  38 + ™
  39 + ¶
  40 + </p>
  41 + <p id="html-special-chars">
  42 + &copy;
  43 + &#8482;
  44 + &reg;
  45 + </p>
35 46 </td>
36 47 </tr>
37 48 </table>

0 comments on commit 75eeb12

Please sign in to comment.
Something went wrong with that request. Please try again.