From 0f46921a97677b83b106366c805063105c5e9f20 Mon Sep 17 00:00:00 2001
From: Zaid Zawaideh <zaid@sandglaz.com>
Date: Mon, 11 Feb 2013 14:17:32 -0500
Subject: [PATCH 1/6] added handling of invalid UTF-8 byte sequence exceptions

---
 lib/openid/consumer/html_parse.rb |  6 +++++-
 test/test_linkparse.rb            | 10 +++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/lib/openid/consumer/html_parse.rb b/lib/openid/consumer/html_parse.rb
index fca39456..222fc0b9 100644
--- a/lib/openid/consumer/html_parse.rb
+++ b/lib/openid/consumer/html_parse.rb
@@ -34,7 +34,11 @@ def OpenID.unescape_hash(h)
 
 
   def OpenID.parse_link_attrs(html)
-    stripped = html.gsub(REMOVED_RE,'')
+    begin
+      stripped = html.gsub(REMOVED_RE,'')
+    rescue ArgumentError
+      stripped = html.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(REMOVED_RE,'')
+    end
     parser = HTMLTokenizer.new(stripped)
 
     links = []
diff --git a/test/test_linkparse.rb b/test/test_linkparse.rb
index 6360d507..ef19a9c2 100644
--- a/test/test_linkparse.rb
+++ b/test/test_linkparse.rb
@@ -84,7 +84,8 @@ def test_linkparse
           assert(false, "datafile parsing error: bad header #{h}")
         end
       }
-      links = OpenID::parse_link_attrs(html)
+
+      links = OpenID::parse_link_attrs(html.force_encoding('UTF-8'))
       
       found = links.dup
       expected = expected_links.dup
@@ -97,5 +98,12 @@ def test_linkparse
       end
     }
     assert_equal(numtests, testnum, "Number of tests")
+
+    # test handling of invalid UTF-8 byte sequences
+    html = "<html><body>hello joel\255</body></html>".force_encoding("UTF-8")
+    assert_nothing_raised do 
+      OpenID::parse_link_attrs(html)
+    end
+
   end
 end

From a647c12316e859dfbf2a10eb812f3d1d585baddb Mon Sep 17 00:00:00 2001
From: Zaid <zawaideh@gmail.com>
Date: Tue, 12 Feb 2013 14:06:58 -0500
Subject: [PATCH 2/6] Update to use 1.8 style hash

---
 lib/openid/consumer/html_parse.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/openid/consumer/html_parse.rb b/lib/openid/consumer/html_parse.rb
index 222fc0b9..559cd4fe 100644
--- a/lib/openid/consumer/html_parse.rb
+++ b/lib/openid/consumer/html_parse.rb
@@ -37,7 +37,7 @@ def OpenID.parse_link_attrs(html)
     begin
       stripped = html.gsub(REMOVED_RE,'')
     rescue ArgumentError
-      stripped = html.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '').gsub(REMOVED_RE,'')
+      stripped = html.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
     end
     parser = HTMLTokenizer.new(stripped)
 

From abdcf65e1e7c6cc58aded36c86db866384ae639b Mon Sep 17 00:00:00 2001
From: Zaid <zawaideh@gmail.com>
Date: Tue, 12 Feb 2013 14:34:48 -0500
Subject: [PATCH 3/6] fix problem in force_encoding in tests

String#force_encoding doesn't exist in Ruby 1.8. Pass the string as-is on 1.8 and only call force_encoding if the string responds to it.
---
 test/test_linkparse.rb | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/test_linkparse.rb b/test/test_linkparse.rb
index ef19a9c2..dc9d394a 100644
--- a/test/test_linkparse.rb
+++ b/test/test_linkparse.rb
@@ -84,8 +84,8 @@ def test_linkparse
           assert(false, "datafile parsing error: bad header #{h}")
         end
       }
-
-      links = OpenID::parse_link_attrs(html.force_encoding('UTF-8'))
+      html = html.force_encoding('UTF-8') if html.respond_to? :force_encoding
+      links = OpenID::parse_link_attrs(html)
       
       found = links.dup
       expected = expected_links.dup
@@ -100,7 +100,8 @@ def test_linkparse
     assert_equal(numtests, testnum, "Number of tests")
 
     # test handling of invalid UTF-8 byte sequences
-    html = "<html><body>hello joel\255</body></html>".force_encoding("UTF-8")
+    html = "<html><body>hello joel\255</body></html>"
+    html = html.force_encoding('UTF-8') if html.respond_to? :force_encoding
     assert_nothing_raised do 
       OpenID::parse_link_attrs(html)
     end

From 542cac428d93aed3101677a591334650b8db1f4e Mon Sep 17 00:00:00 2001
From: Zaid Zawaideh <zaid@sandglaz.com>
Date: Tue, 12 Feb 2013 14:54:58 -0500
Subject: [PATCH 4/6] catch Encoding::UndefinedConversionError for
 compatibility with JRuby 1.9 mode

---
 lib/openid/consumer/html_parse.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/openid/consumer/html_parse.rb b/lib/openid/consumer/html_parse.rb
index 559cd4fe..2e12bb6c 100644
--- a/lib/openid/consumer/html_parse.rb
+++ b/lib/openid/consumer/html_parse.rb
@@ -36,7 +36,7 @@ def OpenID.unescape_hash(h)
   def OpenID.parse_link_attrs(html)
     begin
       stripped = html.gsub(REMOVED_RE,'')
-    rescue ArgumentError
+    rescue ArgumentError, Encoding::UndefinedConversionError
       stripped = html.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
     end
     parser = HTMLTokenizer.new(stripped)

From b1d0c38fe8dd6d64b58ce417ac20e76900807781 Mon Sep 17 00:00:00 2001
From: Zaid Zawaideh <zaid@sandglaz.com>
Date: Tue, 12 Feb 2013 15:23:36 -0500
Subject: [PATCH 5/6] jruby 1.9 mode still complaining about string encoding,
 try forcing it immediately

---
 test/test_linkparse.rb | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/test_linkparse.rb b/test/test_linkparse.rb
index dc9d394a..9a504290 100644
--- a/test/test_linkparse.rb
+++ b/test/test_linkparse.rb
@@ -100,8 +100,11 @@ def test_linkparse
     assert_equal(numtests, testnum, "Number of tests")
 
     # test handling of invalid UTF-8 byte sequences
-    html = "<html><body>hello joel\255</body></html>"
-    html = html.force_encoding('UTF-8') if html.respond_to? :force_encoding
+    if "".respond_to? :force_encoding
+      html = "<html><body>hello joel\255</body></html>".force_encoding('UTF-8') 
+    else
+      html = "<html><body>hello joel\255</body></html>"
+    end
     assert_nothing_raised do 
       OpenID::parse_link_attrs(html)
     end

From d3dca2faa653695cdaf2823ee6b9c4622a83ece3 Mon Sep 17 00:00:00 2001
From: Zaid Zawaideh <zaid@sandglaz.com>
Date: Tue, 12 Feb 2013 23:09:04 -0500
Subject: [PATCH 6/6] fixed issue with jruby in 1.9 mode not handling string
 encoding from binary properly. Now falling back to using ASCII as source

---
 lib/openid/consumer/html_parse.rb | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/openid/consumer/html_parse.rb b/lib/openid/consumer/html_parse.rb
index 2e12bb6c..e127dbef 100644
--- a/lib/openid/consumer/html_parse.rb
+++ b/lib/openid/consumer/html_parse.rb
@@ -36,8 +36,12 @@ def OpenID.unescape_hash(h)
   def OpenID.parse_link_attrs(html)
     begin
       stripped = html.gsub(REMOVED_RE,'')
-    rescue ArgumentError, Encoding::UndefinedConversionError
-      stripped = html.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
+    rescue ArgumentError
+      begin
+        stripped = html.encode('UTF-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
+      rescue Encoding::UndefinedConversionError #needed for a problem in JRuby where it can't handle the conversion
+        stripped = html.encode('UTF-8', 'ASCII', :invalid => :replace, :undef => :replace, :replace => '').gsub(REMOVED_RE,'')
+      end
     end
     parser = HTMLTokenizer.new(stripped)