Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fix problem with decoding of HTML that has mixture of unicode and entities.

Original commits:
 - 08382e5
 - 8ced77c
  • Loading branch information...
commit 4ed2deb9739adaa83f7a3196dd145bdc23122d13 1 parent c3d9bff
@ziima ziima authored
View
9 openid/test/data/test_discover/unicode.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+ <head>
+ <title param="ěščřžýáíé &raquo;">Identity Page for Smoker</title>
+ </head>
+ <body>
+ <p>foo</p>
+ </body>
+</html>
View
7 openid/test/test_discover.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
import sys
import unittest
import datadriven
@@ -248,6 +249,12 @@ def test_404(self):
self.failUnlessRaises(DiscoveryFailure,
discover.discover, self.id_url + '/404')
+ def test_unicode(self):
+ self._discover(
+ content_type='text/html;charset=utf-8',
+ data=readDataFile('unicode.html'),
+ expected_services=0)
+
def test_noOpenID(self):
services = self._discover(content_type='text/plain',
data="junk",
View
18 openid/yadis/discover.py
@@ -1,7 +1,7 @@
# -*- test-case-name: openid.test.test_yadis_discover -*-
__all__ = ['discover', 'DiscoveryResult', 'DiscoveryFailure']
-from cStringIO import StringIO
+from StringIO import StringIO
from openid import fetchers
@@ -126,8 +126,22 @@ def whereIsYadis(resp):
# XXX: do we want to do something with content-type, like
# have a whitelist or a blacklist (for detecting that it's
# HTML)?
+
+ # Decode body by encoding of file
+ content_type = content_type or ''
+ encoding = content_type.rsplit(';', 1)
+ if len(encoding) == 2 and encoding[1].strip().startswith('charset='):
+ encoding = encoding[1].split('=', 1)[1]
+ else:
+ encoding = 'UTF-8'
+
+ try:
+ content = resp.body.decode(encoding)
+ except UnicodeError:
+ content = resp.body
+
try:
- yadis_loc = findHTMLMeta(StringIO(resp.body))
+ yadis_loc = findHTMLMeta(StringIO(content))
except MetaNotFound:
pass
Please sign in to comment.
Something went wrong with that request. Please try again.