Skip to content

Commit

Permalink
Decode HTTP responses with correct charset before trying to parse.
Browse files Browse the repository at this point in the history
  • Loading branch information
carljm committed Jun 12, 2013
1 parent ec90e13 commit 05e3770
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
4 changes: 4 additions & 0 deletions micawber/compat.py
Expand Up @@ -7,8 +7,12 @@
from urllib.parse import urlencode
text_type = str
string_types = str,
def get_charset(response):
    """Return the charset declared by the response's Content-Type header.

    ``response.headers`` is expected to offer the ``email.message.Message``
    API (presumably the ``http.client.HTTPMessage`` attached to a Python 3
    ``urlopen`` response -- confirm against callers).

    ``get_content_charset()`` is used rather than ``get_param('charset')``
    because the latter returns a ``(charset, language, value)`` 3-tuple when
    the parameter is RFC 2231-encoded (``charset*=...``), which is not
    usable as a codec name.  The result is a lower-cased string, or ``None``
    when no usable charset is declared.
    """
    return response.headers.get_content_charset()
else:
from urllib2 import Request, urlopen, URLError, HTTPError
from urllib import urlencode
text_type = unicode
string_types = basestring,
def get_charset(response):
    """Return the ``charset`` parameter reported by the response headers.

    Delegates to ``response.headers.getparam('charset')`` and returns
    whatever that call yields.  On this (Python 2) branch the headers object
    is presumably a ``mimetools.Message`` from ``urllib2`` -- confirm; in
    that case a missing charset is reported as ``None``, which callers are
    expected to replace with their own default.
    """
    headers = response.headers
    return headers.getparam('charset')
7 changes: 5 additions & 2 deletions micawber/providers.py
Expand Up @@ -2,7 +2,7 @@
import pickle
import re
import socket
from .compat import urlencode, Request, urlopen, URLError, HTTPError
from .compat import urlencode, Request, urlopen, URLError, HTTPError, get_charset
try:
import simplejson as json
except ImportError:
Expand Down Expand Up @@ -35,7 +35,10 @@ def fetch(self, url):
if resp.code < 200 or resp.code >= 300:
return False

content = resp.read()
# by RFC, default HTTP charset is ISO-8859-1
charset = get_charset(resp) or 'iso-8859-1'

content = resp.read().decode(charset)
resp.close()
return content

Expand Down

0 comments on commit 05e3770

Please sign in to comment.