Skip to content
Browse files

Specify character encoding in HTML HEAD

This adds an explicit declaration of the UTF-8 character encoding
to the Qt-generated HTML dump (since we've just explicitly requested
UTF-8 conversion from QString, this declaration should be correct);
cf. http://www.w3.org/International/O-charset

This patch fixes incorrect characters (e.g., A-hat for tab) in, e.g.,
Firefox's default rendering of exported HTML.

The same fix is applied to both HTML and XHTML export, even though
Firefox and WebKit appear to assume UTF-8 for XHTML even without
an explicit declaration.
  • Loading branch information...
1 parent 1024e5d commit f467f96827d11b2420e921308177517f1f8ce49a @markvoorhies committed
Showing with 28 additions and 1 deletion.
  1. +28 −1 IPython/frontend/qt/console/console_widget.py
View
29 IPython/frontend/qt/console/console_widget.py
@@ -549,9 +549,11 @@ def export_html(self, parent = None, inline = False):
# N.B. this is overly restrictive, but Qt's output is
# predictable...
img_re = re.compile(r'<img src="(?P<name>[\d]+)" />')
+ html = self.fix_html_encoding(
+ str(self._control.toHtml().toUtf8()))
f.write(img_re.sub(
lambda x: self.image_tag(x, path = path, format = "png"),
- str(self._control.toHtml().toUtf8())))
+ html))
finally:
f.close()
return filename
@@ -578,6 +580,8 @@ def export_xhtml(self, parent = None):
assert(offset > -1)
html = ('<html xmlns="http://www.w3.org/1999/xhtml">\n'+
html[offset+6:])
+ # And now declare UTF-8 encoding
+ html = self.fix_html_encoding(html)
f.write(img_re.sub(
lambda x: self.image_tag(x, path = None, format = "svg"),
html))
@@ -586,6 +590,29 @@ def export_xhtml(self, parent = None):
return filename
return None
def fix_html_encoding(self, html):
    """ Insert a UTF-8 charset declaration right after the <head> tag.

    Parameters
    ----------
    html : string
        The markup to patch. The caller is expected to pass Qt-generated
        HTML that has already been UTF-8 encoded and coerced to a python
        string; this method does not check that.

    Returns
    -------
    The markup with a ``<meta http-equiv="Content-Type" ...>`` element
    placed immediately after the first ``<head>`` tag. When no literal
    ``<head>`` is present, the very same object is returned unmodified.

    The declaration matters when exported HTML is opened from the local
    file system, where no HTTP header announces the encoding and some
    characters (e.g., indented commands) would otherwise render
    incorrectly.

    C.f. http://www.w3.org/International/O-charset for details.
    """
    head_tag = "<head>"
    head_start = html.find(head_tag)
    if head_start == -1:
        # No recognizable head element: hand the input back untouched.
        return html

    # Splice point sits directly behind the opening tag.
    insert_at = head_start + len(head_tag)
    declaration = ('\n<meta http-equiv="Content-Type" '
                   'content="text/html; charset=utf-8" />\n')
    return html[:insert_at] + declaration + html[insert_at:]
+
def image_tag(self, match, path = None, format = "png"):
""" Return (X)HTML mark-up for the image-tag given by match.

0 comments on commit f467f96

Please sign in to comment.
Something went wrong with that request. Please try again.