diff --git a/src/sage/doctest/forker.py b/src/sage/doctest/forker.py index 6553d96b967..33d70c89b16 100644 --- a/src/sage/doctest/forker.py +++ b/src/sage/doctest/forker.py @@ -514,7 +514,13 @@ def _run(self, test, compileflags, out): finally: if self.debugger is not None: self.debugger.set_continue() # ==== Example Finished ==== - got = self._fakeout.getvalue() # the actual output + got = self._fakeout.getvalue() + try: + got = got.decode('utf-8') + except UnicodeDecodeError: + got = got.decode('latin1') + # the actual output + outcome = FAILURE # guilty until proved innocent or insane # If the example executed without raising any exceptions, @@ -768,7 +774,7 @@ def update_digests(self, example): sage: DTR.running_global_digest.hexdigest() '3cb44104292c3a3ab4da3112ce5dc35c' """ - s = pre_hash(get_source(example)) + s = pre_hash(get_source(example)).encode('utf-8') self.running_global_digest.update(s) self.running_doctest_digest.update(s) if example.predecessors is not None: diff --git a/src/sage/doctest/parsing.py b/src/sage/doctest/parsing.py index 77a1c8f8488..01746f7f558 100644 --- a/src/sage/doctest/parsing.py +++ b/src/sage/doctest/parsing.py @@ -22,8 +22,9 @@ # the License, or (at your option) any later version. # http://www.gnu.org/licenses/ #***************************************************************************** -from __future__ import print_function, absolute_import +from __future__ import print_function, absolute_import, unicode_literals from sage.misc.six import u +from six import text_type import re import sys @@ -291,7 +292,7 @@ def reduce_hex(fingerprints): return "%032x" % res -class MarkedOutput(str): +class MarkedOutput(text_type): """ A subclass of string with context for whether another string matches it. @@ -306,6 +307,9 @@ class MarkedOutput(str): 'abc' sage: s.rel_tol 0.0500000000000000 + + sage: MarkedOutput(u"56 µs") + u'56 \xb5s' """ random = False rel_tol = 0 diff --git a/src/sage/doctest/sources.py b/src/sage/doctest/sources.py index c8bd433e4c7..f3e05a6654e 100644 --- a/src/sage/doctest/sources.py +++ b/src/sage/doctest/sources.py @@ -19,7 +19,7 @@ # http://www.gnu.org/licenses/ #***************************************************************************** from __future__ import print_function -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import os import sys @@ -61,6 +61,9 @@ sagestart = re.compile(r"^\s*(>>> |sage: )\s*[^#\s]") untested = re.compile("(not implemented|not tested)") +# For parsing a PEP 0263 encoding declaration +pep_0263 = re.compile(r'coding[:=]\s*([-\w.]+)') + # Source line number in warning output doctest_line_number = re.compile(r"^\s*doctest:[0-9]") @@ -226,7 +229,7 @@ def _process_doc(self, doctests, doc, namespace, start): def _create_doctests(self, namespace, tab_okay=None): """ - Creates a list doctests defined in this source. + Creates a list of doctests defined in this source. This function collects functionality common to file and string sources, and is called by @@ -511,10 +514,13 @@ def __init__(self, path, options): base, ext = os.path.splitext(path) if ext in ('.py', '.pyx', '.pxd', '.pxi', '.sage', '.spyx'): self.__class__ = dynamic_class('PythonFileSource',(FileDocTestSource,PythonSource)) + self.encoding = "utf-8" elif ext == '.tex': self.__class__ = dynamic_class('TexFileSource',(FileDocTestSource,TexSource)) + self.encoding = "utf-8" elif ext == '.rst': self.__class__ = dynamic_class('RestFileSource',(FileDocTestSource,RestSource)) + self.encoding = "utf-8" else: raise ValueError("unknown file extension %r"%ext) @@ -536,10 +542,38 @@ def __iter__(self): 1 sage: 2 + 2 2 4 3 ''' + + The encoding is "utf-8" by default:: + + sage: FDS.encoding + 'utf-8' + + We create a file with a Latin-1 encoding without declaring it:: + + sage: s = "'''\nRegardons le polyn\xF4me...\n'''\n" + sage: open(filename, 'w').write(s) + sage: FDS = FileDocTestSource(filename, DocTestDefaults()) + sage: L = list(FDS) + Traceback (most recent call last): + ... + UnicodeDecodeError: 'utf8' codec can't decode byte 0xf4 in position 18: invalid continuation byte + + This works if we add a PEP 0263 encoding declaration:: + + sage: s = "#!/usr/bin/env python\n# -*- coding: latin-1 -*-\n" + s + sage: open(filename, 'w').write(s) + sage: FDS = FileDocTestSource(filename, DocTestDefaults()) + sage: L = list(FDS) + sage: FDS.encoding + 'latin-1' """ with open(self.path) as source: for lineno, line in enumerate(source): - yield lineno, line + if lineno < 2: + match = pep_0263.search(line) + if match: + self.encoding = match.group(1) + yield lineno, unicode(line, self.encoding) @lazy_attribute def printpath(self): diff --git a/src/sage/interfaces/gap.py b/src/sage/interfaces/gap.py index fb05adc78b8..dc1bfcbeaa9 100644 --- a/src/sage/interfaces/gap.py +++ b/src/sage/interfaces/gap.py @@ -188,6 +188,7 @@ from sage.structure.element import ModuleElement import re import os +import io import pexpect import time import platform @@ -1329,8 +1330,9 @@ def help(self, s, pager=True): else: tmp_to_use = self._local_tmpfile() self.eval('SetGAPDocTextTheme("none")') - self.eval(r'\$SAGE.tempfile := "%s";'%tmp_to_use) - line = Expect.eval(self, "? %s"%s) + gap_encoding = str(self('GAPInfo.TermEncoding;')) + self.eval(r'\$SAGE.tempfile := "%s";' % tmp_to_use) + line = Expect.eval(self, "? %s" % s) Expect.eval(self, "? 1") match = re.search("Page from (\d+)", line) if match is None: @@ -1339,7 +1341,7 @@ def help(self, s, pager=True): (sline,) = match.groups() if self.is_remote(): self._get_tmpfile() - F = open(self._local_tmpfile(),"r") + F = io.open(self._local_tmpfile(), "r", encoding=gap_encoding) help = F.read() if pager: from IPython.core.page import page diff --git a/src/sage/interfaces/r.py b/src/sage/interfaces/r.py index 9db603f8eb3..2406d3b7f50 100644 --- a/src/sage/interfaces/r.py +++ b/src/sage/interfaces/r.py @@ -668,7 +668,11 @@ def library(self, library_name): ... ImportError: ... """ - ret = self.eval('require("%s")'%library_name) + ret = self.eval('require("%s")' % library_name) + try: + ret = ret.decode('utf-8') + except UnicodeDecodeError: + ret = ret.decode('latin-1') # try hard to parse the message string in a locale-independent way if ' library(' in ret: # locale-independent key-word raise ImportError("%s"%ret) diff --git a/src/sage/plot/graphics.py b/src/sage/plot/graphics.py index 2c8a48c1085..8fd3998927b 100644 --- a/src/sage/plot/graphics.py +++ b/src/sage/plot/graphics.py @@ -693,8 +693,7 @@ def axes_labels(self, l=None): sage: c = circle((0,0), 1) sage: c.axes_labels(['axe des abscisses', u'axe des ordonnées']) sage: c._axes_labels - ('axe des abscisses', u'axe des ordonn\xc3\xa9es') - + ('axe des abscisses', u'axe des ordonn\xe9es') """ if l is None: try: diff --git a/src/sage/repl/rich_output/backend_ipython.py b/src/sage/repl/rich_output/backend_ipython.py index 894b7d274e8..5085c3dca58 100644 --- a/src/sage/repl/rich_output/backend_ipython.py +++ b/src/sage/repl/rich_output/backend_ipython.py @@ -236,7 +236,7 @@ def displayhook(self, plain_text, rich_output): sage: from sage.repl.rich_output import get_display_manager sage: dm = get_display_manager() sage: dm.displayhook(Foo()) - ({u'text/plain': u'Mot\xc3\xb6rhead'}, {}) + ({u'text/plain': u'Mot\xf6rhead'}, {}) """ if isinstance(rich_output, OutputPlainText): return ({u'text/plain': rich_output.text.get_unicode()}, {}) diff --git a/src/sage/structure/dynamic_class.py b/src/sage/structure/dynamic_class.py index 2269bd78e39..3d2ed2fae49 100644 --- a/src/sage/structure/dynamic_class.py +++ b/src/sage/structure/dynamic_class.py @@ -123,6 +123,7 @@ class MyPermutation(UniqueRepresentation, PermutationCycleType, PosetElement, Gr from sage.misc.classcall_metaclass import ClasscallMetaclass from sage.misc.inherit_comparison import InheritComparisonMetaclass, InheritComparisonClasscallMetaclass + def dynamic_class(name, bases, cls=None, reduction=None, doccls=None, prepend_cls_bases=True, cache=True): r""" @@ -309,6 +310,10 @@ def dynamic_class(name, bases, cls=None, reduction=None, doccls=None, """ bases = tuple(bases) #assert(len(bases) > 0 ) + try: + name = str(name) + except UnicodeEncodeError: + pass assert(isinstance(name, str)) # assert(cls is None or issubtype(type(cls), type) or type(cls) is classobj) if cache is True: