Skip to content

Commit

Permalink
Trac #14153: Add Unicode support to the doctesting framework
Browse files Browse the repository at this point in the history
Support for:
1. Doctest (i.e. `sage:`) lines containing non-ASCII characters.
2. Doctest results containing non-ASCII characters.
3. Print statements in doctests outputting Unicode.
4. Source files with a PEP 0263 encoding declaration
   (http://www.python.org/dev/peps/pep-0263/).

see also #18370

URL: https://trac.sagemath.org/14153
Reported by: jdemeyer
Ticket author(s): Frédéric Chapoton
Reviewer(s): Erik Bray, Volker Braun, David Roe
  • Loading branch information
Release Manager authored and vbraun committed Aug 9, 2017
2 parents 8ca3ace + ed1e3b5 commit 44fba6b
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 14 deletions.
10 changes: 8 additions & 2 deletions src/sage/doctest/forker.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,13 @@ def _run(self, test, compileflags, out):
finally:
if self.debugger is not None:
self.debugger.set_continue() # ==== Example Finished ====
got = self._fakeout.getvalue() # the actual output
got = self._fakeout.getvalue()
try:
got = got.decode('utf-8')
except UnicodeDecodeError:
got = got.decode('latin1')
# the actual output

outcome = FAILURE # guilty until proved innocent or insane

# If the example executed without raising any exceptions,
Expand Down Expand Up @@ -768,7 +774,7 @@ def update_digests(self, example):
sage: DTR.running_global_digest.hexdigest()
'3cb44104292c3a3ab4da3112ce5dc35c'
"""
s = pre_hash(get_source(example))
s = pre_hash(get_source(example)).encode('utf-8')
self.running_global_digest.update(s)
self.running_doctest_digest.update(s)
if example.predecessors is not None:
Expand Down
8 changes: 6 additions & 2 deletions src/sage/doctest/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
# the License, or (at your option) any later version.
# http://www.gnu.org/licenses/
#*****************************************************************************
from __future__ import print_function, absolute_import
from __future__ import print_function, absolute_import, unicode_literals
from sage.misc.six import u
from six import text_type

import re
import sys
Expand Down Expand Up @@ -291,7 +292,7 @@ def reduce_hex(fingerprints):
return "%032x" % res


class MarkedOutput(str):
class MarkedOutput(text_type):
"""
A subclass of string with context for whether another string
matches it.
Expand All @@ -306,6 +307,9 @@ class MarkedOutput(str):
'abc'
sage: s.rel_tol
0.0500000000000000
sage: MarkedOutput(u"56 µs")
u'56 \xb5s'
"""
random = False
rel_tol = 0
Expand Down
40 changes: 37 additions & 3 deletions src/sage/doctest/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# http://www.gnu.org/licenses/
#*****************************************************************************
from __future__ import print_function
from __future__ import absolute_import
from __future__ import absolute_import, unicode_literals

import os
import sys
Expand Down Expand Up @@ -61,6 +61,9 @@
sagestart = re.compile(r"^\s*(>>> |sage: )\s*[^#\s]")
untested = re.compile("(not implemented|not tested)")

# For parsing a PEP 0263 encoding declaration
pep_0263 = re.compile(r'coding[:=]\s*([-\w.]+)')

# Source line number in warning output
doctest_line_number = re.compile(r"^\s*doctest:[0-9]")

Expand Down Expand Up @@ -226,7 +229,7 @@ def _process_doc(self, doctests, doc, namespace, start):

def _create_doctests(self, namespace, tab_okay=None):
"""
Creates a list doctests defined in this source.
Creates a list of doctests defined in this source.
This function collects functionality common to file and string
sources, and is called by
Expand Down Expand Up @@ -511,10 +514,13 @@ def __init__(self, path, options):
base, ext = os.path.splitext(path)
if ext in ('.py', '.pyx', '.pxd', '.pxi', '.sage', '.spyx'):
self.__class__ = dynamic_class('PythonFileSource',(FileDocTestSource,PythonSource))
self.encoding = "utf-8"
elif ext == '.tex':
self.__class__ = dynamic_class('TexFileSource',(FileDocTestSource,TexSource))
self.encoding = "utf-8"
elif ext == '.rst':
self.__class__ = dynamic_class('RestFileSource',(FileDocTestSource,RestSource))
self.encoding = "utf-8"
else:
raise ValueError("unknown file extension %r"%ext)

Expand All @@ -536,10 +542,38 @@ def __iter__(self):
1 sage: 2 + 2
2 4
3 '''
The encoding is "utf-8" by default::
sage: FDS.encoding
'utf-8'
We create a file with a Latin-1 encoding without declaring it::
sage: s = "'''\nRegardons le polyn\xF4me...\n'''\n"
sage: open(filename, 'w').write(s)
sage: FDS = FileDocTestSource(filename, DocTestDefaults())
sage: L = list(FDS)
Traceback (most recent call last):
...
UnicodeDecodeError: 'utf8' codec can't decode byte 0xf4 in position 18: invalid continuation byte
This works if we add a PEP 0263 encoding declaration::
sage: s = "#!/usr/bin/env python\n# -*- coding: latin-1 -*-\n" + s
sage: open(filename, 'w').write(s)
sage: FDS = FileDocTestSource(filename, DocTestDefaults())
sage: L = list(FDS)
sage: FDS.encoding
'latin-1'
"""
with open(self.path) as source:
for lineno, line in enumerate(source):
yield lineno, line
if lineno < 2:
match = pep_0263.search(line)
if match:
self.encoding = match.group(1)
yield lineno, unicode(line, self.encoding)

@lazy_attribute
def printpath(self):
Expand Down
8 changes: 5 additions & 3 deletions src/sage/interfaces/gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@
from sage.structure.element import ModuleElement
import re
import os
import io
import pexpect
import time
import platform
Expand Down Expand Up @@ -1329,8 +1330,9 @@ def help(self, s, pager=True):
else:
tmp_to_use = self._local_tmpfile()
self.eval('SetGAPDocTextTheme("none")')
self.eval(r'\$SAGE.tempfile := "%s";'%tmp_to_use)
line = Expect.eval(self, "? %s"%s)
gap_encoding = str(self('GAPInfo.TermEncoding;'))
self.eval(r'\$SAGE.tempfile := "%s";' % tmp_to_use)
line = Expect.eval(self, "? %s" % s)
Expect.eval(self, "? 1")
match = re.search("Page from (\d+)", line)
if match is None:
Expand All @@ -1339,7 +1341,7 @@ def help(self, s, pager=True):
(sline,) = match.groups()
if self.is_remote():
self._get_tmpfile()
F = open(self._local_tmpfile(),"r")
F = io.open(self._local_tmpfile(), "r", encoding=gap_encoding)
help = F.read()
if pager:
from IPython.core.page import page
Expand Down
6 changes: 5 additions & 1 deletion src/sage/interfaces/r.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,11 @@ def library(self, library_name):
...
ImportError: ...
"""
ret = self.eval('require("%s")'%library_name)
ret = self.eval('require("%s")' % library_name)
try:
ret = ret.decode('utf-8')
except UnicodeDecodeError:
ret = ret.decode('latin-1')
# try hard to parse the message string in a locale-independent way
if ' library(' in ret: # locale-independent key-word
raise ImportError("%s"%ret)
Expand Down
3 changes: 1 addition & 2 deletions src/sage/plot/graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,8 +693,7 @@ def axes_labels(self, l=None):
sage: c = circle((0,0), 1)
sage: c.axes_labels(['axe des abscisses', u'axe des ordonnées'])
sage: c._axes_labels
('axe des abscisses', u'axe des ordonn\xc3\xa9es')
('axe des abscisses', u'axe des ordonn\xe9es')
"""
if l is None:
try:
Expand Down
2 changes: 1 addition & 1 deletion src/sage/repl/rich_output/backend_ipython.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def displayhook(self, plain_text, rich_output):
sage: from sage.repl.rich_output import get_display_manager
sage: dm = get_display_manager()
sage: dm.displayhook(Foo())
({u'text/plain': u'Mot\xc3\xb6rhead'}, {})
({u'text/plain': u'Mot\xf6rhead'}, {})
"""
if isinstance(rich_output, OutputPlainText):
return ({u'text/plain': rich_output.text.get_unicode()}, {})
Expand Down
5 changes: 5 additions & 0 deletions src/sage/structure/dynamic_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ class MyPermutation(UniqueRepresentation, PermutationCycleType, PosetElement, Gr
from sage.misc.classcall_metaclass import ClasscallMetaclass
from sage.misc.inherit_comparison import InheritComparisonMetaclass, InheritComparisonClasscallMetaclass


def dynamic_class(name, bases, cls=None, reduction=None, doccls=None,
prepend_cls_bases=True, cache=True):
r"""
Expand Down Expand Up @@ -309,6 +310,10 @@ def dynamic_class(name, bases, cls=None, reduction=None, doccls=None,
"""
bases = tuple(bases)
#assert(len(bases) > 0 )
try:
name = str(name)
except UnicodeEncodeError:
pass
assert(isinstance(name, str))
# assert(cls is None or issubtype(type(cls), type) or type(cls) is classobj)
if cache is True:
Expand Down

0 comments on commit 44fba6b

Please sign in to comment.