Permalink
Browse files

Py3: string encoding issues in doctest driver

  • Loading branch information...
1 parent: 3ddb952. Commit a849fc9c97649bfd960f78a07878034f21b1c9ac, committed by @larsmans (larsmans) on May 19, 2012.
Showing with 9 additions and 4 deletions.
  1. +4 −0 nltk/compat.py
  2. +5 −4 nltk/test/doctest_driver.py
View
@@ -8,6 +8,8 @@
PY3 = sys.version_info[0] == 3
if PY3:
+ def b(s):
+ return s.encode("latin-1")
def u(s):
return s
@@ -40,6 +42,8 @@ def u(s):
from urllib.parse import quote_plus, unquote_plus, urlencode
else:
+ def b(s):
+ return s
def u(s):
return unicode(s, "unicode_escape")
@@ -24,6 +24,7 @@
"""
from __future__ import print_function
+import codecs
import os, os.path, sys, unittest, pdb, bdb, re, tempfile, traceback
import textwrap
from doctest import *
@@ -35,7 +36,7 @@
root_dir = os.path.abspath(os.path.join(sys.path[0], '..', '..'))
sys.path.insert(0, root_dir)
-from nltk.compat import StringIO
+from nltk.compat import StringIO, b
__version__ = '0.1'
@@ -561,7 +562,7 @@ def find(name):
if testname is not None:
raise ValueError("test names can't be specified "
"for text files")
- s = open(filename).read().decode('utf8')
+ s = codecs.open(filename, encoding="utf-8").read()
test = MyDocTestParser().get_doctest(s, {}, name, filename, 0)
return [test]
else:
@@ -950,11 +951,11 @@ def __init__(self, term_stream=sys.stdout):
set_fg = self._tigetstr('setf')
if set_fg:
for i,color in enumerate(self._COLORS):
- setattr(self, color, curses.tparm(set_fg, i) or '')
+ setattr(self, color, curses.tparm(b(set_fg), i) or '')
set_fg_ansi = self._tigetstr('setaf')
if set_fg_ansi:
for i,color in enumerate(self._ANSICOLORS):
- setattr(self, color, curses.tparm(set_fg_ansi, i) or '')
+ setattr(self, color, curses.tparm(b(set_fg_ansi), i) or '')
def _tigetstr(self, cap_name):
# String capabilities can include "delays" of the form "$<2>".

0 comments on commit a849fc9

Please sign in to comment.