Skip to content

Commit

Permalink
bpo-13153: Use OS native encoding for converting between Python and Tcl.
Browse files Browse the repository at this point in the history
On Windows use UTF-16 (or UTF-32 for 32-bit Tcl_UniChar) with the
"surrogatepass" error handler for converting to/from Tcl Unicode objects.

On Linux use UTF-8 with the "surrogateescape" error handler for converting
to/from Tcl String objects.

Converting strings from Tcl to Python and back now never fails
(except MemoryError).
  • Loading branch information
serhiy-storchaka committed Oct 2, 2019
1 parent b3e7045 commit f4db0e7
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 182 deletions.
10 changes: 0 additions & 10 deletions Lib/idlelib/pyshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -1298,16 +1298,6 @@ def resetoutput(self):
self.set_line_and_column()

def write(self, s, tags=()):
if isinstance(s, str) and len(s) and max(s) > '\uffff':
# Tk doesn't support outputting non-BMP characters
# Let's assume what printed string is not very long,
# find first non-BMP character and construct informative
# UnicodeEncodeError exception.
for start, char in enumerate(s):
if char > '\uffff':
break
raise UnicodeEncodeError("UCS-2", char, start, start+1,
'Non-BMP character not supported in Tk')
try:
self.text.mark_gravity("iomark", "right")
count = OutputWindow.write(self, s, tags, "iomark")
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_tcl.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,9 +429,12 @@ def passValue(value):
self.assertEqual(passValue(False), False if self.wantobjects else '0')
self.assertEqual(passValue('string'), 'string')
self.assertEqual(passValue('string\u20ac'), 'string\u20ac')
self.assertEqual(passValue('string\U0001f4bb'), 'string\U0001f4bb')
self.assertEqual(passValue('str\x00ing'), 'str\x00ing')
self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd')
self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
self.assertEqual(passValue('str\x00ing\U0001f4bb'),
'str\x00ing\U0001f4bb')
self.assertEqual(passValue(b'str\x00ing'),
b'str\x00ing' if self.wantobjects else 'str\x00ing')
self.assertEqual(passValue(b'str\xc0\x80ing'),
Expand Down Expand Up @@ -490,6 +493,7 @@ def float_eq(actual, expected):
check('string')
check('string\xbd')
check('string\u20ac')
check('string\U0001f4bb')
check('')
check(b'string', 'string')
check(b'string\xe2\x82\xac', 'string\xe2\x82\xac')
Expand Down Expand Up @@ -531,6 +535,7 @@ def test_splitlist(self):
('a\n b\t\r c\n ', ('a', 'b', 'c')),
(b'a\n b\t\r c\n ', ('a', 'b', 'c')),
('a \u20ac', ('a', '\u20ac')),
('a \U0001f4bb', ('a', '\U0001f4bb')),
(b'a \xe2\x82\xac', ('a', '\u20ac')),
(b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
('a {b c}', ('a', 'b c')),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
OS native encoding is now used for converting between Python strings and
Tcl objects. This allows to display, copy and paste to clipboard emoji and
other non-BMP characters. Converting strings from Tcl to Python and back
now never fails (except MemoryError).
Loading

0 comments on commit f4db0e7

Please sign in to comment.