Permalink
Browse files

add text.getdefaultencoding() for central default encoding guess

This provides a single, central replacement for the many places where we call
`sys.stdin.encoding or sys.getdefaultencoding()`. The new function also tries
locale.getpreferredencoding(False) after stdin.encoding, which should be a better guess when stdin.encoding is None.
  • Loading branch information...
1 parent 5a3b97e commit 6392cebc99715783320c34092ae63357707f4f38 @minrk committed Sep 6, 2011
Showing with 34 additions and 7 deletions.
  1. +4 −3 IPython/config/loader.py
  2. +2 −1 IPython/utils/_process_win32.py
  3. +2 −1 IPython/utils/jsonutil.py
  4. +24 −0 IPython/utils/text.py
  5. +2 −2 IPython/zmq/iostream.py
@@ -24,7 +24,7 @@
from IPython.external import argparse
from IPython.utils.path import filefind, get_ipython_dir
-from IPython.utils import py3compat, warn
+from IPython.utils import py3compat, text, warn
#-----------------------------------------------------------------------------
# Exceptions
@@ -425,7 +425,7 @@ def _decode_argv(self, argv, enc=None):
"""decode argv if bytes, using stin.encoding, falling back on default enc"""
uargv = []
if enc is None:
- enc = sys.stdin.encoding or sys.getdefaultencoding()
+ enc = text.getdefaultencoding()
for arg in argv:
if not isinstance(arg, unicode):
# only decode if not already decoded
@@ -586,7 +586,8 @@ def _add_arguments(self, aliases=None, flags=None):
def _parse_args(self, args):
"""self.parser->self.parsed_data"""
# decode sys.argv to support unicode command-line options
- uargs = [py3compat.cast_unicode(a) for a in args]
+ enc = text.getdefaultencoding()
+ uargs = [py3compat.cast_unicode(a, enc) for a in args]
self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs)
def _convert_to_config(self):
@@ -23,6 +23,7 @@
# our own imports
from ._process_common import read_no_interrupt, process_handler
+from . import text
#-----------------------------------------------------------------------------
# Function definitions
@@ -88,7 +89,7 @@ def _find_cmd(cmd):
def _system_body(p):
"""Callback for _system."""
- enc = sys.stdin.encoding or sys.getdefaultencoding()
+ enc = text.getdefaultencoding()
for line in read_no_interrupt(p.stdout).splitlines():
line = line.decode(enc, 'replace')
print(line, file=sys.stdout)
@@ -17,6 +17,7 @@
from datetime import datetime
from IPython.utils import py3compat
+from IPython.utils import text
next_attr_name = '__next__' if py3compat.PY3 else 'next'
#-----------------------------------------------------------------------------
@@ -134,7 +135,7 @@ def json_clean(obj):
return obj
if isinstance(obj, bytes):
- return obj.decode(sys.getdefaultencoding(), 'replace')
+ return obj.decode(text.getdefaultencoding(), 'replace')
if isinstance(obj, container_to_list) or (
hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
View
@@ -16,9 +16,11 @@
import __main__
+import locale
import os
import re
import shutil
+import sys
import textwrap
from string import Formatter
@@ -31,6 +33,28 @@
# Code
#-----------------------------------------------------------------------------
# Less conservative replacement for sys.getdefaultencoding, that will try
# to match the environment.
# Defined here as central function, so if we find better choices, we
# won't need to make changes all over IPython.
def getdefaultencoding():
    """Return IPython's guess for the default encoding for bytes as text.

    Candidates are tried in this order, and the first non-empty one wins:

    1. ``sys.stdin.encoding``, to match the calling terminal. This is often
       None for subprocesses, and ``sys.stdin`` itself may be None or a
       replacement stream that has no ``encoding`` attribute at all.
    2. ``locale.getpreferredencoding(False)``, which should be a sensible
       platform default (one that respects the LANG environment variable).
    3. ``sys.getdefaultencoding()``, the most conservative option, and
       usually ASCII.

    Returns
    -------
    str
        The name of the chosen encoding.
    """
    # Use getattr rather than plain attribute access: sys.stdin can be None
    # (no console attached) or a stand-in object without `encoding`, and this
    # function must never raise just because of that.
    enc = getattr(sys.stdin, 'encoding', None)
    if not enc:
        try:
            # There are reports of getpreferredencoding raising errors
            # in some cases, which may well be fixed, but let's be
            # conservative here and simply fall through to the next guess.
            enc = locale.getpreferredencoding(False)
        except Exception:
            pass
    return enc or sys.getdefaultencoding()
def unquote_ends(istr):
"""Remove a single pair of quotes from the endpoints of a string."""
@@ -4,7 +4,7 @@
from session import extract_header, Message
-from IPython.utils import io
+from IPython.utils import io, text
#-----------------------------------------------------------------------------
# Globals
@@ -69,7 +69,7 @@ def write(self, string):
else:
# Make sure that we're handling unicode
if not isinstance(string, unicode):
- enc = sys.stdin.encoding or sys.getdefaultencoding()
+ enc = text.getdefaultencoding()
string = string.decode(enc, 'replace')
self._buffer.write(string)

0 comments on commit 6392ceb

Please sign in to comment.