Skip to content

Commit

Permalink
i18n: unicode encoding does not throw error (RhBug:1155877)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Silhan authored and radekholy24 committed Feb 2, 2015
1 parent 17801bb commit 543922c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
7 changes: 4 additions & 3 deletions dnf/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def _full_ucd_support(encoding):

def _guess_encoding():
""" Take the best shot at the current system's string encoding. """
return locale.getpreferredencoding()
encoding = locale.getpreferredencoding()
return 'utf-8' if encoding.startswith("ANSI") else encoding

def setup_locale():
try:
Expand Down Expand Up @@ -112,7 +113,7 @@ def ucd(obj):
""" Like the builtin unicode() but tries to use a reasonable encoding. """
if PY3:
if is_py3bytes(obj):
return str(obj, _guess_encoding())
return str(obj, _guess_encoding(), errors='ignore')
elif isinstance(obj, str):
return obj
return str(obj)
Expand All @@ -127,7 +128,7 @@ def ucd(obj):
return unicode(obj)
except UnicodeError:
pass
return unicode(str(obj), _guess_encoding())
return unicode(str(obj), _guess_encoding(), errors='ignore')


# functions for formatting output according to terminal width,
Expand Down
18 changes: 16 additions & 2 deletions tests/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import dnf.i18n
import sys

UC_TEXT = u'Šířka' # means 'Width' in Czech
UC_TEXT_OSERROR = u'Soubor již existuje' # 'File already exists'
UC_TEXT = 'Šířka' # means 'Width' in Czech
UC_TEXT_OSERROR = 'Soubor již existuje' # 'File already exists'
STR_TEXT_OSERROR = 'Soubor již existuje'

@mock.patch('locale.setlocale')
Expand Down Expand Up @@ -114,6 +114,20 @@ def test_download_error_unicode(self):
self.assertEqual("e1: x\ne1: y", str(err))
self.assertEqual("e1: x\ne1: y", dnf.i18n.ucd(err))

@mock.patch('locale.getpreferredencoding', return_value='ANSI_X3.4-1968')
def test_ucd_acii(self, _unused):
s = UC_TEXT.encode('utf8')
# the reported ASCII encoding is overridden by UTF-8
u = dnf.i18n.ucd(s)
self.assertEqual(u, UC_TEXT)

@mock.patch('dnf.i18n._guess_encoding', return_value='utf-8')
def test_ucd_skip(self, _unused):
s = UC_TEXT.encode('iso-8859-2')
# characters that cannot be decoded are skipped
u = dnf.i18n.ucd(s)
self.assertEqual(u, "ka")


class TestFormatedOutput(TestCase):
def test_fill_exact_width(self):
Expand Down

0 comments on commit 543922c

Please sign in to comment.