Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/mog_commons/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.3'
__version__ = '0.1.4'
9 changes: 9 additions & 0 deletions src/mog_commons/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import six

__all__ = ['get_single_item', 'get_single_key', 'get_single_value', 'distinct']


def get_single_item(d):
"""Get an item from a dict which contains just one item."""
Expand All @@ -19,3 +21,10 @@ def get_single_value(d):
"""Get a value from a dict which contains just one item."""
assert len(d) == 1, 'Single-item dict must have just one item, not %d.' % len(d)
return next(six.itervalues(d))


def distinct(xs):
    """
    Get the list of distinct values, preserving the order of first occurrence.

    :param xs: iterable of hashable values
    :return: list: each value of ``xs`` exactly once, in first-seen order
    """
    # collections.OrderedDict is not used because Python 2.6 is supported.
    seen = set()
    result = []
    for x in xs:
        # An explicit loop keeps the bookkeeping on ``seen`` visible instead of
        # hiding a side effect inside a list-comprehension filter.
        if x not in seen:
            seen.add(x)
            result.append(x)
    return result
34 changes: 34 additions & 0 deletions src/mog_commons/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
from unicodedata import east_asian_width
import six

from mog_commons.collection import distinct

__all__ = [
'is_unicode',
'is_strlike',
'unicode_width',
'to_unicode',
'to_str',
'to_bytes',
'edge_just',
'unicode_right',
'unicode_left',
'unicode_decode',
]

__unicode_width_mapping = {'F': 2, 'H': 1, 'W': 2, 'Na': 1, 'A': 2, 'N': 1}


Expand Down Expand Up @@ -104,3 +119,22 @@ def unicode_right(s, width):
break
i -= 1
return s[i:]


def unicode_decode(data, encoding_list):
    """
    Decode string data with one or more encodings, trying sequentially.

    Each encoding except the last is tried in order and a UnicodeDecodeError
    moves on to the next one. The last encoding is applied without a guard, so
    its UnicodeDecodeError propagates to the caller.

    :param data: bytes: encoded string data
    :param encoding_list: iterable[string] or string: encoding names
    :return: string: decoded string
    :raises AssertionError: if no encoding is given
    :raises UnicodeDecodeError: if the data cannot be decoded with any encoding
    """
    # Raised explicitly (same type and message as before) so that the check is
    # not stripped when Python runs with -O.
    if not encoding_list:
        raise AssertionError('encodings must not be empty.')

    # Only a string is a single encoding name; any other iterable (list, tuple,
    # generator, ...) is a sequence of names. Duplicates are dropped so that the
    # same encoding is never tried twice.
    if isinstance(encoding_list, six.string_types):
        candidates = [encoding_list]
    else:
        candidates = distinct(encoding_list)

    # An empty generator is truthy, so emptiness is checked again after it has
    # been materialized.
    if not candidates:
        raise AssertionError('encodings must not be empty.')

    for encoding in candidates[:-1]:
        try:
            return data.decode(encoding)
        except UnicodeDecodeError:
            # Expected while probing; fall through to the next candidate.
            continue
    return data.decode(candidates[-1])
15 changes: 12 additions & 3 deletions src/mog_commons/unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class StringBuffer(object):

We don't use StringIO because there are many differences between PY2 and PY3.
"""

def __init__(self, init_buffer=None):
self._buffer = init_buffer or b''

Expand All @@ -37,9 +38,17 @@ def getvalue(self, encoding='utf-8', errors='strict'):

class TestCase(base_unittest.TestCase):
def assertRaisesRegexp(self, expected_exception, expected_regexp, callable_obj=None, *args, **kwargs):
    """
    Accept difference of the function name between PY2 and PY3.

    We don't use built-in assertRaisesRegexp because it is unicode-unsafe.

    :param expected_exception: exception class expected to be raised
    :param expected_regexp: regular expression matched against ``str()`` of the raised exception
    :param callable_obj: callable invoked with ``*args`` and ``**kwargs``; when omitted,
                         the base class's context manager is returned instead
    :return: None when ``callable_obj`` is given, otherwise the base class's context manager
    """
    if callable_obj is None:
        # Calling None would raise a TypeError inside the assertRaises block,
        # where it could even be mistaken for the expected exception.
        # NOTE(review): this path goes through the built-in implementation, so
        # it does not get the unicode handling of the path below.
        base = base_unittest.TestCase
        f = base.assertRaisesRegex if six.PY3 else base.assertRaisesRegexp
        return f(self, expected_exception, expected_regexp)

    with self.assertRaises(expected_exception) as cm:
        callable_obj(*args, **kwargs)

    # The regular-expression assertion has a different name on PY2 and PY3.
    if six.PY2:
        self.assertRegexpMatches(str(cm.exception), expected_regexp)
    else:
        self.assertRegex(str(cm.exception), expected_regexp)

def assertOutput(self, expected_stdout, expected_stderr, function, encoding='utf-8'):
with self.withOutput() as (out, err):
Expand Down
11 changes: 10 additions & 1 deletion tests/mog_commons/test_collection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import division, print_function, absolute_import, unicode_literals

from mog_commons.collection import get_single_item, get_single_key, get_single_value
from mog_commons.collection import *
from mog_commons import unittest


Expand Down Expand Up @@ -31,3 +31,12 @@ def test_get_single_value_error(self):
{})
self.assertRaisesRegexp(AssertionError, 'Single-item dict must have just one item, not 2.', get_single_value,
{'x': 123, 'y': 45})

def test_distinct(self):
    """Check that distinct() drops repeated values and keeps first-seen order."""
    # (input, expected) pairs, checked in the order listed.
    cases = [
        ([], []),
        ([1], [1]),
        ([1] * 100, [1]),
        ([1, 2, 3, 4, 5], [1, 2, 3, 4, 5]),
        ([1, 2, 1, 2, 1], [1, 2]),
        ([2, 1, 2, 1, 1], [2, 1]),
        ('mog-commons-python', ['m', 'o', 'g', '-', 'c', 'n', 's', 'p', 'y', 't', 'h']),
    ]
    for given, expected in cases:
        self.assertEqual(distinct(given), expected)
16 changes: 16 additions & 0 deletions tests/mog_commons/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,19 @@ def test_unicode_right(self):
self.assertEqual(string.unicode_right('あいうえお', 11), 'あいうえお')
self.assertEqual(string.unicode_right('あxいxうxえxお', 4), 'xお')
self.assertEqual(string.unicode_right('あxいxうxえxお', 5), 'えxお')

def test_unicode_decode(self):
    """Check unicode_decode() with single and multiple encodings, including failures."""
    decode = string.unicode_decode
    utf8_bytes = 'あいうえお'.encode('utf-8')
    sjis_bytes = 'あいうえお'.encode('sjis')

    # An empty encoding list is rejected.
    self.assertRaisesRegexp(AssertionError, 'encodings must not be empty.', decode, 'abc', [])

    # A single encoding may be given as a bare string or as a one-item list.
    for encodings in ('ascii', ['ascii']):
        self.assertEqual(decode(b'abc', encodings), 'abc')

    self.assertRaisesRegexp(
        UnicodeDecodeError, "'ascii' codec can't decode",
        decode, utf8_bytes, 'ascii')

    # UTF-8 data decodes wherever 'utf-8' sits in the list, duplicates included.
    utf8_orderings = (
        ['ascii', 'sjis', 'utf-8'],
        ['ascii', 'utf-8', 'sjis'],
        ['utf-8', 'ascii', 'sjis'],
        ['utf-8', 'utf-8', 'utf-8'],
    )
    for encodings in utf8_orderings:
        self.assertEqual(decode(utf8_bytes, encodings), 'あいうえお')

    self.assertEqual(decode(sjis_bytes, ['ascii', 'utf-8', 'sjis']), 'あいうえお')

    # When every encoding fails, the error reported is the last encoding's.
    self.assertRaisesRegexp(
        UnicodeDecodeError, "'shift_jis' codec can't decode",
        decode, utf8_bytes, ['ascii', 'sjis'])