From 9d48d3560924df8ddde49844d763fc03ba6add45 Mon Sep 17 00:00:00 2001
From: cclauss
Date: Wed, 23 Aug 2017 03:54:42 +0200
Subject: [PATCH 1/2] Simplify native_to_unicode() & unicode_to_native()

The first [uses feature detection, instead of version detection](https://docs.python.org/3/howto/pyporting.html#use-feature-detection-instead-of-version-detection) and the second [avoids assigning a lambda expression to a variable](https://docs.quantifiedcode.com/python-anti-patterns/correctness/assigning_a_lambda_to_a_variable.html).
---
 tensor2tensor/data_generators/text_encoder.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py
index b628a538f..5a3220876 100644
--- a/tensor2tensor/data_generators/text_encoder.py
+++ b/tensor2tensor/data_generators/text_encoder.py
@@ -56,19 +56,18 @@
 _ESCAPE_CHARS = set(u"\\_u;0123456789")
 
 
-def native_to_unicode_py2(s):
-  """Python 2: transform native string to Unicode."""
-  return s if isinstance(s, unicode) else s.decode("utf8")
+def native_to_unicode(s):
+  """Transform native string to Unicode."""
+  try:  # Python 2
+    return s if isinstance(s, unicode) else s.decode("utf8")
+  except NameError:  # Python 3: unicode() was dropped
+    return s
 
 
 # Conversion between Unicode and UTF-8, if required (on Python2)
-if six.PY2:
-  native_to_unicode = native_to_unicode_py2
-  unicode_to_native = lambda s: s.encode("utf-8")
-else:
-  # No conversion required on Python3
-  native_to_unicode = lambda s: s
-  unicode_to_native = lambda s: s
+def unicode_to_native(s):
+  """Transform Unicode to native string."""
+  return s.encode("utf-8") if six.PY2 else s  # No conversion required on Python3
 
 
 class TextEncoder(object):

From 33e798a5fe004bfda0150d929c31728f1a181c45 Mon Sep 17 00:00:00 2001
From: cclauss
Date: Wed, 23 Aug 2017 13:31:31 +0200
Subject: [PATCH 2/2] A much cleaner approach

---
 tensor2tensor/data_generators/text_encoder.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/tensor2tensor/data_generators/text_encoder.py b/tensor2tensor/data_generators/text_encoder.py
index 5a3220876..f6897d04d 100644
--- a/tensor2tensor/data_generators/text_encoder.py
+++ b/tensor2tensor/data_generators/text_encoder.py
@@ -56,18 +56,13 @@
 _ESCAPE_CHARS = set(u"\\_u;0123456789")
 
 
-def native_to_unicode(s):
-  """Transform native string to Unicode."""
-  try:  # Python 2
-    return s if isinstance(s, unicode) else s.decode("utf8")
-  except NameError:  # Python 3: unicode() was dropped
-    return s
-
-
-# Conversion between Unicode and UTF-8, if required (on Python2)
-def unicode_to_native(s):
-  """Transform Unicode to native string."""
-  return s.encode("utf-8") if six.PY2 else s  # No conversion required on Python3
+if six.PY2:
+  def native_to_unicode(s): return s if isinstance(s, unicode) else s.decode("utf8")  # noqa: F821
+  def unicode_to_native(s): return s.encode("utf-8")
+else:
+  # No conversion required on Python >= 3
+  def native_to_unicode(s): return s
+  def unicode_to_native(s): return s
 
 
 class TextEncoder(object):