tensorflow · rmlarsen · Feb 8, 2018 · Jan 23, 2018 · Jan 23, 2018 · Jan 25, 2018
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -212,6 +213,16 @@ def testAlias(self):
       value.op.run()
       self.assertAllEqual(np_array, [1.0, 2.0])
 
+  def testReturnUnicodeString(self):
+    with self.test_session():
+      correct = u"你好 世界"  # non-ASCII text
+
+      def unicode_string():
+        return correct
+
+      z, = script_ops.py_func(unicode_string, [], [dtypes.string])
+      self.assertEqual(z.eval(), correct.encode("utf8"))  # UTF-8 bytes
+
   def testBadNumpyReturnType(self):
     with self.test_session():
 

diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py
@@ -95,7 +95,7 @@ def _convert(value, dtype=None):
     components of a tensor have different lengths.  This is bad: ignoring the
     padding is wrong for text data, and removing the padding is wrong for binary
     data.  To avoid this bug, we redo the conversion using an object dtype.
-    Additionally, we convert unicode strings to (byte-)strings for Python3
+    Additionally, we convert unicode strings to (byte-)strings for
     compatibility.
 
     Args:
@@ -109,7 +109,7 @@ def _convert(value, dtype=None):
     if result.dtype.char == "S" and result is not value:
       return np.asarray(value, order="C", dtype=object)
     elif result.dtype.char == "U" and result is not value:
-      value = np.vectorize(lambda x: x.encode())(value)
+      value = np.vectorize(lambda x: x.encode("utf8"))(value)
       return np.asarray(value, order="C", dtype=object)
     elif result.dtype.char == "U":
       return result.astype(np.bytes_)