python3-only to_utf8 (#1979)

Co-authored-by: Michael Waskom <mwaskom@users.noreply.github.com> (cherry picked from commit 3f7761d)
mwaskom · Mar 17, 2020 · 8f95512 · 8f95512
1 parent ac401d4
commit 8f95512
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 177 deletions.
diff --git a/seaborn/tests/test_axisgrid.py b/seaborn/tests/test_axisgrid.py
@@ -573,145 +573,6 @@ def test_dropna(self):
         g = ag.FacetGrid(df, dropna=True, row="hasna")
         nt.assert_equal(g._not_na.sum(), 50)
 
-    def test_unicode_column_label_with_rows(self):
-
-        # use a smaller copy of the default testing data frame:
-        df = self.df.copy()
-        df = df[["a", "b", "x"]]
-
-        # rename column 'a' (which will be used for the columns in the grid)
-        # by using a Unicode string:
-        unicode_column_label = u"\u01ff\u02ff\u03ff"
-        df = df.rename(columns={"a": unicode_column_label})
-
-        # ensure that the data frame columns have the expected names:
-        nt.assert_equal(list(df.columns), [unicode_column_label, "b", "x"])
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, col=unicode_column_label, row="b")
-        g = g.map(plt.plot, "x")
-
-    def test_unicode_column_label_no_rows(self):
-
-        # use a smaller copy of the default testing data frame:
-        df = self.df.copy()
-        df = df[["a", "x"]]
-
-        # rename column 'a' (which will be used for the columns in the grid)
-        # by using a Unicode string:
-        unicode_column_label = u"\u01ff\u02ff\u03ff"
-        df = df.rename(columns={"a": unicode_column_label})
-
-        # ensure that the data frame columns have the expected names:
-        nt.assert_equal(list(df.columns), [unicode_column_label, "x"])
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, col=unicode_column_label)
-        g = g.map(plt.plot, "x")
-
-    def test_unicode_row_label_with_columns(self):
-
-        # use a smaller copy of the default testing data frame:
-        df = self.df.copy()
-        df = df[["a", "b", "x"]]
-
-        # rename column 'b' (which will be used for the rows in the grid)
-        # by using a Unicode string:
-        unicode_row_label = u"\u01ff\u02ff\u03ff"
-        df = df.rename(columns={"b": unicode_row_label})
-
-        # ensure that the data frame columns have the expected names:
-        nt.assert_equal(list(df.columns), ["a", unicode_row_label, "x"])
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, col="a", row=unicode_row_label)
-        g = g.map(plt.plot, "x")
-
-    def test_unicode_row_label_no_columns(self):
-
-        # use a smaller copy of the default testing data frame:
-        df = self.df.copy()
-        df = df[["b", "x"]]
-
-        # rename column 'b' (which will be used for the rows in the grid)
-        # by using a Unicode string:
-        unicode_row_label = u"\u01ff\u02ff\u03ff"
-        df = df.rename(columns={"b": unicode_row_label})
-
-        # ensure that the data frame columns have the expected names:
-        nt.assert_equal(list(df.columns), [unicode_row_label, "x"])
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, row=unicode_row_label)
-        g = g.map(plt.plot, "x")
-
-    @pytest.mark.skipif(pd.__version__.startswith("0.24"),
-                        reason="known bug in pandas")
-    def test_unicode_content_with_row_and_column(self):
-
-        df = self.df.copy()
-
-        # replace content of column 'a' (which will form the columns in the
-        # grid) by Unicode characters:
-        unicode_column_val = np.repeat((u'\u01ff', u'\u02ff', u'\u03ff'), 20)
-        df["a"] = unicode_column_val
-
-        # make sure that the replacement worked as expected:
-        nt.assert_equal(
-            list(df["a"]),
-            [u'\u01ff'] * 20 + [u'\u02ff'] * 20 + [u'\u03ff'] * 20)
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, col="a", row="b")
-        g = g.map(plt.plot, "x")
-
-    @pytest.mark.skipif(pd.__version__.startswith("0.24"),
-                        reason="known bug in pandas")
-    def test_unicode_content_no_rows(self):
-
-        df = self.df.copy()
-
-        # replace content of column 'a' (which will form the columns in the
-        # grid) by Unicode characters:
-        unicode_column_val = np.repeat((u'\u01ff', u'\u02ff', u'\u03ff'), 20)
-        df["a"] = unicode_column_val
-
-        # make sure that the replacement worked as expected:
-        nt.assert_equal(
-            list(df["a"]),
-            [u'\u01ff'] * 20 + [u'\u02ff'] * 20 + [u'\u03ff'] * 20)
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, col="a")
-        g = g.map(plt.plot, "x")
-
-    @pytest.mark.skipif(pd.__version__.startswith("0.24"),
-                        reason="known bug in pandas")
-    def test_unicode_content_no_columns(self):
-
-        df = self.df.copy()
-
-        # replace content of column 'a' (which will form the rows in the
-        # grid) by Unicode characters:
-        unicode_column_val = np.repeat((u'\u01ff', u'\u02ff', u'\u03ff'), 20)
-        df["b"] = unicode_column_val
-
-        # make sure that the replacement worked as expected:
-        nt.assert_equal(
-            list(df["b"]),
-            [u'\u01ff'] * 20 + [u'\u02ff'] * 20 + [u'\u03ff'] * 20)
-
-        # plot the grid -- if successful, no UnicodeEncodingError should
-        # occur:
-        g = ag.FacetGrid(df, row="b")
-        g = g.map(plt.plot, "x")
-
     def test_categorical_column_missing_categories(self):
 
         df = self.df.copy()

diff --git a/seaborn/tests/test_utils.py b/seaborn/tests/test_utils.py
@@ -5,7 +5,8 @@
 import pandas as pd
 import matplotlib as mpl
 import matplotlib.pyplot as plt
-from cycler import cycler
+from cycler import cycler
+
 import pytest
 import nose
 import nose.tools as nt
@@ -120,12 +121,25 @@ def test_iqr():
     assert_equal(iqr, 2)
 
 
-def test_str_to_utf8():
-    """Test the to_utf8 function: string to Unicode"""
-    s = "\u01ff\u02ff"
+@pytest.mark.parametrize(
+    "s,exp",
+    [
+        ("a", "a"),
+        ("abc", "abc"),
+        (b"a", "a"),
+        (b"abc", "abc"),
+        (bytearray("abc", "utf-8"), "abc"),
+        (bytearray(), ""),
+        (1, "1"),
+        (0, "0"),
+        ([], str([])),
+    ],
+)
+def test_to_utf8(s, exp):
+    """Test the to_utf8 function: object to string"""
     u = utils.to_utf8(s)
-    assert_equal(type(s), type(str()))
-    assert_equal(type(u), type(u"\u01ff\u02ff"))
+    assert_equal(type(u), str)
+    assert_equal(u, exp)
 
 
 class TestSpineUtils(object):

diff --git a/seaborn/utils.py b/seaborn/utils.py
@@ -594,16 +594,14 @@ def relative_luminance(color):
 
 
 def to_utf8(obj):
-    """Return a Unicode string representing a Python object.
+    """Return a string representing a Python object.
 
-    Unicode strings (i.e. type ``unicode`` in Python 2.7 and type ``str`` in
-    Python 3.x) are returned unchanged.
+    Strings (i.e. type ``str``) are returned unchanged.
 
-    Byte strings (i.e. type ``str`` in Python 2.7 and type ``bytes`` in
-    Python 3.x) are returned as UTF-8-encoded strings.
+    Byte strings (i.e. type ``bytes``) are returned as UTF-8-decoded strings.
 
     For other objects, the method ``__str__()`` is called, and the result is
-    returned as a UTF-8-encoded string.
+    returned as a string.
 
     Parameters
     ----------
@@ -612,35 +610,15 @@ def to_utf8(obj):
 
     Returns
     -------
-    s : unicode (Python 2.7) / str (Python 3.x)
-        UTF-8-encoded string representation of ``obj``
+    s : str
+        UTF-8-decoded string representation of ``obj``
     """
     if isinstance(obj, str):
-        try:
-            # If obj is a string, try to return it as a Unicode-encoded
-            # string:
-            return obj.decode("utf-8")
-        except AttributeError:
-            # Python 3.x strings are already Unicode, and do not have a
-            # decode() method, so the unchanged string is returned
-            return obj
-
+        return obj
     try:
-        if isinstance(obj, unicode):
-            # do not attemt a conversion if string is already a Unicode
-            # string:
-            return obj
-        else:
-            # call __str__() for non-string object, and return the
-            # result to Unicode:
-            return obj.__str__().decode("utf-8")
-    except NameError:
-        # NameError is raised in Python 3.x as type 'unicode' is not
-        # defined.
-        if isinstance(obj, bytes):
-            return obj.decode("utf-8")
-        else:
-            return obj.__str__()
+        return obj.decode(encoding="utf-8")
+    except AttributeError:  # obj is not bytes-like
+        return str(obj)
 
 
 def _network(t=None, url='https://google.com'):