From 8f95512878979f4f00f2fa6792715e731495d571 Mon Sep 17 00:00:00 2001 From: Maoz Gelbart <13831112+MaozGelbart@users.noreply.github.com> Date: Tue, 17 Mar 2020 23:40:43 +0200 Subject: [PATCH] python3-only to_utf8 (#1979) Co-authored-by: Michael Waskom (cherry picked from commit 3f7761de1a47eb8b27c2e9c4b2a66979e54856b4) --- seaborn/tests/test_axisgrid.py | 139 --------------------------------- seaborn/tests/test_utils.py | 26 ++++-- seaborn/utils.py | 42 +++------- 3 files changed, 30 insertions(+), 177 deletions(-) diff --git a/seaborn/tests/test_axisgrid.py b/seaborn/tests/test_axisgrid.py index 4f2a62248b..97c3a9307b 100644 --- a/seaborn/tests/test_axisgrid.py +++ b/seaborn/tests/test_axisgrid.py @@ -573,145 +573,6 @@ def test_dropna(self): g = ag.FacetGrid(df, dropna=True, row="hasna") nt.assert_equal(g._not_na.sum(), 50) - def test_unicode_column_label_with_rows(self): - - # use a smaller copy of the default testing data frame: - df = self.df.copy() - df = df[["a", "b", "x"]] - - # rename column 'a' (which will be used for the columns in the grid) - # by using a Unicode string: - unicode_column_label = u"\u01ff\u02ff\u03ff" - df = df.rename(columns={"a": unicode_column_label}) - - # ensure that the data frame columns have the expected names: - nt.assert_equal(list(df.columns), [unicode_column_label, "b", "x"]) - - # plot the grid -- if successful, no UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, col=unicode_column_label, row="b") - g = g.map(plt.plot, "x") - - def test_unicode_column_label_no_rows(self): - - # use a smaller copy of the default testing data frame: - df = self.df.copy() - df = df[["a", "x"]] - - # rename column 'a' (which will be used for the columns in the grid) - # by using a Unicode string: - unicode_column_label = u"\u01ff\u02ff\u03ff" - df = df.rename(columns={"a": unicode_column_label}) - - # ensure that the data frame columns have the expected names: - nt.assert_equal(list(df.columns), [unicode_column_label, "x"]) - - # 
plot the grid -- if successful, no UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, col=unicode_column_label) - g = g.map(plt.plot, "x") - - def test_unicode_row_label_with_columns(self): - - # use a smaller copy of the default testing data frame: - df = self.df.copy() - df = df[["a", "b", "x"]] - - # rename column 'b' (which will be used for the rows in the grid) - # by using a Unicode string: - unicode_row_label = u"\u01ff\u02ff\u03ff" - df = df.rename(columns={"b": unicode_row_label}) - - # ensure that the data frame columns have the expected names: - nt.assert_equal(list(df.columns), ["a", unicode_row_label, "x"]) - - # plot the grid -- if successful, no UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, col="a", row=unicode_row_label) - g = g.map(plt.plot, "x") - - def test_unicode_row_label_no_columns(self): - - # use a smaller copy of the default testing data frame: - df = self.df.copy() - df = df[["b", "x"]] - - # rename column 'b' (which will be used for the rows in the grid) - # by using a Unicode string: - unicode_row_label = u"\u01ff\u02ff\u03ff" - df = df.rename(columns={"b": unicode_row_label}) - - # ensure that the data frame columns have the expected names: - nt.assert_equal(list(df.columns), [unicode_row_label, "x"]) - - # plot the grid -- if successful, no UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, row=unicode_row_label) - g = g.map(plt.plot, "x") - - @pytest.mark.skipif(pd.__version__.startswith("0.24"), - reason="known bug in pandas") - def test_unicode_content_with_row_and_column(self): - - df = self.df.copy() - - # replace content of column 'a' (which will form the columns in the - # grid) by Unicode characters: - unicode_column_val = np.repeat((u'\u01ff', u'\u02ff', u'\u03ff'), 20) - df["a"] = unicode_column_val - - # make sure that the replacement worked as expected: - nt.assert_equal( - list(df["a"]), - [u'\u01ff'] * 20 + [u'\u02ff'] * 20 + [u'\u03ff'] * 20) - - # plot the grid -- if successful, no 
UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, col="a", row="b") - g = g.map(plt.plot, "x") - - @pytest.mark.skipif(pd.__version__.startswith("0.24"), - reason="known bug in pandas") - def test_unicode_content_no_rows(self): - - df = self.df.copy() - - # replace content of column 'a' (which will form the columns in the - # grid) by Unicode characters: - unicode_column_val = np.repeat((u'\u01ff', u'\u02ff', u'\u03ff'), 20) - df["a"] = unicode_column_val - - # make sure that the replacement worked as expected: - nt.assert_equal( - list(df["a"]), - [u'\u01ff'] * 20 + [u'\u02ff'] * 20 + [u'\u03ff'] * 20) - - # plot the grid -- if successful, no UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, col="a") - g = g.map(plt.plot, "x") - - @pytest.mark.skipif(pd.__version__.startswith("0.24"), - reason="known bug in pandas") - def test_unicode_content_no_columns(self): - - df = self.df.copy() - - # replace content of column 'a' (which will form the rows in the - # grid) by Unicode characters: - unicode_column_val = np.repeat((u'\u01ff', u'\u02ff', u'\u03ff'), 20) - df["b"] = unicode_column_val - - # make sure that the replacement worked as expected: - nt.assert_equal( - list(df["b"]), - [u'\u01ff'] * 20 + [u'\u02ff'] * 20 + [u'\u03ff'] * 20) - - # plot the grid -- if successful, no UnicodeEncodingError should - # occur: - g = ag.FacetGrid(df, row="b") - g = g.map(plt.plot, "x") - def test_categorical_column_missing_categories(self): df = self.df.copy() diff --git a/seaborn/tests/test_utils.py b/seaborn/tests/test_utils.py index 6ffc1f3663..aa9141b774 100644 --- a/seaborn/tests/test_utils.py +++ b/seaborn/tests/test_utils.py @@ -5,7 +5,8 @@ import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt -from cycler import cycler +from cycler import cycler + import pytest import nose import nose.tools as nt @@ -120,12 +121,25 @@ def test_iqr(): assert_equal(iqr, 2) -def test_str_to_utf8(): - """Test the to_utf8 function: string to 
Unicode""" - s = "\u01ff\u02ff" +@pytest.mark.parametrize( + "s,exp", + [ + ("a", "a"), + ("abc", "abc"), + (b"a", "a"), + (b"abc", "abc"), + (bytearray("abc", "utf-8"), "abc"), + (bytearray(), ""), + (1, "1"), + (0, "0"), + ([], str([])), + ], +) +def test_to_utf8(s, exp): + """Test the to_utf8 function: object to string""" u = utils.to_utf8(s) - assert_equal(type(s), type(str())) - assert_equal(type(u), type(u"\u01ff\u02ff")) + assert_equal(type(u), str) + assert_equal(u, exp) class TestSpineUtils(object): diff --git a/seaborn/utils.py b/seaborn/utils.py index 71d67ab715..3b4ce348ca 100644 --- a/seaborn/utils.py +++ b/seaborn/utils.py @@ -594,16 +594,14 @@ def relative_luminance(color): def to_utf8(obj): - """Return a Unicode string representing a Python object. + """Return a string representing a Python object. - Unicode strings (i.e. type ``unicode`` in Python 2.7 and type ``str`` in - Python 3.x) are returned unchanged. + Strings (i.e. type ``str``) are returned unchanged. - Byte strings (i.e. type ``str`` in Python 2.7 and type ``bytes`` in - Python 3.x) are returned as UTF-8-encoded strings. + Byte strings (i.e. type ``bytes``) are returned as UTF-8-decoded strings. For other objects, the method ``__str__()`` is called, and the result is - returned as a UTF-8-encoded string. + returned as a string. 
Parameters ---------- @@ -612,35 +610,15 @@ def to_utf8(obj): Returns ------- - s : unicode (Python 2.7) / str (Python 3.x) - UTF-8-encoded string representation of ``obj`` + s : str + UTF-8-decoded string representation of ``obj`` """ if isinstance(obj, str): - try: - # If obj is a string, try to return it as a Unicode-encoded - # string: - return obj.decode("utf-8") - except AttributeError: - # Python 3.x strings are already Unicode, and do not have a - # decode() method, so the unchanged string is returned - return obj - + return obj try: - if isinstance(obj, unicode): - # do not attemt a conversion if string is already a Unicode - # string: - return obj - else: - # call __str__() for non-string object, and return the - # result to Unicode: - return obj.__str__().decode("utf-8") - except NameError: - # NameError is raised in Python 3.x as type 'unicode' is not - # defined. - if isinstance(obj, bytes): - return obj.decode("utf-8") - else: - return obj.__str__() + return obj.decode(encoding="utf-8") + except AttributeError: # obj is not bytes-like + return str(obj) def _network(t=None, url='https://google.com'):