Skip to content

Commit

Permalink
python3-only to_utf8 (#1979)
Browse files Browse the repository at this point in the history
Co-authored-by: Michael Waskom <mwaskom@users.noreply.github.com>
(cherry picked from commit 3f7761d)
  • Loading branch information
MaozGelbart authored and mwaskom committed Mar 17, 2020
1 parent ac401d4 commit 8f95512
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 177 deletions.
139 changes: 0 additions & 139 deletions seaborn/tests/test_axisgrid.py
Expand Up @@ -573,145 +573,6 @@ def test_dropna(self):
g = ag.FacetGrid(df, dropna=True, row="hasna")
nt.assert_equal(g._not_na.sum(), 50)

def test_unicode_column_label_with_rows(self):
    """Facet on a Unicode-named column variable together with a row variable."""
    label = u"\u01ff\u02ff\u03ff"

    # Take a three-column copy of the fixture frame and give column "a"
    # (the one driving the grid columns) the non-ASCII name.
    subset = self.df.copy()[["a", "b", "x"]].rename(columns={"a": label})

    # Sanity check: the rename produced the expected column names.
    nt.assert_equal(list(subset.columns), [label, "b", "x"])

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(subset, col=label, row="b")
    grid.map(plt.plot, "x")

def test_unicode_column_label_no_rows(self):
    """Facet on a Unicode-named column variable with no row variable."""
    label = u"\u01ff\u02ff\u03ff"

    # Take a two-column copy of the fixture frame and give column "a"
    # (the one driving the grid columns) the non-ASCII name.
    subset = self.df.copy()[["a", "x"]].rename(columns={"a": label})

    # Sanity check: the rename produced the expected column names.
    nt.assert_equal(list(subset.columns), [label, "x"])

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(subset, col=label)
    grid.map(plt.plot, "x")

def test_unicode_row_label_with_columns(self):
    """Facet on a Unicode-named row variable together with a column variable."""
    label = u"\u01ff\u02ff\u03ff"

    # Take a three-column copy of the fixture frame and give column "b"
    # (the one driving the grid rows) the non-ASCII name.
    subset = self.df.copy()[["a", "b", "x"]].rename(columns={"b": label})

    # Sanity check: the rename produced the expected column names.
    nt.assert_equal(list(subset.columns), ["a", label, "x"])

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(subset, col="a", row=label)
    grid.map(plt.plot, "x")

def test_unicode_row_label_no_columns(self):
    """Facet on a Unicode-named row variable with no column variable."""
    label = u"\u01ff\u02ff\u03ff"

    # Take a two-column copy of the fixture frame and give column "b"
    # (the one driving the grid rows) the non-ASCII name.
    subset = self.df.copy()[["b", "x"]].rename(columns={"b": label})

    # Sanity check: the rename produced the expected column names.
    nt.assert_equal(list(subset.columns), [label, "x"])

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(subset, row=label)
    grid.map(plt.plot, "x")

@pytest.mark.skipif(pd.__version__.startswith("0.24"),
                    reason="known bug in pandas")
def test_unicode_content_with_row_and_column(self):
    """Facet on a column holding Unicode values, with a row variable too."""
    glyphs = (u'\u01ff', u'\u02ff', u'\u03ff')

    # Overwrite column "a" (used for the grid columns) with 20 copies of
    # each non-ASCII character.
    frame = self.df.copy()
    frame["a"] = np.repeat(glyphs, 20)

    # Sanity check: the column now holds the three runs of 20 characters.
    expected = [ch for ch in glyphs for _ in range(20)]
    nt.assert_equal(list(frame["a"]), expected)

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(frame, col="a", row="b")
    grid.map(plt.plot, "x")

@pytest.mark.skipif(pd.__version__.startswith("0.24"),
                    reason="known bug in pandas")
def test_unicode_content_no_rows(self):
    """Facet on a column holding Unicode values, without a row variable."""
    glyphs = (u'\u01ff', u'\u02ff', u'\u03ff')

    # Overwrite column "a" (used for the grid columns) with 20 copies of
    # each non-ASCII character.
    frame = self.df.copy()
    frame["a"] = np.repeat(glyphs, 20)

    # Sanity check: the column now holds the three runs of 20 characters.
    expected = [ch for ch in glyphs for _ in range(20)]
    nt.assert_equal(list(frame["a"]), expected)

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(frame, col="a")
    grid.map(plt.plot, "x")

@pytest.mark.skipif(pd.__version__.startswith("0.24"),
                    reason="known bug in pandas")
def test_unicode_content_no_columns(self):
    """Facet on a row variable holding Unicode values, without a column variable."""
    glyphs = (u'\u01ff', u'\u02ff', u'\u03ff')

    # Overwrite column "b" (used for the grid rows) with 20 copies of
    # each non-ASCII character.
    frame = self.df.copy()
    frame["b"] = np.repeat(glyphs, 20)

    # Sanity check: the column now holds the three runs of 20 characters.
    expected = [ch for ch in glyphs for _ in range(20)]
    nt.assert_equal(list(frame["b"]), expected)

    # Building and mapping the grid must complete without an
    # encoding-related error.
    grid = ag.FacetGrid(frame, row="b")
    grid.map(plt.plot, "x")

def test_categorical_column_missing_categories(self):

df = self.df.copy()
Expand Down
26 changes: 20 additions & 6 deletions seaborn/tests/test_utils.py
Expand Up @@ -5,7 +5,8 @@
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from cycler import cycler
from cycler import cycler

import pytest
import nose
import nose.tools as nt
Expand Down Expand Up @@ -120,12 +121,25 @@ def test_iqr():
assert_equal(iqr, 2)


@pytest.mark.parametrize(
    "s,exp",
    [
        # str inputs are passed through unchanged
        ("a", "a"),
        ("abc", "abc"),
        ("\u01ff\u02ff", "\u01ff\u02ff"),
        # bytes-like inputs are decoded as UTF-8
        (b"a", "a"),
        (b"abc", "abc"),
        ("\u01ff\u02ff".encode("utf-8"), "\u01ff\u02ff"),
        (bytearray("abc", "utf-8"), "abc"),
        (bytearray(), ""),
        # everything else goes through str()
        (1, "1"),
        (0, "0"),
        ([], str([])),
    ],
)
def test_to_utf8(s, exp):
    """Test the to_utf8 function: object to string.

    Every input, whatever its type, must come back as a ``str`` equal to
    ``exp``.  The non-ASCII cases ensure that bytes are really decoded as
    UTF-8 rather than with an ASCII-compatible codec that happens to agree
    on the plain-ASCII inputs.
    """
    u = utils.to_utf8(s)
    assert_equal(type(u), str)
    assert_equal(u, exp)


class TestSpineUtils(object):
Expand Down
42 changes: 10 additions & 32 deletions seaborn/utils.py
Expand Up @@ -594,16 +594,14 @@ def relative_luminance(color):


def to_utf8(obj):
    """Return a string representing a Python object.

    Strings (i.e. type ``str``) are returned unchanged.

    Byte strings (i.e. type ``bytes``) are returned as UTF-8-decoded strings.

    For other objects, the method ``__str__()`` is called, and the result is
    returned as a string.

    Parameters
    ----------
    obj : object
        Any Python object

    Returns
    -------
    s : str
        UTF-8-decoded string representation of ``obj``

    """
    if isinstance(obj, str):
        return obj
    try:
        # bytes and bytearray both expose decode(); anything without that
        # method falls through to the generic str() conversion below.
        return obj.decode(encoding="utf-8")
    except AttributeError:  # obj is not bytes-like
        return str(obj)


def _network(t=None, url='https://google.com'):
Expand Down

0 comments on commit 8f95512

Please sign in to comment.