Skip to content

Commit

Permalink
Respect pandas categorical order in category plots
Browse files Browse the repository at this point in the history
This will include levels that appear in the categorical's `categories`
list but that do not appear in the data.

See #361
  • Loading branch information
mwaskom committed Mar 8, 2015
1 parent f5334fc commit 8583391
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 16 deletions.
20 changes: 4 additions & 16 deletions seaborn/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
from .external.six.moves import range

from . import utils
from .utils import desaturate, iqr
from .utils import desaturate, iqr, categorical_order
from .algorithms import bootstrap
from .palettes import color_palette, husl_palette, light_palette
from .axisgrid import FacetGrid


class _CategoricalPlotter(object):
Expand Down Expand Up @@ -189,7 +190,7 @@ def establish_variables(self, x=None, y=None, hue=None, data=None,
group_label = groups.name

# Get the order on the categorical axis
group_names = self._category_order(groups, order)
group_names = categorical_order(groups, order)

# Group the numeric data
plot_data, value_label = self._group_longform(vals, groups,
Expand All @@ -203,7 +204,7 @@ def establish_variables(self, x=None, y=None, hue=None, data=None,
else:

# Get the order of the hue levels
hue_names = self._category_order(hue, hue_order)
hue_names = categorical_order(hue, hue_order)

# Group the hue data
plot_hues, hue_title = self._group_longform(hue, groups,
Expand All @@ -228,15 +229,6 @@ def establish_variables(self, x=None, y=None, hue=None, data=None,
self.hue_names = hue_names
self.plot_units = plot_units

def _category_order(self, data, order):
"""Get the order of levels for a categorical variable."""
if order is None:
try:
order = data.unique()
except AttributeError:
order = pd.unique(data)
return list(order)

def _group_longform(self, vals, grouper, order):
"""Group a long-form variable by another with correct order."""
# Ensure that the groupby will work
Expand Down Expand Up @@ -2050,10 +2042,6 @@ def barplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None,
kwargs.pop("dropna")
warn = True

if "label" in kwargs:
kwargs.pop("label")
warn = True

if "x_order" in kwargs:
order = kwargs.pop("x_order")
warn = True
Expand Down
10 changes: 10 additions & 0 deletions seaborn/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,11 @@ def test_order(self):
for group, vals in zip(["c", "b", "a"], p.plot_data):
npt.assert_array_equal(vals, self.y[self.g == group])

df.g = (df.g.cat.add_categories("d")
.cat.reorder_categories(["c", "b", "d", "a"]))
p.establish_variables("g", "y", data=df)
nt.assert_equal(p.group_names, ["c", "b", "d", "a"])

def test_hue_order(self):

p = cat._CategoricalPlotter()
Expand All @@ -311,6 +316,11 @@ def test_hue_order(self):
p.establish_variables("g", "y", "h", data=df)
nt.assert_equal(p.hue_names, ["n", "m"])

df.h = (df.h.cat.add_categories("o")
.cat.reorder_categories(["o", "m", "n"]))
p.establish_variables("g", "y", "h", data=df)
nt.assert_equal(p.hue_names, ["o", "m", "n"])

def test_plot_units(self):

p = cat._CategoricalPlotter()
Expand Down
37 changes: 37 additions & 0 deletions seaborn/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.testing import assert_array_equal
import nose
import nose.tools as nt
from nose.tools import assert_equal, raises

from distutils.version import LooseVersion
pandas_has_categoricals = LooseVersion(pd.__version__) >= "0.15"

from .. import utils, rcmod


Expand Down Expand Up @@ -244,3 +248,36 @@ def test_ticklabels_overlap():
x, y = utils.axes_ticklabels_overlap(ax)
assert x
assert not y


def test_category_order():
    """Exercise ``utils.categorical_order`` across its input types."""
    data = ["a", "c", "c", "b", "a", "d"]
    explicit = ["a", "b", "c", "d"]
    seen = ["a", "c", "b", "d"]

    # Plain list: first-appearance order unless an order is supplied
    nt.assert_equal(utils.categorical_order(data), seen)
    nt.assert_equal(utils.categorical_order(data, explicit), explicit)
    nt.assert_equal(utils.categorical_order(data, ["b", "a"]), ["b", "a"])

    # Arrays and non-categorical Series give the same result as the list
    for container in (np.array, pd.Series):
        nt.assert_equal(utils.categorical_order(container(data)), seen)

    # The remaining checks need a pandas with categorical support
    if not pandas_has_categoricals:
        return

    categorical = pd.Categorical(data, explicit)
    nt.assert_equal(utils.categorical_order(categorical),
                    list(categorical.categories))

    series = pd.Series(categorical)
    nt.assert_equal(utils.categorical_order(series),
                    list(series.cat.categories))

    # An explicit order still wins over the declared categories
    nt.assert_equal(utils.categorical_order(series, ["b", "a"]),
                    ["b", "a"])
18 changes: 18 additions & 0 deletions seaborn/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,21 @@ def axes_ticklabels_overlap(ax):
"""
return (axis_ticklabels_overlap(ax.get_xticklabels()),
axis_ticklabels_overlap(ax.get_yticklabels()))


def categorical_order(values, order=None):
    """Return the ordered list of levels for a categorical variable.

    Parameters
    ----------
    values : list, array, Series, or Categorical
        Data whose levels are wanted. Only inspected when ``order`` is
        None.
    order : sequence, optional
        Explicit level order. When given, it is returned as a list and
        ``values`` is ignored.

    Returns
    -------
    order : list
        The explicit ``order`` if one was passed; otherwise the declared
        categories for categorical data (including categories that never
        occur in the data), or the unique values of ``values``.

    """
    if order is not None:
        return list(order)

    # Probe the ``.cat`` accessor directly rather than with ``hasattr``:
    # on Python 3 ``hasattr`` only swallows AttributeError, and some
    # pandas releases signal "not categorical" with a TypeError instead.
    try:
        accessor = values.cat
    except (TypeError, AttributeError):
        accessor = None
    if accessor is not None:
        return list(accessor.categories)

    # Objects that carry their categories directly, without an accessor
    if hasattr(values, "categories"):
        return list(values.categories)

    try:
        levels = values.unique()
    except AttributeError:
        # No ``unique`` method (e.g. a plain list or an array). Wrap in a
        # Series first, because recent pandas deprecates passing such
        # inputs straight to ``pd.unique``.
        levels = pd.Series(values).unique()
    return list(levels)

0 comments on commit 8583391

Please sign in to comment.